diff options
author | bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> | 2004-04-22 00:10:48 +0000 |
---|---|---|
committer | bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> | 2004-04-22 00:10:48 +0000 |
commit | f0cbd3ec9f4a3de1a9ef94deda09704543889f44 (patch) | |
tree | 153d540a8c714054614f5f368a3d23432f915f17 | |
parent | probe static SDL link (diff) | |
download | qemu-kvm-f0cbd3ec9f4a3de1a9ef94deda09704543889f44.tar.gz qemu-kvm-f0cbd3ec9f4a3de1a9ef94deda09704543889f44.tar.bz2 qemu-kvm-f0cbd3ec9f4a3de1a9ef94deda09704543889f44.zip |
initial user mode network support
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@733 c046a42c-6fe2-441c-8c8c-71466251a162
-rw-r--r-- | slirp/bootp.c | 212 | ||||
-rw-r--r-- | slirp/bootp.h | 113 | ||||
-rw-r--r-- | slirp/cksum.c | 141 | ||||
-rw-r--r-- | slirp/ctl.h | 7 | ||||
-rw-r--r-- | slirp/debug.c | 376 | ||||
-rw-r--r-- | slirp/debug.h | 50 | ||||
-rw-r--r-- | slirp/icmp_var.h | 69 | ||||
-rw-r--r-- | slirp/if.c | 320 | ||||
-rw-r--r-- | slirp/if.h | 50 | ||||
-rw-r--r-- | slirp/ip.h | 317 | ||||
-rw-r--r-- | slirp/ip_icmp.c | 376 | ||||
-rw-r--r-- | slirp/ip_icmp.h | 164 | ||||
-rw-r--r-- | slirp/ip_input.c | 697 | ||||
-rw-r--r-- | slirp/ip_output.c | 205 | ||||
-rw-r--r-- | slirp/libslirp.h | 19 | ||||
-rw-r--r-- | slirp/main.h | 54 | ||||
-rw-r--r-- | slirp/mbuf.c | 245 | ||||
-rw-r--r-- | slirp/mbuf.h | 147 | ||||
-rw-r--r-- | slirp/misc.c | 925 | ||||
-rw-r--r-- | slirp/misc.h | 87 | ||||
-rw-r--r-- | slirp/sbuf.c | 201 | ||||
-rw-r--r-- | slirp/sbuf.h | 31 | ||||
-rw-r--r-- | slirp/slirp.c | 550 | ||||
-rw-r--r-- | slirp/slirp.h | 308 | ||||
-rw-r--r-- | slirp/slirp_config.h | 186 | ||||
-rw-r--r-- | slirp/socket.c | 696 | ||||
-rw-r--r-- | slirp/socket.h | 104 | ||||
-rw-r--r-- | slirp/tcp.h | 169 | ||||
-rw-r--r-- | slirp/tcp_input.c | 1745 | ||||
-rw-r--r-- | slirp/tcp_output.c | 608 | ||||
-rw-r--r-- | slirp/tcp_subr.c | 1325 | ||||
-rw-r--r-- | slirp/tcp_timer.c | 329 | ||||
-rw-r--r-- | slirp/tcp_timer.h | 142 | ||||
-rw-r--r-- | slirp/tcp_var.h | 252 | ||||
-rw-r--r-- | slirp/tcpip.h | 74 | ||||
-rw-r--r-- | slirp/udp.c | 654 | ||||
-rw-r--r-- | slirp/udp.h | 109 |
37 files changed, 12057 insertions, 0 deletions
diff --git a/slirp/bootp.c b/slirp/bootp.c new file mode 100644 index 000000000..7e4b5bab0 --- /dev/null +++ b/slirp/bootp.c @@ -0,0 +1,212 @@ +/* + * QEMU BOOTP/DHCP server + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <slirp.h> + +/* XXX: only DHCP is supported */ + +#define NB_ADDR 16 + +#define START_ADDR 15 + +#define LEASE_TIME (24 * 3600) + +typedef struct { + uint8_t allocated; +} BOOTPClient; + +BOOTPClient bootp_clients[NB_ADDR]; + +static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; + +#ifdef DEBUG +#define dprintf(fmt, args...) \ +if (slirp_debug & DBG_CALL) { fprintf(dfd, fmt, ## args); fflush(dfd); } +#else +#define dprintf(fmt, args...) 
+#endif + +static BOOTPClient *get_new_addr(struct in_addr *paddr) +{ + BOOTPClient *bc; + int i; + + for(i = 0; i < NB_ADDR; i++) { + if (!bootp_clients[i].allocated) + goto found; + } + return NULL; + found: + bc = &bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = htonl(ntohl(special_addr.s_addr) | (i + START_ADDR)); + return bc; +} + +static void dhcp_decode(const uint8_t *buf, int size, + int *pmsg_type) +{ + const uint8_t *p, *p_end; + int len, tag; + + *pmsg_type = 0; + + p = buf; + p_end = buf + size; + if (size < 5) + return; + if (memcmp(p, rfc1533_cookie, 4) != 0) + return; + p += 4; + while (p < p_end) { + tag = p[0]; + if (tag == RFC1533_PAD) { + p++; + } else if (tag == RFC1533_END) { + break; + } else { + p++; + if (p >= p_end) + break; + len = *p++; + dprintf("dhcp: tag=0x%02x len=%d\n", tag, len); + + switch(tag) { + case RFC2132_MSG_TYPE: + if (len >= 1) + *pmsg_type = p[0]; + break; + default: + break; + } + p += len; + } + } +} + +static void bootp_reply(struct bootp_t *bp) +{ + BOOTPClient *bc; + struct mbuf *m; + struct bootp_t *rbp; + struct sockaddr_in saddr, daddr; + struct in_addr dns_addr; + int dhcp_msg_type, val; + uint8_t *q; + + /* extract exact DHCP msg type */ + dhcp_decode(bp->bp_vend, DHCP_OPT_LEN, &dhcp_msg_type); + dprintf("bootp packet op=%d msgtype=%d\n", bp->bp_op, dhcp_msg_type); + + if (dhcp_msg_type != DHCPDISCOVER && + dhcp_msg_type != DHCPREQUEST) + return; + /* XXX: this is a hack to get the client mac address */ + memcpy(client_ethaddr, bp->bp_hwaddr, 6); + + if ((m = m_get()) == NULL) + return; + m->m_data += if_maxlinkhdr; + rbp = (struct bootp_t *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + memset(rbp, 0, sizeof(struct bootp_t)); + + bc = get_new_addr(&daddr.sin_addr); + if (!bc) { + dprintf("no address left\n"); + return; + } + dprintf("offered addr=%08x\n", ntohl(daddr.sin_addr.s_addr)); + + saddr.sin_addr.s_addr = htonl(ntohl(special_addr.s_addr) | CTL_ALIAS); + saddr.sin_port = 
htons(BOOTP_SERVER); + + daddr.sin_port = htons(BOOTP_CLIENT); + + rbp->bp_op = BOOTP_REPLY; + rbp->bp_xid = bp->bp_xid; + rbp->bp_htype = 1; + rbp->bp_hlen = 6; + memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, 6); + + rbp->bp_yiaddr = daddr.sin_addr; /* IP address */ + + q = rbp->bp_vend; + memcpy(q, rfc1533_cookie, 4); + q += 4; + + if (dhcp_msg_type == DHCPDISCOVER) { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPOFFER; + } else if (dhcp_msg_type == DHCPREQUEST) { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPACK; + } + + if (dhcp_msg_type == DHCPDISCOVER || + dhcp_msg_type == DHCPREQUEST) { + *q++ = RFC2132_SRV_ID; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_NETMASK; + *q++ = 4; + *q++ = 0xff; + *q++ = 0xff; + *q++ = 0xff; + *q++ = 0x00; + + *q++ = RFC1533_GATEWAY; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_DNS; + *q++ = 4; + dns_addr.s_addr = htonl(ntohl(special_addr.s_addr) | CTL_DNS); + memcpy(q, &dns_addr, 4); + q += 4; + + *q++ = RFC2132_LEASE_TIME; + *q++ = 4; + val = htonl(LEASE_TIME); + memcpy(q, &val, 4); + q += 4; + } + *q++ = RFC1533_END; + + m->m_len = sizeof(struct bootp_t); + udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); +} + +void bootp_input(struct mbuf *m) +{ + struct bootp_t *bp = (struct bootp_t *)m->m_data; + + if (bp->bp_op == BOOTP_REQUEST) { + bootp_reply(bp); + } +} diff --git a/slirp/bootp.h b/slirp/bootp.h new file mode 100644 index 000000000..d3b2baa04 --- /dev/null +++ b/slirp/bootp.h @@ -0,0 +1,113 @@ +/* bootp/dhcp defines */ + +#define BOOTP_SERVER 67 +#define BOOTP_CLIENT 68 + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +#define RFC1533_COOKIE 99, 130, 83, 99 +#define RFC1533_PAD 0 +#define RFC1533_NETMASK 1 +#define RFC1533_TIMEOFFSET 2 +#define RFC1533_GATEWAY 3 +#define RFC1533_TIMESERVER 4 +#define RFC1533_IEN116NS 5 +#define RFC1533_DNS 6 +#define RFC1533_LOGSERVER 7 +#define RFC1533_COOKIESERVER 8 +#define RFC1533_LPRSERVER 9 +#define 
RFC1533_IMPRESSSERVER 10 +#define RFC1533_RESOURCESERVER 11 +#define RFC1533_HOSTNAME 12 +#define RFC1533_BOOTFILESIZE 13 +#define RFC1533_MERITDUMPFILE 14 +#define RFC1533_DOMAINNAME 15 +#define RFC1533_SWAPSERVER 16 +#define RFC1533_ROOTPATH 17 +#define RFC1533_EXTENSIONPATH 18 +#define RFC1533_IPFORWARDING 19 +#define RFC1533_IPSOURCEROUTING 20 +#define RFC1533_IPPOLICYFILTER 21 +#define RFC1533_IPMAXREASSEMBLY 22 +#define RFC1533_IPTTL 23 +#define RFC1533_IPMTU 24 +#define RFC1533_IPMTUPLATEAU 25 +#define RFC1533_INTMTU 26 +#define RFC1533_INTLOCALSUBNETS 27 +#define RFC1533_INTBROADCAST 28 +#define RFC1533_INTICMPDISCOVER 29 +#define RFC1533_INTICMPRESPOND 30 +#define RFC1533_INTROUTEDISCOVER 31 +#define RFC1533_INTROUTESOLICIT 32 +#define RFC1533_INTSTATICROUTES 33 +#define RFC1533_LLTRAILERENCAP 34 +#define RFC1533_LLARPCACHETMO 35 +#define RFC1533_LLETHERNETENCAP 36 +#define RFC1533_TCPTTL 37 +#define RFC1533_TCPKEEPALIVETMO 38 +#define RFC1533_TCPKEEPALIVEGB 39 +#define RFC1533_NISDOMAIN 40 +#define RFC1533_NISSERVER 41 +#define RFC1533_NTPSERVER 42 +#define RFC1533_VENDOR 43 +#define RFC1533_NBNS 44 +#define RFC1533_NBDD 45 +#define RFC1533_NBNT 46 +#define RFC1533_NBSCOPE 47 +#define RFC1533_XFS 48 +#define RFC1533_XDM 49 + +#define RFC2132_REQ_ADDR 50 +#define RFC2132_LEASE_TIME 51 +#define RFC2132_MSG_TYPE 53 +#define RFC2132_SRV_ID 54 +#define RFC2132_PARAM_LIST 55 +#define RFC2132_MAX_SIZE 57 +#define RFC2132_RENEWAL_TIME 58 +#define RFC2132_REBIND_TIME 59 + +#define DHCPDISCOVER 1 +#define DHCPOFFER 2 +#define DHCPREQUEST 3 +#define DHCPACK 5 + +#define RFC1533_VENDOR_MAJOR 0 +#define RFC1533_VENDOR_MINOR 0 + +#define RFC1533_VENDOR_MAGIC 128 +#define RFC1533_VENDOR_ADDPARM 129 +#define RFC1533_VENDOR_ETHDEV 130 +#define RFC1533_VENDOR_HOWTO 132 +#define RFC1533_VENDOR_MNUOPTS 160 +#define RFC1533_VENDOR_SELECTION 176 +#define RFC1533_VENDOR_MOTD 184 +#define RFC1533_VENDOR_NUMOFMOTD 8 +#define RFC1533_VENDOR_IMG 192 +#define RFC1533_VENDOR_NUMOFIMG 
16 + +#define RFC1533_END 255 +#define BOOTP_VENDOR_LEN 64 +#define DHCP_OPT_LEN 312 + +struct bootp_t { + struct ip ip; + struct udphdr udp; + uint8_t bp_op; + uint8_t bp_htype; + uint8_t bp_hlen; + uint8_t bp_hops; + unsigned long bp_xid; + unsigned short bp_secs; + unsigned short unused; + struct in_addr bp_ciaddr; + struct in_addr bp_yiaddr; + struct in_addr bp_siaddr; + struct in_addr bp_giaddr; + uint8_t bp_hwaddr[16]; + uint8_t bp_sname[64]; + uint8_t bp_file[128]; + uint8_t bp_vend[DHCP_OPT_LEN]; +}; + +void bootp_input(struct mbuf *m); diff --git a/slirp/cksum.c b/slirp/cksum.c new file mode 100644 index 000000000..f8f7512b6 --- /dev/null +++ b/slirp/cksum.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp + */ + +#include <slirp.h> + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * XXX Since we will never span more than 1 mbuf, we can optimise this + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +int cksum(struct mbuf *m, int len) +{ + register u_int16_t *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + u_int8_t c[2]; + u_int16_t s; + } s_util; + union { + u_int16_t s[2]; + u_int32_t l; + } l_util; + + if (m->m_len == 0) + goto cont; + w = mtod(m, u_int16_t *); + + mlen = m->m_len; + + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (long) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_int8_t *)w; + w = (u_int16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. 
+ */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + goto cont; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(u_int8_t *)w; + sum += s_util.s; + mlen = 0; + } else + + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(u_int8_t *)w; + +cont: +#ifdef DEBUG + if (len) { + DEBUG_ERROR((dfd, "cksum: out of data\n")); + DEBUG_ERROR((dfd, " len = %d\n", len)); + } +#endif + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/slirp/ctl.h b/slirp/ctl.h new file mode 100644 index 000000000..4a8576dc1 --- /dev/null +++ b/slirp/ctl.h @@ -0,0 +1,7 @@ +#define CTL_CMD 0 +#define CTL_EXEC 1 +#define CTL_ALIAS 2 +#define CTL_DNS 3 + +#define CTL_SPECIAL "10.0.2.0" +#define CTL_LOCAL "10.0.2.15" diff --git a/slirp/debug.c b/slirp/debug.c new file mode 100644 index 000000000..d3d8c5796 --- /dev/null +++ b/slirp/debug.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * Portions copyright (c) 2000 Kelly Price. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> + +FILE *dfd = NULL; +#ifdef DEBUG +int dostats = 1; +#else +int dostats = 0; +#endif +int slirp_debug = 0; + +extern char *strerror _P((int)); + +/* Carry over one item from main.c so that the tty's restored. 
+ * Only done when the tty being used is /dev/tty --RedWolf */ +extern struct termios slirp_tty_settings; +extern int slirp_tty_restore; + + +void +debug_init(file, dbg) + char *file; + int dbg; +{ + /* Close the old debugging file */ + if (dfd) + fclose(dfd); + + dfd = fopen(file,"w"); + if (dfd != NULL) { +#if 0 + fprintf(dfd,"Slirp %s - Debugging Started.\n", SLIRP_VERSION); +#endif + fprintf(dfd,"Debugging Started level %i.\r\n",dbg); + fflush(dfd); + slirp_debug = dbg; + } else { + lprint("Error: Debugging file \"%s\" could not be opened: %s\r\n", + file, strerror(errno)); + } +} + +/* + * Dump a packet in the same format as tcpdump -x + */ +#ifdef DEBUG +void +dump_packet(dat, n) + void *dat; + int n; +{ + u_char *pptr = (u_char *)dat; + int j,k; + + n /= 16; + n++; + DEBUG_MISC((dfd, "PACKET DUMPED: \n")); + for(j = 0; j < n; j++) { + for(k = 0; k < 6; k++) + DEBUG_MISC((dfd, "%02x ", *pptr++)); + DEBUG_MISC((dfd, "\n")); + fflush(dfd); + } +} +#endif + +#if 0 +/* + * Statistic routines + * + * These will print statistics to the screen, the debug file (dfd), or + * a buffer, depending on "type", so that the stats can be sent over + * the link as well. 
+ */ + +void +ttystats(ttyp) + struct ttys *ttyp; +{ + struct slirp_ifstats *is = &ttyp->ifstats; + char buff[512]; + + lprint(" \r\n"); + + if (if_comp & IF_COMPRESS) + strcpy(buff, "on"); + else if (if_comp & IF_NOCOMPRESS) + strcpy(buff, "off"); + else + strcpy(buff, "off (for now)"); + lprint("Unit %d:\r\n", ttyp->unit); + lprint(" using %s encapsulation (VJ compression is %s)\r\n", ( +#ifdef USE_PPP + ttyp->proto==PROTO_PPP?"PPP": +#endif + "SLIP"), buff); + lprint(" %d baudrate\r\n", ttyp->baud); + lprint(" interface is %s\r\n", ttyp->up?"up":"down"); + lprint(" using fd %d, guardian pid is %d\r\n", ttyp->fd, ttyp->pid); +#ifndef FULL_BOLT + lprint(" towrite is %d bytes\r\n", ttyp->towrite); +#endif + if (ttyp->zeros) + lprint(" %d zeros have been typed\r\n", ttyp->zeros); + else if (ttyp->ones) + lprint(" %d ones have been typed\r\n", ttyp->ones); + lprint("Interface stats:\r\n"); + lprint(" %6d output packets sent (%d bytes)\r\n", is->out_pkts, is->out_bytes); + lprint(" %6d output packets dropped (%d bytes)\r\n", is->out_errpkts, is->out_errbytes); + lprint(" %6d input packets received (%d bytes)\r\n", is->in_pkts, is->in_bytes); + lprint(" %6d input packets dropped (%d bytes)\r\n", is->in_errpkts, is->in_errbytes); + lprint(" %6d bad input packets\r\n", is->in_mbad); +} + +void +allttystats() +{ + struct ttys *ttyp; + + for (ttyp = ttys; ttyp; ttyp = ttyp->next) + ttystats(ttyp); +} +#endif + +void +ipstats() +{ + lprint(" \r\n"); + + lprint("IP stats:\r\n"); + lprint(" %6d total packets received (%d were unaligned)\r\n", + ipstat.ips_total, ipstat.ips_unaligned); + lprint(" %6d with incorrect version\r\n", ipstat.ips_badvers); + lprint(" %6d with bad header checksum\r\n", ipstat.ips_badsum); + lprint(" %6d with length too short (len < sizeof(iphdr))\r\n", ipstat.ips_tooshort); + lprint(" %6d with length too small (len < ip->len)\r\n", ipstat.ips_toosmall); + lprint(" %6d with bad header length\r\n", ipstat.ips_badhlen); + lprint(" %6d with bad packet 
length\r\n", ipstat.ips_badlen); + lprint(" %6d fragments received\r\n", ipstat.ips_fragments); + lprint(" %6d fragments dropped\r\n", ipstat.ips_fragdropped); + lprint(" %6d fragments timed out\r\n", ipstat.ips_fragtimeout); + lprint(" %6d packets reassembled ok\r\n", ipstat.ips_reassembled); + lprint(" %6d outgoing packets fragmented\r\n", ipstat.ips_fragmented); + lprint(" %6d total outgoing fragments\r\n", ipstat.ips_ofragments); + lprint(" %6d with bad protocol field\r\n", ipstat.ips_noproto); + lprint(" %6d total packets delivered\r\n", ipstat.ips_delivered); +} + +#if 0 +void +vjstats() +{ + lprint(" \r\n"); + + lprint("VJ compression stats:\r\n"); + + lprint(" %6d outbound packets (%d compressed)\r\n", + comp_s.sls_packets, comp_s.sls_compressed); + lprint(" %6d searches for connection stats (%d misses)\r\n", + comp_s.sls_searches, comp_s.sls_misses); + lprint(" %6d inbound uncompressed packets\r\n", comp_s.sls_uncompressedin); + lprint(" %6d inbound compressed packets\r\n", comp_s.sls_compressedin); + lprint(" %6d inbound unknown type packets\r\n", comp_s.sls_errorin); + lprint(" %6d inbound packets tossed due to error\r\n", comp_s.sls_tossed); +} +#endif + +void +tcpstats() +{ + lprint(" \r\n"); + + lprint("TCP stats:\r\n"); + + lprint(" %6d packets sent\r\n", tcpstat.tcps_sndtotal); + lprint(" %6d data packets (%d bytes)\r\n", + tcpstat.tcps_sndpack, tcpstat.tcps_sndbyte); + lprint(" %6d data packets retransmitted (%d bytes)\r\n", + tcpstat.tcps_sndrexmitpack, tcpstat.tcps_sndrexmitbyte); + lprint(" %6d ack-only packets (%d delayed)\r\n", + tcpstat.tcps_sndacks, tcpstat.tcps_delack); + lprint(" %6d URG only packets\r\n", tcpstat.tcps_sndurg); + lprint(" %6d window probe packets\r\n", tcpstat.tcps_sndprobe); + lprint(" %6d window update packets\r\n", tcpstat.tcps_sndwinup); + lprint(" %6d control (SYN/FIN/RST) packets\r\n", tcpstat.tcps_sndctrl); + lprint(" %6d times tcp_output did nothing\r\n", tcpstat.tcps_didnuttin); + + lprint(" %6d packets 
received\r\n", tcpstat.tcps_rcvtotal); + lprint(" %6d acks (for %d bytes)\r\n", + tcpstat.tcps_rcvackpack, tcpstat.tcps_rcvackbyte); + lprint(" %6d duplicate acks\r\n", tcpstat.tcps_rcvdupack); + lprint(" %6d acks for unsent data\r\n", tcpstat.tcps_rcvacktoomuch); + lprint(" %6d packets received in sequence (%d bytes)\r\n", + tcpstat.tcps_rcvpack, tcpstat.tcps_rcvbyte); + lprint(" %6d completely duplicate packets (%d bytes)\r\n", + tcpstat.tcps_rcvduppack, tcpstat.tcps_rcvdupbyte); + + lprint(" %6d packets with some duplicate data (%d bytes duped)\r\n", + tcpstat.tcps_rcvpartduppack, tcpstat.tcps_rcvpartdupbyte); + lprint(" %6d out-of-order packets (%d bytes)\r\n", + tcpstat.tcps_rcvoopack, tcpstat.tcps_rcvoobyte); + lprint(" %6d packets of data after window (%d bytes)\r\n", + tcpstat.tcps_rcvpackafterwin, tcpstat.tcps_rcvbyteafterwin); + lprint(" %6d window probes\r\n", tcpstat.tcps_rcvwinprobe); + lprint(" %6d window update packets\r\n", tcpstat.tcps_rcvwinupd); + lprint(" %6d packets received after close\r\n", tcpstat.tcps_rcvafterclose); + lprint(" %6d discarded for bad checksums\r\n", tcpstat.tcps_rcvbadsum); + lprint(" %6d discarded for bad header offset fields\r\n", + tcpstat.tcps_rcvbadoff); + + lprint(" %6d connection requests\r\n", tcpstat.tcps_connattempt); + lprint(" %6d connection accepts\r\n", tcpstat.tcps_accepts); + lprint(" %6d connections established (including accepts)\r\n", tcpstat.tcps_connects); + lprint(" %6d connections closed (including %d drop)\r\n", + tcpstat.tcps_closed, tcpstat.tcps_drops); + lprint(" %6d embryonic connections dropped\r\n", tcpstat.tcps_conndrops); + lprint(" %6d segments we tried to get rtt (%d succeeded)\r\n", + tcpstat.tcps_segstimed, tcpstat.tcps_rttupdated); + lprint(" %6d retransmit timeouts\r\n", tcpstat.tcps_rexmttimeo); + lprint(" %6d connections dropped by rxmt timeout\r\n", + tcpstat.tcps_timeoutdrop); + lprint(" %6d persist timeouts\r\n", tcpstat.tcps_persisttimeo); + lprint(" %6d keepalive timeouts\r\n", 
tcpstat.tcps_keeptimeo); + lprint(" %6d keepalive probes sent\r\n", tcpstat.tcps_keepprobe); + lprint(" %6d connections dropped by keepalive\r\n", tcpstat.tcps_keepdrops); + lprint(" %6d correct ACK header predictions\r\n", tcpstat.tcps_predack); + lprint(" %6d correct data packet header predictions\n", tcpstat.tcps_preddat); + lprint(" %6d TCP cache misses\r\n", tcpstat.tcps_socachemiss); + + +/* lprint(" Packets received too short: %d\r\n", tcpstat.tcps_rcvshort); */ +/* lprint(" Segments dropped due to PAWS: %d\r\n", tcpstat.tcps_pawsdrop); */ + +} + +void +udpstats() +{ + lprint(" \r\n"); + + lprint("UDP stats:\r\n"); + lprint(" %6d datagrams received\r\n", udpstat.udps_ipackets); + lprint(" %6d with packets shorter than header\r\n", udpstat.udps_hdrops); + lprint(" %6d with bad checksums\r\n", udpstat.udps_badsum); + lprint(" %6d with data length larger than packet\r\n", udpstat.udps_badlen); + lprint(" %6d UDP socket cache misses\r\n", udpstat.udpps_pcbcachemiss); + lprint(" %6d datagrams sent\r\n", udpstat.udps_opackets); +} + +void +icmpstats() +{ + lprint(" \r\n"); + lprint("ICMP stats:\r\n"); + lprint(" %6d ICMP packets received\r\n", icmpstat.icps_received); + lprint(" %6d were too short\r\n", icmpstat.icps_tooshort); + lprint(" %6d with bad checksums\r\n", icmpstat.icps_checksum); + lprint(" %6d with type not supported\r\n", icmpstat.icps_notsupp); + lprint(" %6d with bad type feilds\r\n", icmpstat.icps_badtype); + lprint(" %6d ICMP packets sent in reply\r\n", icmpstat.icps_reflect); +} + +void +mbufstats() +{ + struct mbuf *m; + int i; + + lprint(" \r\n"); + + lprint("Mbuf stats:\r\n"); + + lprint(" %6d mbufs allocated (%d max)\r\n", mbuf_alloced, mbuf_max); + + i = 0; + for (m = m_freelist.m_next; m != &m_freelist; m = m->m_next) + i++; + lprint(" %6d mbufs on free list\r\n", i); + + i = 0; + for (m = m_usedlist.m_next; m != &m_usedlist; m = m->m_next) + i++; + lprint(" %6d mbufs on used list\r\n", i); + lprint(" %6d mbufs queued as packets\r\n\r\n", 
if_queued); +} + +void +sockstats() +{ + char buff[256]; + int n; + struct socket *so; + + lprint(" \r\n"); + + lprint( + "Proto[state] Sock Local Address, Port Remote Address, Port RecvQ SendQ\r\n"); + + for (so = tcb.so_next; so != &tcb; so = so->so_next) { + + n = sprintf(buff, "tcp[%s]", so->so_tcpcb?tcpstates[so->so_tcpcb->t_state]:"NONE"); + while (n < 17) + buff[n++] = ' '; + buff[17] = 0; + lprint("%s %3d %15s %5d ", + buff, so->s, + inet_ntoa(so->so_laddr), ntohs(so->so_lport)); + lprint("%15s %5d %5d %5d\r\n", + inet_ntoa(so->so_faddr), ntohs(so->so_fport), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + for (so = udb.so_next; so != &udb; so = so->so_next) { + + n = sprintf(buff, "udp[%d sec]", (so->so_expire - curtime) / 1000); + while (n < 17) + buff[n++] = ' '; + buff[17] = 0; + lprint("%s %3d %15s %5d ", + buff, so->s, + inet_ntoa(so->so_laddr), ntohs(so->so_lport)); + lprint("%15s %5d %5d %5d\r\n", + inet_ntoa(so->so_faddr), ntohs(so->so_fport), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } +} + +#if 0 +void +slirp_exit(exit_status) + int exit_status; +{ + struct ttys *ttyp; + + DEBUG_CALL("slirp_exit"); + DEBUG_ARG("exit_status = %d", exit_status); + + if (dostats) { + lprint_print = (int (*) _P((void *, const char *, va_list)))vfprintf; + if (!dfd) + debug_init("slirp_stats", 0xf); + lprint_arg = (char **)&dfd; + + ipstats(); + tcpstats(); + udpstats(); + icmpstats(); + mbufstats(); + sockstats(); + allttystats(); + vjstats(); + } + + for (ttyp = ttys; ttyp; ttyp = ttyp->next) + tty_detached(ttyp, 1); + + if (slirp_forked) { + /* Menendez time */ + if (kill(getppid(), SIGQUIT) < 0) + lprint("Couldn't kill parent process %ld!\n", + (long) getppid()); + } + + /* Restore the terminal if we gotta */ + if(slirp_tty_restore) + tcsetattr(0,TCSANOW, &slirp_tty_settings); /* NOW DAMMIT! 
*/ + exit(exit_status); +} +#endif diff --git a/slirp/debug.h b/slirp/debug.h new file mode 100644 index 000000000..6e8444dab --- /dev/null +++ b/slirp/debug.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#define PRN_STDERR 1 +#define PRN_SPRINTF 2 + +extern FILE *dfd; +extern FILE *lfd; +extern int dostats; +extern int slirp_debug; + +#define DBG_CALL 0x1 +#define DBG_MISC 0x2 +#define DBG_ERROR 0x4 +#define DEBUG_DEFAULT DBG_CALL|DBG_MISC|DBG_ERROR + +#ifdef DEBUG +#define DEBUG_CALL(x) if (slirp_debug & DBG_CALL) { fprintf(dfd, "%s...\n", x); fflush(dfd); } +#define DEBUG_ARG(x, y) if (slirp_debug & DBG_CALL) { fputc(' ', dfd); fprintf(dfd, x, y); fputc('\n', dfd); fflush(dfd); } +#define DEBUG_ARGS(x) if (slirp_debug & DBG_CALL) { fprintf x ; fflush(dfd); } +#define DEBUG_MISC(x) if (slirp_debug & DBG_MISC) { fprintf x ; fflush(dfd); } +#define DEBUG_ERROR(x) if (slirp_debug & DBG_ERROR) {fprintf x ; fflush(dfd); } + + +#else + +#define DEBUG_CALL(x) +#define DEBUG_ARG(x, y) +#define DEBUG_ARGS(x) +#define DEBUG_MISC(x) +#define DEBUG_ERROR(x) + +#endif + +void debug_init _P((char *, int)); +//void ttystats _P((struct ttys *)); +void allttystats _P((void)); +void ipstats _P((void)); +void vjstats _P((void)); +void tcpstats _P((void)); +void udpstats _P((void)); +void icmpstats _P((void)); +void mbufstats _P((void)); +void sockstats _P((void)); +void slirp_exit _P((int)); + diff --git a/slirp/icmp_var.h b/slirp/icmp_var.h new file mode 100644 index 000000000..03fc8c3ac --- /dev/null +++ b/slirp/icmp_var.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93 + * icmp_var.h,v 1.4 1995/02/16 00:27:40 wollman Exp + */ + +#ifndef _NETINET_ICMP_VAR_H_ +#define _NETINET_ICMP_VAR_H_ + +/* + * Variables related to this implementation + * of the internet control message protocol. 
+ */ +struct icmpstat { +/* statistics related to input messages processed */ + u_long icps_received; /* #ICMP packets received */ + u_long icps_tooshort; /* packet < ICMP_MINLEN */ + u_long icps_checksum; /* bad checksum */ + u_long icps_notsupp; /* #ICMP packets not supported */ + u_long icps_badtype; /* #with bad type field */ + u_long icps_reflect; /* number of responses */ +}; + +/* + * Names for ICMP sysctl objects + */ +#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */ +#define ICMPCTL_STATS 2 /* statistics (read-only) */ +#define ICMPCTL_MAXID 3 + +#define ICMPCTL_NAMES { \ + { 0, 0 }, \ + { "maskrepl", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ +} + +extern struct icmpstat icmpstat; + +#endif diff --git a/slirp/if.c b/slirp/if.c new file mode 100644 index 000000000..282b674c5 --- /dev/null +++ b/slirp/if.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#include <slirp.h> + +int if_mtu, if_mru; +int if_comp; +int if_maxlinkhdr; +int if_queued = 0; /* Number of packets queued so far */ +int if_thresh = 10; /* Number of packets queued before we start sending + * (to prevent allocing too many mbufs) */ + +struct mbuf if_fastq; /* fast queue (for interactive data) */ +struct mbuf if_batchq; /* queue for non-interactive data */ +struct mbuf *next_m; /* Pointer to next mbuf to output */ + +#define ifs_init(ifm) ((ifm)->ifs_next = (ifm)->ifs_prev = (ifm)) + +void +ifs_insque(ifm, ifmhead) + struct mbuf *ifm, *ifmhead; +{ + ifm->ifs_next = ifmhead->ifs_next; + ifmhead->ifs_next = ifm; + ifm->ifs_prev = ifmhead; + ifm->ifs_next->ifs_prev = ifm; +} + +void +ifs_remque(ifm) + struct mbuf *ifm; +{ + ifm->ifs_prev->ifs_next = ifm->ifs_next; + ifm->ifs_next->ifs_prev = ifm->ifs_prev; +} + +void +if_init() +{ +#if 0 + /* + * Set if_maxlinkhdr to 48 because it's 40 bytes for TCP/IP, + * and 8 bytes for PPP, but need to have it on an 8byte boundary + */ +#ifdef USE_PPP + if_maxlinkhdr = 48; +#else + if_maxlinkhdr = 40; +#endif +#else + /* 14 for ethernet + 40 */ + if_maxlinkhdr = 14 + 40; +#endif + if_mtu = 1500; + if_mru = 1500; + if_comp = IF_AUTOCOMP; + if_fastq.ifq_next = if_fastq.ifq_prev = &if_fastq; + if_batchq.ifq_next = if_batchq.ifq_prev = &if_batchq; + // sl_compress_init(&comp_s); + next_m = &if_batchq; +} + +#if 0 +/* + * This shouldn't be needed since the modem is blocking and + * we don't expect any signals, but what the hell.. 
+ */ +inline int +writen(fd, bptr, n) + int fd; + char *bptr; + int n; +{ + int ret; + int total; + + /* This should succeed most of the time */ + ret = write(fd, bptr, n); + if (ret == n || ret <= 0) + return ret; + + /* Didn't write everything, go into the loop */ + total = ret; + while (n > total) { + ret = write(fd, bptr+total, n-total); + if (ret <= 0) + return ret; + total += ret; + } + return total; +} + +/* + * if_input - read() the tty, do "top level" processing (ie: check for any escapes), + * and pass onto (*ttyp->if_input) + * + * XXXXX Any zeros arriving by themselves are NOT placed into the arriving packet. + */ +#define INBUFF_SIZE 2048 /* XXX */ +void +if_input(ttyp) + struct ttys *ttyp; +{ + u_char if_inbuff[INBUFF_SIZE]; + int if_n; + + DEBUG_CALL("if_input"); + DEBUG_ARG("ttyp = %lx", (long)ttyp); + + if_n = read(ttyp->fd, (char *)if_inbuff, INBUFF_SIZE); + + DEBUG_MISC((dfd, " read %d bytes\n", if_n)); + + if (if_n <= 0) { + if (if_n == 0 || (errno != EINTR && errno != EAGAIN)) { + if (ttyp->up) + link_up--; + tty_detached(ttyp, 0); + } + return; + } + if (if_n == 1) { + if (*if_inbuff == '0') { + ttyp->ones = 0; + if (++ttyp->zeros >= 5) + slirp_exit(0); + return; + } + if (*if_inbuff == '1') { + ttyp->zeros = 0; + if (++ttyp->ones >= 5) + tty_detached(ttyp, 0); + return; + } + } + ttyp->ones = ttyp->zeros = 0; + + (*ttyp->if_input)(ttyp, if_inbuff, if_n); +} +#endif + +/* + * if_output: Queue packet into an output queue. + * There are 2 output queue's, if_fastq and if_batchq. + * Each output queue is a doubly linked list of double linked lists + * of mbufs, each list belonging to one "session" (socket). This + * way, we can output packets fairly by sending one packet from each + * session, instead of all the packets from one session, then all packets + * from the next session, etc. 
Packets on the if_fastq get absolute + * priority, but if one session hogs the link, it gets "downgraded" + * to the batchq until it runs out of packets, then it'll return + * to the fastq (eg. if the user does an ls -alR in a telnet session, + * it'll temporarily get downgraded to the batchq) + */ +void +if_output(so, ifm) + struct socket *so; + struct mbuf *ifm; +{ + struct mbuf *ifq; + int on_fastq = 1; + + DEBUG_CALL("if_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("ifm = %lx", (long)ifm); + + /* + * First remove the mbuf from m_usedlist, + * since we're gonna use m_next and m_prev ourselves + * XXX Shouldn't need this, gotta change dtom() etc. + */ + if (ifm->m_flags & M_USEDLIST) { + remque(ifm); + ifm->m_flags &= ~M_USEDLIST; + } + + /* + * See if there's already a batchq list for this session. + * This can include an interactive session, which should go on fastq, + * but gets too greedy... hence it'll be downgraded from fastq to batchq. + * We mustn't put this packet back on the fastq (or we'll send it out of order) + * XXX add cache here? + */ + for (ifq = if_batchq.ifq_prev; ifq != &if_batchq; ifq = ifq->ifq_prev) { + if (so == ifq->ifq_so) { + /* A match! */ + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } + + /* No match, check which queue to put it on */ + if (so && (so->so_iptos & IPTOS_LOWDELAY)) { + ifq = if_fastq.ifq_prev; + on_fastq = 1; + /* + * Check if this packet is a part of the last + * packet's session + */ + if (ifq->ifq_so == so) { + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } else + ifq = if_batchq.ifq_prev; + + /* Create a new doubly linked list for this session */ + ifm->ifq_so = so; + ifs_init(ifm); + insque(ifm, ifq); + +diddit: + ++if_queued; + + if (so) { + /* Update *_queued */ + so->so_queued++; + so->so_nqueued++; + /* + * Check if the interactive session should be downgraded to + * the batchq. 
A session is downgraded if it has queued 6 + * packets without pausing, and at least 3 of those packets + * have been sent over the link + * (XXX These are arbitrary numbers, probably not optimal..) + */ + if (on_fastq && ((so->so_nqueued >= 6) && + (so->so_nqueued - so->so_queued) >= 3)) { + + /* Remove from current queue... */ + remque(ifm->ifs_next); + + /* ...And insert in the new. That'll teach ya! */ + insque(ifm->ifs_next, &if_batchq); + } + } + +#ifndef FULL_BOLT + /* + * This prevents us from malloc()ing too many mbufs + */ + if (link_up) { + /* if_start will check towrite */ + if_start(); + } +#endif +} + +/* + * Send a packet + * We choose a packet based on it's position in the output queues; + * If there are packets on the fastq, they are sent FIFO, before + * everything else. Otherwise we choose the first packet from the + * batchq and send it. the next packet chosen will be from the session + * after this one, then the session after that one, and so on.. So, + * for example, if there are 3 ftp session's fighting for bandwidth, + * one packet will be sent from the first session, then one packet + * from the second session, then one packet from the third, then back + * to the first, etc. etc. 
+ */ +void +if_start(void) +{ + struct mbuf *ifm, *ifqt; + + DEBUG_CALL("if_start"); + + if (if_queued == 0) + return; /* Nothing to do */ + + again: + /* check if we can really output */ + if (!slirp_can_output()) + return; + + /* + * See which queue to get next packet from + * If there's something in the fastq, select it immediately + */ + if (if_fastq.ifq_next != &if_fastq) { + ifm = if_fastq.ifq_next; + } else { + /* Nothing on fastq, see if next_m is valid */ + if (next_m != &if_batchq) + ifm = next_m; + else + ifm = if_batchq.ifq_next; + + /* Set which packet to send on next iteration */ + next_m = ifm->ifq_next; + } + /* Remove it from the queue */ + ifqt = ifm->ifq_prev; + remque(ifm); + --if_queued; + + /* If there are more packets for this session, re-queue them */ + if (ifm->ifs_next != /* ifm->ifs_prev != */ ifm) { + insque(ifm->ifs_next, ifqt); + ifs_remque(ifm); + } + + /* Update so_queued */ + if (ifm->ifq_so) { + if (--ifm->ifq_so->so_queued == 0) + /* If there's no more queued, reset nqueued */ + ifm->ifq_so->so_nqueued = 0; + } + + /* Encapsulate the packet for sending */ + if_encap(ifm->m_data, ifm->m_len); + + if (if_queued) + goto again; +} diff --git a/slirp/if.h b/slirp/if.h new file mode 100644 index 000000000..5d96a9034 --- /dev/null +++ b/slirp/if.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#ifndef _IF_H_ +#define _IF_H_ + +#define IF_COMPRESS 0x01 /* We want compression */ +#define IF_NOCOMPRESS 0x02 /* Do not do compression */ +#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ +#define IF_NOCIDCOMP 0x08 /* CID compression */ + +/* Needed for FreeBSD */ +#undef if_mtu +extern int if_mtu; +extern int if_mru; /* MTU and MRU */ +extern int if_comp; /* Flags for compression */ +extern int if_maxlinkhdr; +extern int if_queued; /* Number of packets queued so far */ +extern int if_thresh; /* Number of packets queued before we start sending + * (to prevent allocing too many mbufs) */ + +extern struct mbuf if_fastq; /* fast queue (for interactive data) */ +extern struct mbuf if_batchq; /* queue for non-interactive data */ +extern struct mbuf *next_m; + +#define ifs_init(ifm) ((ifm)->ifs_next = (ifm)->ifs_prev = (ifm)) + +/* Interface statistics */ +struct slirp_ifstats { + u_int out_pkts; /* Output packets */ + u_int out_bytes; /* Output bytes */ + u_int out_errpkts; /* Output Error Packets */ + u_int out_errbytes; /* Output Error Bytes */ + u_int in_pkts; /* Input packets */ + u_int in_bytes; /* Input bytes */ + u_int in_errpkts; /* Input Error Packets */ + u_int in_errbytes; /* Input Error Bytes */ + + u_int bytes_saved; /* Number of bytes that compression "saved" */ + /* ie: number of bytes that didn't need to be sent over the link + * because of compression */ + + u_int in_mbad; /* Bad incoming packets */ +}; + +#endif diff --git a/slirp/ip.h b/slirp/ip.h new file mode 100644 index 000000000..8280e55b8 --- /dev/null +++ b/slirp/ip.h @@ -0,0 +1,317 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp + */ + +#ifndef _IP_H_ +#define _IP_H_ + +#ifdef WORDS_BIGENDIAN +# ifndef NTOHL +# define NTOHL(d) +# endif +# ifndef NTOHS +# define NTOHS(d) +# endif +# ifndef HTONL +# define HTONL(d) +# endif +# ifndef HTONS +# define HTONS(d) +# endif +#else +# ifndef NTOHL +# define NTOHL(d) ((d) = ntohl((d))) +# endif +# ifndef NTOHS +# define NTOHS(d) ((d) = ntohs((u_int16_t)(d))) +# endif +# ifndef HTONL +# define HTONL(d) ((d) = htonl((d))) +# endif +# ifndef HTONS +# define HTONS(d) ((d) = htons((u_int16_t)(d))) +# endif +#endif + +typedef u_int32_t n_long; /* long as received from the net */ + +/* + * Definitions for internet protocol version 4. + * Per RFC 791, September 1981. + */ +#define IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + * + * We declare ip_len and ip_off to be short, rather than u_short + * pragmatically since otherwise unsigned comparisons can result + * against negative integers quite easily, and fail in subtle ways. + */ +struct ip { +#ifdef WORDS_BIGENDIAN + u_int ip_v:4, /* version */ + ip_hl:4; /* header length */ +#else + u_int ip_hl:4, /* header length */ + ip_v:4; /* version */ +#endif + u_int8_t ip_tos; /* type of service */ + int16_t ip_len; /* total length */ + u_int16_t ip_id; /* identification */ + int16_t ip_off; /* fragment offset field */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + u_int8_t ip_ttl; /* time to live */ + u_int8_t ip_p; /* protocol */ + u_int16_t ip_sum; /* checksum */ + struct in_addr ip_src,ip_dst; /* source and dest address */ +}; + +#define IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos) + */ +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define IPTOS_RELIABILITY 0x04 + +/* + * Definitions for options. 
+ */ +#define IPOPT_COPIED(o) ((o)&0x80) +#define IPOPT_CLASS(o) ((o)&0x60) +#define IPOPT_NUMBER(o) ((o)&0x1f) + +#define IPOPT_CONTROL 0x00 +#define IPOPT_RESERVED1 0x20 +#define IPOPT_DEBMEAS 0x40 +#define IPOPT_RESERVED2 0x60 + +#define IPOPT_EOL 0 /* end of option list */ +#define IPOPT_NOP 1 /* no operation */ + +#define IPOPT_RR 7 /* record packet route */ +#define IPOPT_TS 68 /* timestamp */ +#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ +#define IPOPT_LSRR 131 /* loose source route */ +#define IPOPT_SATID 136 /* satnet id */ +#define IPOPT_SSRR 137 /* strict source route */ + +/* + * Offsets to fields in options other than EOL and NOP. + */ +#define IPOPT_OPTVAL 0 /* option ID */ +#define IPOPT_OLEN 1 /* option length */ +#define IPOPT_OFFSET 2 /* offset within option */ +#define IPOPT_MINOFF 4 /* min value of above */ + +/* + * Time stamp option structure. + */ +struct ip_timestamp { + u_int8_t ipt_code; /* IPOPT_TS */ + u_int8_t ipt_len; /* size of structure (variable) */ + u_int8_t ipt_ptr; /* index of current entry */ +#ifdef WORDS_BIGENDIAN + u_int ipt_oflw:4, /* overflow counter */ + ipt_flg:4; /* flags, see below */ +#else + u_int ipt_flg:4, /* flags, see below */ + ipt_oflw:4; /* overflow counter */ +#endif + union ipt_timestamp { + n_long ipt_time[1]; + struct ipt_ta { + struct in_addr ipt_addr; + n_long ipt_time; + } ipt_ta[1]; + } ipt_timestamp; +}; + +/* flag bits for ipt_flg */ +#define IPOPT_TS_TSONLY 0 /* timestamps only */ +#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ +#define IPOPT_TS_PRESPEC 3 /* specified modules only */ + +/* bits for security (not byte swapped) */ +#define IPOPT_SECUR_UNCLASS 0x0000 +#define IPOPT_SECUR_CONFID 0xf135 +#define IPOPT_SECUR_EFTO 0x789a +#define IPOPT_SECUR_MMMM 0xbc4d +#define IPOPT_SECUR_RESTR 0xaf13 +#define IPOPT_SECUR_SECRET 0xd788 +#define IPOPT_SECUR_TOPSECRET 0x6bc5 + +/* + * Internet implementation parameters. 
+ */ +#define MAXTTL 255 /* maximum time to live (seconds) */ +#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPTTLDEC 1 /* subtracted when forwarding */ + +#define IP_MSS 576 /* default maximum segment size */ + +#ifdef HAVE_SYS_TYPES32_H /* Overcome some Solaris 2.x junk */ +#include <sys/types32.h> +#else +#if SIZEOF_CHAR_P == 4 +typedef caddr_t caddr32_t; +#else +typedef u_int32_t caddr32_t; +#endif +#endif + +#if SIZEOF_CHAR_P == 4 +typedef struct ipq *ipqp_32; +typedef struct ipasfrag *ipasfragp_32; +#else +typedef caddr32_t ipqp_32; +typedef caddr32_t ipasfragp_32; +#endif + +/* + * Overlay for ip header used by other protocols (tcp, udp). + */ +struct ipovly { + caddr32_t ih_next, ih_prev; /* for protocol sequence q's */ + u_int8_t ih_x1; /* (unused) */ + u_int8_t ih_pr; /* protocol */ + int16_t ih_len; /* protocol length */ + struct in_addr ih_src; /* source internet address */ + struct in_addr ih_dst; /* destination internet address */ +}; + +/* + * Ip reassembly queue structure. Each fragment + * being reassembled is attached to one of these structures. + * They are timed out after ipq_ttl drops to 0, and may also + * be reclaimed if memory becomes tight. + * size 28 bytes + */ +struct ipq { + ipqp_32 next,prev; /* to other reass headers */ + u_int8_t ipq_ttl; /* time for reass q to live */ + u_int8_t ipq_p; /* protocol of this fragment */ + u_int16_t ipq_id; /* sequence id for reassembly */ + ipasfragp_32 ipq_next,ipq_prev; + /* to ip headers of fragments */ + struct in_addr ipq_src,ipq_dst; +}; + +/* + * Ip header, when holding a fragment. + * + * Note: ipf_next must be at same offset as ipq_next above + */ +struct ipasfrag { +#ifdef WORDS_BIGENDIAN + u_int ip_v:4, + ip_hl:4; +#else + u_int ip_hl:4, + ip_v:4; +#endif + /* BUG : u_int changed to u_int8_t. 
+ * sizeof(u_int)==4 on linux 2.0 + */ + u_int8_t ipf_mff; /* XXX overlays ip_tos: use low bit + * to avoid destroying tos (PPPDTRuu); + * copied from (ip_off&IP_MF) */ + int16_t ip_len; + u_int16_t ip_id; + int16_t ip_off; + u_int8_t ip_ttl; + u_int8_t ip_p; + u_int16_t ip_sum; + ipasfragp_32 ipf_next; /* next fragment */ + ipasfragp_32 ipf_prev; /* previous fragment */ +}; + +/* + * Structure stored in mbuf in inpcb.ip_options + * and passed to ip_output when ip options are in use. + * The actual length of the options (including ipopt_dst) + * is in m_len. + */ +#define MAX_IPOPTLEN 40 + +struct ipoption { + struct in_addr ipopt_dst; /* first-hop dst if source routed */ + int8_t ipopt_list[MAX_IPOPTLEN]; /* options proper */ +}; + +/* + * Structure attached to inpcb.ip_moptions and + * passed to ip_output when IP multicast options are in use. + */ + +struct ipstat { + u_long ips_total; /* total packets received */ + u_long ips_badsum; /* checksum bad */ + u_long ips_tooshort; /* packet too short */ + u_long ips_toosmall; /* not enough data */ + u_long ips_badhlen; /* ip header length < data size */ + u_long ips_badlen; /* ip length < ip header length */ + u_long ips_fragments; /* fragments received */ + u_long ips_fragdropped; /* frags dropped (dups, out of space) */ + u_long ips_fragtimeout; /* fragments timed out */ + u_long ips_forward; /* packets forwarded */ + u_long ips_cantforward; /* packets rcvd for unreachable dest */ + u_long ips_redirectsent; /* packets forwarded on same net */ + u_long ips_noproto; /* unknown or unsupported protocol */ + u_long ips_delivered; /* datagrams delivered to upper level*/ + u_long ips_localout; /* total ip packets generated here */ + u_long ips_odropped; /* lost packets due to nobufs, etc. 
*/ + u_long ips_reassembled; /* total packets reassembled ok */ + u_long ips_fragmented; /* datagrams successfully fragmented */ + u_long ips_ofragments; /* output fragments created */ + u_long ips_cantfrag; /* don't fragment flag was set, etc. */ + u_long ips_badoptions; /* error in option processing */ + u_long ips_noroute; /* packets discarded due to no route */ + u_long ips_badvers; /* ip version != 4 */ + u_long ips_rawout; /* total raw ip packets generated */ + u_long ips_unaligned; /* times the ip packet was not aligned */ +}; + +extern struct ipstat ipstat; +extern struct ipq ipq; /* ip reass. queue */ +extern u_int16_t ip_id; /* ip packet ctr, for ids */ +extern int ip_defttl; /* default IP ttl */ + +#endif diff --git a/slirp/ip_icmp.c b/slirp/ip_icmp.c new file mode 100644 index 000000000..8bc97a078 --- /dev/null +++ b/slirp/ip_icmp.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp + */ + +#include "slirp.h" +#include "ip_icmp.h" + +struct icmpstat icmpstat; + +/* The message sent when emulating PING */ +/* Be nice and tell them it's just a pseudo-ping packet */ +char icmp_ping_msg[] = "This is a psuedo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST packets.\n"; + +/* + * list of actions for icmp_error() on RX of an icmp message, + * indexed by ICMP type (0..18): + * 1 = icmp_error() sends nothing when the offending packet is + * an ICMP message of this type + * 0 = icmp_error() may answer it + */ +static int icmp_flush[19] = { +/* ECHO REPLY (0) */ 0, + 1, + 1, +/* DEST UNREACH (3) */ 1, +/* SOURCE QUENCH (4)*/ 1, +/* REDIRECT (5) */ 1, + 1, + 1, +/* ECHO (8) */ 0, +/* ROUTERADVERT (9) */ 1, +/* ROUTERSOLICIT (10) */ 1, +/* TIME EXCEEDED (11) */ 1, +/* PARAMETER PROBLEM (12) */ 1, +/* TIMESTAMP (13) */ 0, +/* TIMESTAMP REPLY (14) */ 0, +/* INFO (15) */ 0, +/* INFO REPLY (16) */ 0, +/* ADDR MASK (17) */ 0, +/* ADDR MASK REPLY (18) */ 0 +}; + +/* + * Process a received ICMP message. 
+ */ +void +icmp_input(m, hlen) + struct mbuf *m; + int hlen; +{ + register struct icmp *icp; + register struct ip *ip=mtod(m, struct ip *); + int icmplen=ip->ip_len; + /* int code; */ + + DEBUG_CALL("icmp_input"); + DEBUG_ARG("m = %lx", (long )m); + DEBUG_ARG("m_len = %d", m->m_len); + + icmpstat.icps_received++; + + /* + * Locate icmp structure in mbuf, and check + * that its not corrupted and of at least minimum length. + */ + if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ + icmpstat.icps_tooshort++; + freeit: + m_freem(m); + goto end_error; + } + + m->m_len -= hlen; + m->m_data += hlen; + icp = mtod(m, struct icmp *); + if (cksum(m, icmplen)) { + icmpstat.icps_checksum++; + goto freeit; + } + m->m_len += hlen; + m->m_data -= hlen; + + /* icmpstat.icps_inhist[icp->icmp_type]++; */ + /* code = icp->icmp_code; */ + + DEBUG_ARG("icmp_type = %d", icp->icmp_type); + switch (icp->icmp_type) { + case ICMP_ECHO: + icp->icmp_type = ICMP_ECHOREPLY; + ip->ip_len += hlen; /* since ip_input subtracts this */ + if (ip->ip_dst.s_addr == our_addr.s_addr || + (ip->ip_dst.s_addr == (special_addr.s_addr|htonl(CTL_ALIAS))) ) { + icmp_reflect(m); + } else { + struct socket *so; + struct sockaddr_in addr; + if ((so = socreate()) == NULL) goto freeit; + if(udp_attach(so) == -1) { + DEBUG_MISC((dfd,"icmp_input udp_attach errno = %d-%s\n", + errno,strerror(errno))); + sofree(so); + m_free(m); + goto end_error; + } + so->so_m = m; + so->so_faddr = ip->ip_dst; + so->so_fport = htons(7); + so->so_laddr = ip->ip_src; + so->so_lport = htons(9); + so->so_iptos = ip->ip_tos; + so->so_type = IPPROTO_ICMP; + so->so_state = SS_ISFCONNECTED; + + /* Send the packet */ + addr.sin_family = AF_INET; + if ((so->so_faddr.s_addr & htonl(0xffffff00)) == special_addr.s_addr) { + /* It's an alias */ + switch(ntohl(so->so_faddr.s_addr) & 0xff) { + case CTL_DNS: + addr.sin_addr = dns_addr; + break; + case CTL_ALIAS: + default: + addr.sin_addr = loopback_addr; + break; + } + } else { + addr.sin_addr = 
so->so_faddr; + } + addr.sin_port = so->so_fport; + if(sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, + (struct sockaddr *)&addr, sizeof(addr)) == -1) { + DEBUG_MISC((dfd,"icmp_input udp sendto tx errno = %d-%s\n", + errno,strerror(errno))); + icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); + udp_detach(so); + } + } /* if ip->ip_dst.s_addr == our_addr.s_addr */ + break; + case ICMP_UNREACH: + /* XXX? report error? close socket? */ + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + case ICMP_SOURCEQUENCH: + case ICMP_TSTAMP: + case ICMP_MASKREQ: + case ICMP_REDIRECT: + icmpstat.icps_notsupp++; + m_freem(m); + break; + + default: + icmpstat.icps_badtype++; + m_freem(m); + } /* swith */ + +end_error: + /* m is m_free()'d xor put in a socket xor or given to ip_send */ + return; +} + + +/* + * Send an ICMP message in response to a situation + * + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * MUST NOT change this header information. + * MUST NOT reply to a multicast/broadcast IP address. + * MUST NOT reply to a multicast/broadcast MAC address. + * MUST reply to only the first fragment. + */ +/* + * Send ICMP_UNREACH back to the source regarding msrc. + * mbuf *msrc is used as a template, but is NOT m_free()'d. + * It is reported as the bad ip packet. The header should + * be fully correct and in host byte order. + * ICMP fragmentation is illegal. All machines must accept 576 bytes in one + * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 + */ + +#define ICMP_MAXDATALEN (IP_MSS-28) +void +icmp_error(msrc, type, code, minsize, message) + struct mbuf *msrc; + u_char type; + u_char code; + int minsize; + char *message; +{ + unsigned hlen, shlen, s_ip_len; + register struct ip *ip; + register struct icmp *icp; + register struct mbuf *m; + + DEBUG_CALL("icmp_error"); + DEBUG_ARG("msrc = %lx", (long )msrc); + DEBUG_ARG("msrc_len = %d", msrc->m_len); + + if(type!=ICMP_UNREACH && type!=ICMP_TIMXCEED) goto end_error; + + /* check msrc */ + if(!msrc) goto end_error; + ip = mtod(msrc, struct ip *); +#if DEBUG + { char bufa[20], bufb[20]; + strcpy(bufa, inet_ntoa(ip->ip_src)); + strcpy(bufb, inet_ntoa(ip->ip_dst)); + DEBUG_MISC((dfd, " %.16s to %.16s\n", bufa, bufb)); + } +#endif + if(ip->ip_off & IP_OFFMASK) goto end_error; /* Only reply to fragment 0 */ + + shlen=ip->ip_hl << 2; + s_ip_len=ip->ip_len; + if(ip->ip_p == IPPROTO_ICMP) { + icp = (struct icmp *)((char *)ip + shlen); + /* + * Assume any unknown ICMP type is an error. This isn't + * specified by the RFC, but think about it.. + */ + if(icp->icmp_type>18 || icmp_flush[icp->icmp_type]) goto end_error; + } + + /* make a copy */ + if(!(m=m_get())) goto end_error; /* get mbuf */ + { int new_m_size; + new_m_size=sizeof(struct ip )+ICMP_MINLEN+msrc->m_len+ICMP_MAXDATALEN; + if(new_m_size>m->m_size) m_inc(m, new_m_size); + } + memcpy(m->m_data, msrc->m_data, msrc->m_len); + m->m_len = msrc->m_len; /* copy msrc to m */ + + /* make the header of the reply packet */ + ip = mtod(m, struct ip *); + hlen= sizeof(struct ip ); /* no options in reply */ + + /* fill in icmp */ + m->m_data += hlen; + m->m_len -= hlen; + + icp = mtod(m, struct icmp *); + + if(minsize) s_ip_len=shlen+ICMP_MINLEN; /* return header+8b only */ + else if(s_ip_len>ICMP_MAXDATALEN) /* maximum size */ + s_ip_len=ICMP_MAXDATALEN; + + m->m_len=ICMP_MINLEN+s_ip_len; /* 8 bytes ICMP header */ + + /* min. 
size = 8+sizeof(struct ip)+8 */ + + icp->icmp_type = type; + icp->icmp_code = code; + icp->icmp_id = 0; + icp->icmp_seq = 0; + + memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ + HTONS(icp->icmp_ip.ip_len); + HTONS(icp->icmp_ip.ip_id); + HTONS(icp->icmp_ip.ip_off); + +#if DEBUG + if(message) { /* DEBUG : append message to ICMP packet */ + int message_len; + char *cpnt; + message_len=strlen(message); + if(message_len>ICMP_MAXDATALEN) message_len=ICMP_MAXDATALEN; + cpnt=(char *)m->m_data+m->m_len; + memcpy(cpnt, message, message_len); + m->m_len+=message_len; + } +#endif + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, m->m_len); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + ip->ip_hl = hlen >> 2; + ip->ip_len = m->m_len; + + ip->ip_tos=((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ + + ip->ip_ttl = MAXTTL; + ip->ip_p = IPPROTO_ICMP; + ip->ip_dst = ip->ip_src; /* ip adresses */ + ip->ip_src = our_addr; + + (void ) ip_output((struct socket *)NULL, m); + + icmpstat.icps_reflect++; + +end_error: + return; +} +#undef ICMP_MAXDATALEN + +/* + * Reflect the ip packet back to the source + */ +void +icmp_reflect(m) + struct mbuf *m; +{ + register struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + int optlen = hlen - sizeof(struct ip ); + register struct icmp *icp; + + /* + * Send an icmp packet back to the ip level, + * after supplying a checksum. + */ + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, ip->ip_len - hlen); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + if (optlen > 0) { + /* + * Strip out original options by copying rest of first + * mbuf's data back, and adjust the IP length. 
+ */ + memmove((caddr_t)(ip + 1), (caddr_t)ip + hlen, + (unsigned )(m->m_len - hlen)); + hlen -= optlen; + ip->ip_hl = hlen >> 2; + ip->ip_len -= optlen; + m->m_len -= optlen; + } + + ip->ip_ttl = MAXTTL; + { /* swap */ + struct in_addr icmp_dst; + icmp_dst = ip->ip_dst; + ip->ip_dst = ip->ip_src; + ip->ip_src = icmp_dst; + } + + (void ) ip_output((struct socket *)NULL, m); + + icmpstat.icps_reflect++; +} diff --git a/slirp/ip_icmp.h b/slirp/ip_icmp.h new file mode 100644 index 000000000..7ddaaf8f3 --- /dev/null +++ b/slirp/ip_icmp.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_icmp.h	8.1 (Berkeley) 6/10/93
 * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp
 */

#ifndef _NETINET_IP_ICMP_H_
#define _NETINET_IP_ICMP_H_

/*
 * Internet Control Message Protocol Definitions.
 * Per RFC 792, September 1981.
 */

/* 32-bit time value used by the ICMP timestamp fields below. */
typedef u_int32_t	n_time;

/*
 * Structure of an icmp header.
 *
 * The fixed part is type, code and checksum; it is followed by two
 * unions: icmp_hun (per-type header words) and icmp_dun (per-type data).
 * The icmp_xxx macros defined inside the struct are shorthand for the
 * union members and are the names the rest of the code should use.
 */
struct icmp {
	u_char	icmp_type;		/* type of message, see below */
	u_char	icmp_code;		/* type sub code */
	u_short	icmp_cksum;		/* ones complement cksum of struct */
	union {
		u_char ih_pptr;			/* ICMP_PARAMPROB */
		struct in_addr ih_gwaddr;	/* ICMP_REDIRECT */
		struct ih_idseq {
			u_short	icd_id;
			u_short	icd_seq;
		} ih_idseq;
		int ih_void;

		/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
		struct ih_pmtu {
			u_short ipm_void;
			u_short ipm_nextmtu;
		} ih_pmtu;
	} icmp_hun;
#define	icmp_pptr	icmp_hun.ih_pptr
#define	icmp_gwaddr	icmp_hun.ih_gwaddr
#define	icmp_id		icmp_hun.ih_idseq.icd_id
#define	icmp_seq	icmp_hun.ih_idseq.icd_seq
#define	icmp_void	icmp_hun.ih_void
#define	icmp_pmvoid	icmp_hun.ih_pmtu.ipm_void
#define	icmp_nextmtu	icmp_hun.ih_pmtu.ipm_nextmtu
	union {
		struct id_ts {
			n_time its_otime;
			n_time its_rtime;
			n_time its_ttime;
		} id_ts;
		struct id_ip  {
			struct ip idi_ip;
			/* options and then 64 bits of data */
		} id_ip;
		/*
		 * NOTE(review): u_long is wider than 32 bits on LP64
		 * platforms, while ICMP_MASKLEN below (12) implies a
		 * 4-byte mask -- confirm before using icmp_mask.
		 */
		u_long	id_mask;
		char	id_data[1];
	} icmp_dun;
#define	icmp_otime	icmp_dun.id_ts.its_otime
#define	icmp_rtime	icmp_dun.id_ts.its_rtime
#define	icmp_ttime	icmp_dun.id_ts.its_ttime
#define	icmp_ip		icmp_dun.id_ip.idi_ip
#define	icmp_mask	icmp_dun.id_mask
#define	icmp_data	icmp_dun.id_data
};

/*
 * Lower bounds on packet lengths for various types.
 * For the error advice packets we must first ensure that the
 * packet is large enough to contain the returned ip header.
 * Only then can we do the check to see if 64 bits of packet
 * data have been returned, since we need to check the returned
 * ip header length.
 */
#define	ICMP_MINLEN	8				/* abs minimum */
#define	ICMP_TSLEN	(8 + 3 * sizeof (n_time))	/* timestamp */
#define	ICMP_MASKLEN	12				/* address mask */
#define	ICMP_ADVLENMIN	(8 + sizeof (struct ip) + 8)	/* min */
#define	ICMP_ADVLEN(p)	(8 + ((p)->icmp_ip.ip_hl << 2) + 8)
	/* N.B.: must separately check that ip_hl >= 5 */

/*
 * Definition of type and code field values.
 */
#define	ICMP_ECHOREPLY		0		/* echo reply */
#define	ICMP_UNREACH		3		/* dest unreachable, codes: */
#define		ICMP_UNREACH_NET	0		/* bad net */
#define		ICMP_UNREACH_HOST	1		/* bad host */
#define		ICMP_UNREACH_PROTOCOL	2		/* bad protocol */
#define		ICMP_UNREACH_PORT	3		/* bad port */
#define		ICMP_UNREACH_NEEDFRAG	4		/* IP_DF caused drop */
#define		ICMP_UNREACH_SRCFAIL	5		/* src route failed */
#define		ICMP_UNREACH_NET_UNKNOWN 6		/* unknown net */
#define		ICMP_UNREACH_HOST_UNKNOWN 7		/* unknown host */
#define		ICMP_UNREACH_ISOLATED	8		/* src host isolated */
#define		ICMP_UNREACH_NET_PROHIB	9		/* prohibited access */
#define		ICMP_UNREACH_HOST_PROHIB 10		/* ditto */
#define		ICMP_UNREACH_TOSNET	11		/* bad tos for net */
#define		ICMP_UNREACH_TOSHOST	12		/* bad tos for host */
#define	ICMP_SOURCEQUENCH	4		/* packet lost, slow down */
#define	ICMP_REDIRECT		5		/* shorter route, codes: */
#define		ICMP_REDIRECT_NET	0		/* for network */
#define		ICMP_REDIRECT_HOST	1		/* for host */
#define		ICMP_REDIRECT_TOSNET	2		/* for tos and net */
#define		ICMP_REDIRECT_TOSHOST	3		/* for tos and host */
#define	ICMP_ECHO		8		/* echo service */
#define	ICMP_ROUTERADVERT	9		/* router advertisement */
#define	ICMP_ROUTERSOLICIT	10		/* router solicitation */
#define	ICMP_TIMXCEED		11		/* time exceeded, code: */
#define		ICMP_TIMXCEED_INTRANS	0		/* ttl==0 in transit */
#define		ICMP_TIMXCEED_REASS	1		/* ttl==0 in reass */
#define	ICMP_PARAMPROB		12		/* ip header bad */
#define		ICMP_PARAMPROB_OPTABSENT 1		/* req. opt. absent */
#define	ICMP_TSTAMP		13		/* timestamp request */
#define	ICMP_TSTAMPREPLY	14		/* timestamp reply */
#define	ICMP_IREQ		15		/* information request */
#define	ICMP_IREQREPLY		16		/* information reply */
#define	ICMP_MASKREQ		17		/* address mask request */
#define	ICMP_MASKREPLY		18		/* address mask reply */

#define	ICMP_MAXTYPE		18

/* True for the query/reply ("informational") types, false for error types. */
#define	ICMP_INFOTYPE(type) \
	((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
	(type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
	(type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
	(type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
	(type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)

void icmp_input _P((struct mbuf *, int));
void icmp_error _P((struct mbuf *, u_char, u_char, int, char *));
void icmp_reflect _P((struct mbuf *));

#endif
diff --git a/slirp/ip_input.c b/slirp/ip_input.c
new file mode 100644
index 000000000..74b922316
--- /dev/null
+++ b/slirp/ip_input.c
@@ -0,0 +1,697 @@
/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include "ip_icmp.h" + +int ip_defttl; +struct ipstat ipstat; +struct ipq ipq; + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. + */ +void +ip_init() +{ + ipq.next = ipq.prev = (ipqp_32)&ipq; + ip_id = tt.tv_sec & 0xffff; + udp_init(); + tcp_init(); + ip_defttl = IPDEFTTL; +} + +/* + * Ip input routine. Checksum and byte swap header. If fragmented + * try to reassemble. 
Process options. Pass to next level. + */ +void +ip_input(m) + struct mbuf *m; +{ + register struct ip *ip; + int hlen; + + DEBUG_CALL("ip_input"); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m_len = %d", m->m_len); + + ipstat.ips_total++; + + if (m->m_len < sizeof (struct ip)) { + ipstat.ips_toosmall++; + return; + } + + ip = mtod(m, struct ip *); + + if (ip->ip_v != IPVERSION) { + ipstat.ips_badvers++; + goto bad; + } + + hlen = ip->ip_hl << 2; + if (hlen<sizeof(struct ip ) || hlen>m->m_len) {/* min header length */ + ipstat.ips_badhlen++; /* or packet too short */ + goto bad; + } + + /* keep ip header intact for ICMP reply + * ip->ip_sum = cksum(m, hlen); + * if (ip->ip_sum) { + */ + if(cksum(m,hlen)) { + ipstat.ips_badsum++; + goto bad; + } + + /* + * Convert fields to host representation. + */ + NTOHS(ip->ip_len); + if (ip->ip_len < hlen) { + ipstat.ips_badlen++; + goto bad; + } + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. + * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (m->m_len < ip->ip_len) { + ipstat.ips_tooshort++; + goto bad; + } + /* Should drop packet if mbuf too long? hmmm... */ + if (m->m_len > ip->ip_len) + m_adj(m, ip->ip_len - m->m_len); + + /* check ip_ttl for a correct ICMP reply */ + if(ip->ip_ttl==0 || ip->ip_ttl==1) { + icmp_error(m, ICMP_TIMXCEED,ICMP_TIMXCEED_INTRANS, 0,"ttl"); + goto bad; + } + + /* + * Process options and, if not destined for us, + * ship it on. ip_dooptions returns 1 when an + * error was detected (causing an icmp message + * to be sent and the original packet to be freed). + */ +/* We do no IP options */ +/* if (hlen > sizeof (struct ip) && ip_dooptions(m)) + * goto next; + */ + /* + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. 
+ * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) + * + * XXX This should fail, don't fragment yet + */ + if (ip->ip_off &~ IP_DF) { + register struct ipq *fp; + /* + * Look for queue of fragments + * of this datagram. + */ + for (fp = (struct ipq *) ipq.next; fp != &ipq; + fp = (struct ipq *) fp->next) + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + fp = 0; + found: + + /* + * Adjust ip_len to not reflect header, + * set ip_mff if more fragments are expected, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + if (ip->ip_off & IP_MF) + ((struct ipasfrag *)ip)->ipf_mff |= 1; + else + ((struct ipasfrag *)ip)->ipf_mff &= ~1; + + ip->ip_off <<= 3; + + /* + * If datagram marked as having more fragments + * or if this is not the first fragment, + * attempt reassembly; if it succeeds, proceed. + */ + if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) { + ipstat.ips_fragments++; + ip = ip_reass((struct ipasfrag *)ip, fp); + if (ip == 0) + return; + ipstat.ips_reassembled++; + m = dtom(ip); + } else + if (fp) + ip_freef(fp); + + } else + ip->ip_len -= hlen; + + /* + * Switch out to protocol's input routine. + */ + ipstat.ips_delivered++; + switch (ip->ip_p) { + case IPPROTO_TCP: + tcp_input(m, hlen, (struct socket *)NULL); + break; + case IPPROTO_UDP: + udp_input(m, hlen); + break; + case IPPROTO_ICMP: + icmp_input(m, hlen); + break; + default: + ipstat.ips_noproto++; + m_free(m); + } + return; +bad: + m_freem(m); + return; +} + +/* + * Take incoming datagram fragment and try to + * reassemble it into whole datagram. If a chain for + * reassembly of this datagram already exists, then it + * is given as fp; otherwise have to make a chain. 
 */
/*
 * ip   - the new fragment, viewed as a struct ipasfrag inside its mbuf;
 *        ip_off is in bytes and ip_len excludes the header (both are
 *        adjusted by ip_input() before the call).
 * fp   - reassembly queue for this datagram, or 0 if none exists yet.
 *
 * Returns the header of the fully reassembled datagram, or 0 when the
 * fragment was queued (more fragments needed) or dropped.
 */
struct ip *
ip_reass(ip, fp)
	register struct ipasfrag *ip;
	register struct ipq *fp;
{
	register struct mbuf *m = dtom(ip);
	register struct ipasfrag *q;
	int hlen = ip->ip_hl << 2;
	int i, next;

	DEBUG_CALL("ip_reass");
	DEBUG_ARG("ip = %lx", (long)ip);
	DEBUG_ARG("fp = %lx", (long)fp);
	DEBUG_ARG("m = %lx", (long)m);

	/*
	 * Presence of header sizes in mbufs
	 * would confuse code below.
	 * Fragment m_data is concatenated.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 * The queue header lives in the data area of a fresh mbuf.
	 */
	if (fp == 0) {
	  struct mbuf *t;
	  if ((t = m_get()) == NULL) goto dropfrag;
	  fp = mtod(t, struct ipq *);
	  insque_32(fp, &ipq);
	  fp->ipq_ttl = IPFRAGTTL;
	  fp->ipq_p = ip->ip_p;
	  fp->ipq_id = ip->ip_id;
	  fp->ipq_next = fp->ipq_prev = (ipasfragp_32)fp;
	  fp->ipq_src = ((struct ip *)ip)->ip_src;
	  fp->ipq_dst = ((struct ip *)ip)->ip_dst;
	  q = (struct ipasfrag *)fp;
	  goto insert;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	for (q = (struct ipasfrag *)fp->ipq_next; q != (struct ipasfrag *)fp;
	    q = (struct ipasfrag *)q->ipf_next)
		if (q->ip_off > ip->ip_off)
			break;

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (q->ipf_prev != (ipasfragp_32)fp) {
		/* i = number of bytes by which the predecessor overlaps us */
		i = ((struct ipasfrag *)(q->ipf_prev))->ip_off +
		  ((struct ipasfrag *)(q->ipf_prev))->ip_len - ip->ip_off;
		if (i > 0) {
			if (i >= ip->ip_len)
				goto dropfrag;
			m_adj(dtom(ip), i);
			ip->ip_off += i;
			ip->ip_len -= i;
		}
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
		i = (ip->ip_off + ip->ip_len) - q->ip_off;
		if (i < q->ip_len) {
			q->ip_len -= i;
			q->ip_off += i;
			m_adj(dtom(q), i);
			break;
		}
		/* step to the successor first, then discard the covered one */
		q = (struct ipasfrag *) q->ipf_next;
		m_freem(dtom((struct ipasfrag *) q->ipf_prev));
		ip_deq((struct ipasfrag *) q->ipf_prev);
	}

insert:
	/*
	 * Stick new segment in its place;
	 * check for complete reassembly.
	 */
	ip_enq(ip, (struct ipasfrag *) q->ipf_prev);
	next = 0;
	for (q = (struct ipasfrag *) fp->ipq_next; q != (struct ipasfrag *)fp;
	     q = (struct ipasfrag *) q->ipf_next) {
		if (q->ip_off != next)
			return (0);	/* hole in the data so far */
		next += q->ip_len;
	}
	/* last queued fragment still has "more fragments" set */
	if (((struct ipasfrag *)(q->ipf_prev))->ipf_mff & 1)
		return (0);

	/*
	 * Reassembly is complete; concatenate fragments.
	 */
	q = (struct ipasfrag *) fp->ipq_next;
	m = dtom(q);

	q = (struct ipasfrag *) q->ipf_next;
	while (q != (struct ipasfrag *)fp) {
	  struct mbuf *t;
	  t = dtom(q);
	  m_cat(m, t);
	  q = (struct ipasfrag *) q->ipf_next;
	}

	/*
	 * Create header for new ip packet by
	 * modifying header of first packet;
	 * dequeue and discard fragment reassembly header.
	 * Make header visible.
	 */
	ip = (struct ipasfrag *) fp->ipq_next;

	/*
	 * If the fragments concatenated to an mbuf that's
	 * bigger than the total size of the fragment, then an
	 * m_ext buffer was alloced. But fp->ipq_next points to
	 * the old buffer (in the mbuf), so we must point ip
	 * into the new buffer.
	 */
	if (m->m_flags & M_EXT) {
	  int delta;
	  delta = (char *)ip - m->m_dat;
	  ip = (struct ipasfrag *)(m->m_ext + delta);
	}

	/* DEBUG_ARG("ip = %lx", (long)ip);
	 * ip=(struct ipasfrag *)m->m_data; */

	ip->ip_len = next;
	ip->ipf_mff &= ~1;
	((struct ip *)ip)->ip_src = fp->ipq_src;
	((struct ip *)ip)->ip_dst = fp->ipq_dst;
	remque_32(fp);
	(void) m_free(dtom(fp));
	m = dtom(ip);
	/* undo the header stripping done on entry for the first fragment */
	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);

	return ((struct ip *)ip);

dropfrag:
	ipstat.ips_fragdropped++;
	m_freem(m);
	return (0);
}

/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 */
void
ip_freef(fp)
	struct ipq *fp;
{
	register struct ipasfrag *q, *p;

	for (q = (struct ipasfrag *) fp->ipq_next; q != (struct ipasfrag *)fp;
	    q = p) {
		/* remember the successor before q is unlinked and freed */
		p = (struct ipasfrag *) q->ipf_next;
		ip_deq(q);
		m_freem(dtom(q));
	}
	remque_32(fp);
	(void) m_free(dtom(fp));
}

/*
 * Put an ip fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 * p is linked in immediately after prev.
 */
void
ip_enq(p, prev)
	register struct ipasfrag *p, *prev;
{
	DEBUG_CALL("ip_enq");
	DEBUG_ARG("prev = %lx", (long)prev);
	p->ipf_prev = (ipasfragp_32) prev;
	p->ipf_next = prev->ipf_next;
	((struct ipasfrag *)(prev->ipf_next))->ipf_prev = (ipasfragp_32) p;
	prev->ipf_next = (ipasfragp_32) p;
}

/*
 * To ip_enq as remque is to insque.
 * Unlinks p from its neighbours; p's own links are left untouched.
 */
void
ip_deq(p)
	register struct ipasfrag *p;
{
	((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next;
	((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev;
}

/*
 * IP timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
void
ip_slowtimo()
{
	register struct ipq *fp;

	DEBUG_CALL("ip_slowtimo");

	fp = (struct ipq *) ipq.next;
	if (fp == 0)
	   return;

	while (fp != &ipq) {
		--fp->ipq_ttl;
		/*
		 * Advance before testing, so that freeing the expired
		 * queue (now fp->prev) does not disturb the traversal.
		 */
		fp = (struct ipq *) fp->next;
		if (((struct ipq *)(fp->prev))->ipq_ttl == 0) {
			ipstat.ips_fragtimeout++;
			ip_freef((struct ipq *) fp->prev);
		}
	}
}

/*
 * Do option processing on a datagram,
 * possibly discarding it if bad options are encountered,
 * or forwarding it if source-routed.
 * Returns 1 if packet has been forwarded/freed,
 * 0 if the packet should be processed further.
 */

/*
 * NOTE(review): everything up to the matching #endif is compiled out
 * (`notdef').  It would not build as written: two "0 origin" comments
 * have mangled delimiters, the ifaof_..._foraddr identifier is split by
 * spaces, and the braces after the option loop are unbalanced.  It is
 * kept verbatim for reference only.
 */
#ifdef notdef

int
ip_dooptions(m)
	struct mbuf *m;
{
	register struct ip *ip = mtod(m, struct ip *);
	register u_char *cp;
	register struct ip_timestamp *ipt;
	register struct in_ifaddr *ia;
/*	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; */
	int opt, optlen, cnt, off, code, type, forward = 0;
	struct in_addr *sin, dst;
typedef u_int32_t n_time;
	n_time ntime;

	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			optlen = cp[IPOPT_OLEN];
			if (optlen <= 0 || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			ipaddr.sin_addr = ip->ip_dst;
			ia = (struct in_ifaddr *)
				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
			if (ia == 0) {
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;			/ * 0 origin * /
			if (off > optlen - sizeof(struct in_addr)) {
				/*
				 * End of source route.  Should be for us.
				 */
				save_rte(cp, ip->ip_src);
				break;
			}
			/*
			 * locate outgoing interface
			 */
			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
			    sizeof(ipaddr.sin_addr));
			if (opt == IPOPT_SSRR) {
#define	INA	struct in_ifaddr *
#define	SA	struct sockaddr *
 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
			} else
				ia = ip_rtaddr(ipaddr.sin_addr);
			if (ia == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			ip->ip_dst = ipaddr.sin_addr;
			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
			    (caddr_t)(cp + off), sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
			break;

		case IPOPT_RR:
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			/*
			 * If no space remains, ignore.
			 */
			off--;			 * 0 origin *
			if (off > optlen - sizeof(struct in_addr))
				break;
			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
			    sizeof(ipaddr.sin_addr));
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 */
			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				goto bad;
			}
			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
			    (caddr_t)(cp + off), sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
			code = cp - (u_char *)ip;
			ipt = (struct ip_timestamp *)cp;
			if (ipt->ipt_len < 5)
				goto bad;
			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
				if (++ipt->ipt_oflw == 0)
					goto bad;
				break;
			}
			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
			switch (ipt->ipt_flg) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (ipt->ipt_ptr + sizeof(n_time) +
				    sizeof(struct in_addr) > ipt->ipt_len)
					goto bad;
				ipaddr.sin_addr = dst;
				ia = (INA)ifaof_ i f p foraddr((SA)&ipaddr,
							    m->m_pkthdr.rcvif);
				if (ia == 0)
					continue;
				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
				    (caddr_t)sin, sizeof(struct in_addr));
				ipt->ipt_ptr += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (ipt->ipt_ptr + sizeof(n_time) +
				    sizeof(struct in_addr) > ipt->ipt_len)
					goto bad;
				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
				    sizeof(struct in_addr));
				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
					continue;
				ipt->ipt_ptr += sizeof(struct in_addr);
				break;

			default:
				goto bad;
			}
			ntime = iptime();
			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
			    sizeof(n_time));
			ipt->ipt_ptr += sizeof(n_time);
		}
	}
	if (forward) {
		ip_forward(m, 1);
		return (1);
	}
		}
	}
	return (0);
bad:
	/* ip->ip_len -= ip->ip_hl << 2;   XXX icmp_error adds in hdr length */

/* Not yet */
 	icmp_error(m, type, code, 0, 0);

	ipstat.ips_badoptions++;
	return (1);
}

#endif /* notdef */

/*
 * Strip out IP options, at higher
 * level protocol in the kernel.
 * Second argument is buffer to which options
 * will be moved, and return value is their length.
 * (XXX) should be deleted; last arg currently ignored.
+ */ +void +ip_stripoptions(m, mopt) + register struct mbuf *m; + struct mbuf *mopt; +{ + register int i; + struct ip *ip = mtod(m, struct ip *); + register caddr_t opts; + int olen; + + olen = (ip->ip_hl<<2) - sizeof (struct ip); + opts = (caddr_t)(ip + 1); + i = m->m_len - (sizeof (struct ip) + olen); + memcpy(opts, opts + olen, (unsigned)i); + m->m_len -= olen; + + ip->ip_hl = sizeof(struct ip) >> 2; +} diff --git a/slirp/ip_output.c b/slirp/ip_output.c new file mode 100644 index 000000000..f3dc9b70e --- /dev/null +++ b/slirp/ip_output.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> + +u_int16_t ip_id; + +/* + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + */ +int +ip_output(so, m0) + struct socket *so; + struct mbuf *m0; +{ + register struct ip *ip; + register struct mbuf *m = m0; + register int hlen = sizeof(struct ip ); + int len, off, error = 0; + + DEBUG_CALL("ip_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m0 = %lx", (long)m0); + + /* We do no options */ +/* if (opt) { + * m = ip_insertoptions(m, opt, &len); + * hlen = len; + * } + */ + ip = mtod(m, struct ip *); + /* + * Fill in IP header. + */ + ip->ip_v = IPVERSION; + ip->ip_off &= IP_DF; + ip->ip_id = htons(ip_id++); + ip->ip_hl = hlen >> 2; + ipstat.ips_localout++; + + /* + * Verify that we have any chance at all of being able to queue + * the packet or packet fragments + */ + /* XXX Hmmm... */ +/* if (if_queued > if_thresh && towrite <= 0) { + * error = ENOBUFS; + * goto bad; + * } + */ + + /* + * If small enough for interface, can just send directly. 
+ */ + if ((u_int16_t)ip->ip_len <= if_mtu) { + ip->ip_len = htons((u_int16_t)ip->ip_len); + ip->ip_off = htons((u_int16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); + + if_output(so, m); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + if (ip->ip_off & IP_DF) { + error = -1; + ipstat.ips_cantfrag++; + goto bad; + } + + len = (if_mtu - hlen) &~ 7; /* ip databytes per packet */ + if (len < 8) { + error = -1; + goto bad; + } + + { + int mhlen, firstlen = len; + struct mbuf **mnext = &m->m_nextpkt; + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. + */ + m0 = m; + mhlen = sizeof (struct ip); + for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) { + register struct ip *mhip; + m = m_get(); + if (m == 0) { + error = -1; + ipstat.ips_odropped++; + goto sendorfree; + } + m->m_data += if_maxlinkhdr; + mhip = mtod(m, struct ip *); + *mhip = *ip; + + /* No options */ +/* if (hlen > sizeof (struct ip)) { + * mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); + * mhip->ip_hl = mhlen >> 2; + * } + */ + m->m_len = mhlen; + mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); + if (ip->ip_off & IP_MF) + mhip->ip_off |= IP_MF; + if (off + len >= (u_int16_t)ip->ip_len) + len = (u_int16_t)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = htons((u_int16_t)(len + mhlen)); + + if (m_copy(m, m0, off, len) < 0) { + error = -1; + goto sendorfree; + } + + mhip->ip_off = htons((u_int16_t)mhip->ip_off); + mhip->ip_sum = 0; + mhip->ip_sum = cksum(m, mhlen); + *mnext = m; + mnext = &m->m_nextpkt; + ipstat.ips_ofragments++; + } + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). 
+ */ + m = m0; + m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len); + ip->ip_len = htons((u_int16_t)m->m_len); + ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF)); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; + if (error == 0) + if_output(so, m); + else + m_freem(m); + } + + if (error == 0) + ipstat.ips_fragmented++; + } + +done: + return (error); + +bad: + m_freem(m0); + goto done; +} diff --git a/slirp/libslirp.h b/slirp/libslirp.h new file mode 100644 index 000000000..31ddaea4d --- /dev/null +++ b/slirp/libslirp.h @@ -0,0 +1,19 @@ +#ifndef _LIBSLIRP_H +#define _LIBSLIRP_H + +#include <sys/select.h> + +void slirp_init(void); + +void slirp_select_fill(int *pnfds, + fd_set *readfds, fd_set *writefds, fd_set *xfds); + +void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds); + +void slirp_input(const uint8_t *pkt, int pkt_len); + +/* you must provide the following functions: */ +int slirp_can_output(void); +void slirp_output(const uint8_t *pkt, int pkt_len); + +#endif diff --git a/slirp/main.h b/slirp/main.h new file mode 100644 index 000000000..dc06d6fe7 --- /dev/null +++ b/slirp/main.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + +#define TOWRITEMAX 512 +#define min(x,y) ((x) < (y) ? (x) : (y)) + +extern struct timeval tt; +extern int link_up; +extern int slirp_socket; +extern int slirp_socket_unit; +extern int slirp_socket_port; +extern u_int32_t slirp_socket_addr; +extern char *slirp_socket_passwd; +extern int ctty_closed; + +/* + * Get the difference in 2 times from updtim() + * Allow for wraparound times, "just in case" + * x is the greater of the 2 (current time) and y is + * what it's being compared against. + */ +#define TIME_DIFF(x,y) (x)-(y) < 0 ? 
~0-(y)+(x) : (x)-(y) + +extern char *slirp_tty; +extern char *exec_shell; +extern u_int curtime; +extern fd_set *global_readfds, *global_writefds, *global_xfds; +extern struct in_addr ctl_addr; +extern struct in_addr special_addr; +extern struct in_addr our_addr; +extern struct in_addr loopback_addr; +extern struct in_addr dns_addr; +extern char *username; +extern char *socket_path; +extern int towrite_max; +extern int ppp_exit; +extern int so_options; +extern int tcp_keepintvl; +extern uint8_t client_ethaddr[6]; + +#define PROTO_SLIP 0x1 +#ifdef USE_PPP +#define PROTO_PPP 0x2 +#endif + +void if_encap(const uint8_t *ip_data, int ip_data_len); diff --git a/slirp/mbuf.c b/slirp/mbuf.c new file mode 100644 index 000000000..fa36d8950 --- /dev/null +++ b/slirp/mbuf.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 1995 Danny Gasparovski + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +/* + * mbuf's in SLiRP are much simpler than the real mbufs in + * FreeBSD. They are fixed size, determined by the MTU, + * so that one whole packet can fit. Mbuf's cannot be + * chained together. 
If there's more data than the mbuf + * could hold, an external malloced buffer is pointed to + * by m_ext (and the data pointers) and M_EXT is set in + * the flags + */ + +#include <slirp.h> + +struct mbuf *mbutl; +char *mclrefcnt; +int mbuf_alloced = 0; +struct mbuf m_freelist, m_usedlist; +int mbuf_thresh = 30; +int mbuf_max = 0; +int msize; + +void +m_init() +{ + m_freelist.m_next = m_freelist.m_prev = &m_freelist; + m_usedlist.m_next = m_usedlist.m_prev = &m_usedlist; + msize_init(); +} + +void +msize_init() +{ + /* + * Find a nice value for msize + * XXX if_maxlinkhdr already in mtu + */ + msize = (if_mtu>if_mru?if_mtu:if_mru) + + if_maxlinkhdr + sizeof(struct m_hdr ) + 6; +} + +/* + * Get an mbuf from the free list, if there are none + * malloc one + * + * Because fragmentation can occur if we alloc new mbufs and + * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, + * which tells m_free to actually free() it + */ +struct mbuf * +m_get() +{ + register struct mbuf *m; + int flags = 0; + + DEBUG_CALL("m_get"); + + if (m_freelist.m_next == &m_freelist) { + m = (struct mbuf *)malloc(msize); + if (m == NULL) goto end_error; + mbuf_alloced++; + if (mbuf_alloced > mbuf_thresh) + flags = M_DOFREE; + if (mbuf_alloced > mbuf_max) + mbuf_max = mbuf_alloced; + } else { + m = m_freelist.m_next; + remque(m); + } + + /* Insert it in the used list */ + insque(m,&m_usedlist); + m->m_flags = (flags | M_USEDLIST); + + /* Initialise it */ + m->m_size = msize - sizeof(struct m_hdr); + m->m_data = m->m_dat; + m->m_len = 0; + m->m_nextpkt = 0; + m->m_prevpkt = 0; +end_error: + DEBUG_ARG("m = %lx", (long )m); + return m; +} + +void +m_free(m) + struct mbuf *m; +{ + + DEBUG_CALL("m_free"); + DEBUG_ARG("m = %lx", (long )m); + + if(m) { + /* Remove from m_usedlist */ + if (m->m_flags & M_USEDLIST) + remque(m); + + /* If it's M_EXT, free() it */ + if (m->m_flags & M_EXT) + free(m->m_ext); + + /* + * Either free() it or put it on the free list + */ + if (m->m_flags & 
M_DOFREE) { + free(m); + mbuf_alloced--; + } else if ((m->m_flags & M_FREELIST) == 0) { + insque(m,&m_freelist); + m->m_flags = M_FREELIST; /* Clobber other flags */ + } + } /* if(m) */ +} + +/* + * Copy data from one mbuf to the end of + * the other.. if result is too big for one mbuf, malloc() + * an M_EXT data segment + */ +void +m_cat(m, n) + register struct mbuf *m, *n; +{ + /* + * If there's no room, realloc + */ + if (M_FREEROOM(m) < n->m_len) + m_inc(m,m->m_size+MINCSIZE); + + memcpy(m->m_data+m->m_len, n->m_data, n->m_len); + m->m_len += n->m_len; + + m_free(n); +} + + +/* make m size bytes large */ +void +m_inc(m, size) + struct mbuf *m; + int size; +{ + /* some compiles throw up on gotos. This one we can fake. */ + if(m->m_size>size) return; + + if (m->m_flags & M_EXT) { + /* datasize = m->m_data - m->m_ext; */ + m->m_ext = (char *)realloc(m->m_ext,size); +/* if (m->m_ext == NULL) + * return (struct mbuf *)NULL; + */ + /* m->m_data = m->m_ext + datasize; */ + } else { + int datasize; + char *dat; + datasize = m->m_data - m->m_dat; + dat = (char *)malloc(size); +/* if (dat == NULL) + * return (struct mbuf *)NULL; + */ + memcpy(dat, m->m_dat, m->m_size); + + m->m_ext = dat; + m->m_data = m->m_ext + datasize; + m->m_flags |= M_EXT; + } + + m->m_size = size; + +} + + + +void +m_adj(m, len) + struct mbuf *m; + int len; +{ + if (m == NULL) + return; + if (len >= 0) { + /* Trim from head */ + m->m_data += len; + m->m_len -= len; + } else { + /* Trim from tail */ + len = -len; + m->m_len -= len; + } +} + + +/* + * Copy len bytes from m, starting off bytes into n + */ +int +m_copy(n, m, off, len) + struct mbuf *n, *m; + int off, len; +{ + if (len > M_FREEROOM(n)) + return -1; + + memcpy((n->m_data + n->m_len), (m->m_data + off), len); + n->m_len += len; + return 0; +} + + +/* + * Given a pointer into an mbuf, return the mbuf + * XXX This is a kludge, I should eliminate the need for it + * Fortunately, it's not used often + */ +struct mbuf * +dtom(dat) + void 
*dat; +{ + struct mbuf *m; + + DEBUG_CALL("dtom"); + DEBUG_ARG("dat = %lx", (long )dat); + + /* bug corrected for M_EXT buffers */ + for (m = m_usedlist.m_next; m != &m_usedlist; m = m->m_next) { + if (m->m_flags & M_EXT) { + if( (char *)dat>=m->m_ext && (char *)dat<(m->m_ext + m->m_size) ) + return m; + } else { + if( (char *)dat >= m->m_dat && (char *)dat<(m->m_dat + m->m_size) ) + return m; + } + } + + DEBUG_ERROR((dfd, "dtom failed")); + + return (struct mbuf *)0; +} + diff --git a/slirp/mbuf.h b/slirp/mbuf.h new file mode 100644 index 000000000..8cc292bbf --- /dev/null +++ b/slirp/mbuf.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 + * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp + */ + +#ifndef _MBUF_H_ +#define _MBUF_H_ + +#define m_freem m_free + + +#define MINCSIZE 4096 /* Amount to increase mbuf if too small */ + +/* + * Macros for type conversion + * mtod(m,t) - convert mbuf pointer to data pointer of correct type + * dtom(x) - convert data pointer within mbuf to mbuf pointer (XXX) + */ +#define mtod(m,t) ((t)(m)->m_data) +/* #define dtom(x) ((struct mbuf *)((int)(x) & ~(M_SIZE-1))) */ + +/* XXX About mbufs for slirp: + * Only one mbuf is ever used in a chain, for each "cell" of data. + * m_nextpkt points to the next packet, if fragmented. + * If the data is too large, the M_EXT is used, and a larger block + * is alloced. Therefore, m_free[m] must check for M_EXT and if set + * free the m_ext. This is inefficient memory-wise, but who cares. + */ + +/* XXX should union some of these! 
*/ +/* header at beginning of each mbuf: */ +struct m_hdr { + struct mbuf *mh_next; /* Linked list of mbufs */ + struct mbuf *mh_prev; + struct mbuf *mh_nextpkt; /* Next packet in queue/record */ + struct mbuf *mh_prevpkt; /* Flags aren't used in the output queue */ + int mh_flags; /* Misc flags */ + + int mh_size; /* Size of data */ + struct socket *mh_so; + + caddr_t mh_data; /* Location of data */ + int mh_len; /* Amount of data in this mbuf */ +}; + +/* + * How much room is in the mbuf, from m_data to the end of the mbuf + */ +#define M_ROOM(m) ((m->m_flags & M_EXT)? \ + (((m)->m_ext + (m)->m_size) - (m)->m_data) \ + : \ + (((m)->m_dat + (m)->m_size) - (m)->m_data)) + +/* + * How much free room there is + */ +#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) +#define M_TRAILINGSPACE M_FREEROOM + +struct mbuf { + struct m_hdr m_hdr; + union M_dat { + char m_dat_[1]; /* ANSI don't like 0 sized arrays */ + char *m_ext_; + } M_dat; +}; + +#define m_next m_hdr.mh_next +#define m_prev m_hdr.mh_prev +#define m_nextpkt m_hdr.mh_nextpkt +#define m_prevpkt m_hdr.mh_prevpkt +#define m_flags m_hdr.mh_flags +#define m_len m_hdr.mh_len +#define m_data m_hdr.mh_data +#define m_size m_hdr.mh_size +#define m_dat M_dat.m_dat_ +#define m_ext M_dat.m_ext_ +#define m_so m_hdr.mh_so + +#define ifq_prev m_prev +#define ifq_next m_next +#define ifs_prev m_prevpkt +#define ifs_next m_nextpkt +#define ifq_so m_so + +#define M_EXT 0x01 /* m_ext points to more (malloced) data */ +#define M_FREELIST 0x02 /* mbuf is on free list */ +#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ +#define M_DOFREE 0x08 /* when m_free is called on the mbuf, free() + * it rather than putting it on the free list */ + +/* + * Mbuf statistics. 
XXX + */ + +struct mbstat { + int mbs_alloced; /* Number of mbufs allocated */ + +}; + +extern struct mbstat mbstat; +extern int mbuf_alloced; +extern struct mbuf m_freelist, m_usedlist; +extern int mbuf_max; + +void m_init _P((void)); +void msize_init _P((void)); +struct mbuf * m_get _P((void)); +void m_free _P((struct mbuf *)); +void m_cat _P((register struct mbuf *, register struct mbuf *)); +void m_inc _P((struct mbuf *, int)); +void m_adj _P((struct mbuf *, int)); +int m_copy _P((struct mbuf *, struct mbuf *, int, int)); +struct mbuf * dtom _P((void *)); + +#endif diff --git a/slirp/misc.c b/slirp/misc.c new file mode 100644 index 000000000..7f6448dff --- /dev/null +++ b/slirp/misc.c @@ -0,0 +1,925 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#define WANT_SYS_IOCTL_H +#include <slirp.h> + +u_int curtime, time_fasttimo, last_slowtimo, detach_time; +u_int detach_wait = 600000; /* 10 minutes */ + +#if 0 +int x_port = -1; +int x_display = 0; +int x_screen = 0; + +int +show_x(buff, inso) + char *buff; + struct socket *inso; +{ + if (x_port < 0) { + lprint("X Redir: X not being redirected.\r\n"); + } else { + lprint("X Redir: In sh/bash/zsh/etc. type: DISPLAY=%s:%d.%d; export DISPLAY\r\n", + inet_ntoa(our_addr), x_port, x_screen); + lprint("X Redir: In csh/tcsh/etc. type: setenv DISPLAY %s:%d.%d\r\n", + inet_ntoa(our_addr), x_port, x_screen); + if (x_display) + lprint("X Redir: Redirecting to display %d\r\n", x_display); + } + + return CFG_OK; +} + + +/* + * XXX Allow more than one X redirection? 
+ */ +void +redir_x(inaddr, start_port, display, screen) + u_int32_t inaddr; + int start_port; + int display; + int screen; +{ + int i; + + if (x_port >= 0) { + lprint("X Redir: X already being redirected.\r\n"); + show_x(0, 0); + } else { + for (i = 6001 + (start_port-1); i <= 6100; i++) { + if (solisten(htons(i), inaddr, htons(6000 + display), 0)) { + /* Success */ + x_port = i - 6000; + x_display = display; + x_screen = screen; + show_x(0, 0); + return; + } + } + lprint("X Redir: Error: Couldn't redirect a port for X. Weird.\r\n"); + } +} +#endif + +#ifndef HAVE_INET_ATON +int +inet_aton(cp, ia) + const char *cp; + struct in_addr *ia; +{ + u_int32_t addr = inet_addr(cp); + if (addr == 0xffffffff) + return 0; + ia->s_addr = addr; + return 1; +} +#endif + +/* + * Get our IP address and put it in our_addr + */ +void +getouraddr() +{ + char buff[256]; + struct hostent *he; + + if (gethostname(buff,256) < 0) + return; + + if ((he = gethostbyname(buff)) == NULL) + return; + + our_addr = *(struct in_addr *)he->h_addr; +} + +#if SIZEOF_CHAR_P == 8 + +struct quehead_32 { + u_int32_t qh_link; + u_int32_t qh_rlink; +}; + +inline void +insque_32(a, b) + void *a; + void *b; +{ + register struct quehead_32 *element = (struct quehead_32 *) a; + register struct quehead_32 *head = (struct quehead_32 *) b; + element->qh_link = head->qh_link; + head->qh_link = (u_int32_t)element; + element->qh_rlink = (u_int32_t)head; + ((struct quehead_32 *)(element->qh_link))->qh_rlink + = (u_int32_t)element; +} + +inline void +remque_32(a) + void *a; +{ + register struct quehead_32 *element = (struct quehead_32 *) a; + ((struct quehead_32 *)(element->qh_link))->qh_rlink = element->qh_rlink; + ((struct quehead_32 *)(element->qh_rlink))->qh_link = element->qh_link; + element->qh_rlink = 0; +} + +#endif /* SIZEOF_CHAR_P == 8 */ + +struct quehead { + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +inline void +insque(a, b) + void *a, *b; +{ + register struct quehead *element = (struct 
quehead *) a; + register struct quehead *head = (struct quehead *) b; + element->qh_link = head->qh_link; + head->qh_link = (struct quehead *)element; + element->qh_rlink = (struct quehead *)head; + ((struct quehead *)(element->qh_link))->qh_rlink + = (struct quehead *)element; +} + +inline void +remque(a) + void *a; +{ + register struct quehead *element = (struct quehead *) a; + ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; + ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; + element->qh_rlink = NULL; + /* element->qh_link = NULL; TCP FIN1 crashes if you do this. Why ? */ +} + +/* #endif */ + + +int +add_exec(ex_ptr, do_pty, exec, addr, port) + struct ex_list **ex_ptr; + int do_pty; + char *exec; + int addr; + int port; +{ + struct ex_list *tmp_ptr; + + /* First, check if the port is "bound" */ + for (tmp_ptr = *ex_ptr; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { + if (port == tmp_ptr->ex_fport && addr == tmp_ptr->ex_addr) + return -1; + } + + tmp_ptr = *ex_ptr; + *ex_ptr = (struct ex_list *)malloc(sizeof(struct ex_list)); + (*ex_ptr)->ex_fport = port; + (*ex_ptr)->ex_addr = addr; + (*ex_ptr)->ex_pty = do_pty; + (*ex_ptr)->ex_exec = strdup(exec); + (*ex_ptr)->ex_next = tmp_ptr; + return 0; +} + +#ifndef HAVE_STRERROR + +/* + * For systems with no strerror + */ + +extern int sys_nerr; +extern char *sys_errlist[]; + +char * +strerror(error) + int error; +{ + if (error < sys_nerr) + return sys_errlist[error]; + else + return "Unknown error."; +} + +#endif + + +#if 0 +int +openpty(amaster, aslave) + int *amaster, *aslave; +{ + register int master, slave; + +#ifdef HAVE_GRANTPT + char *ptr; + + if ((master = open("/dev/ptmx", O_RDWR)) < 0 || + grantpt(master) < 0 || + unlockpt(master) < 0 || + (ptr = ptsname(master)) == NULL) { + close(master); + return -1; + } + + if ((slave = open(ptr, O_RDWR)) < 0 || + ioctl(slave, I_PUSH, "ptem") < 0 || + ioctl(slave, I_PUSH, "ldterm") < 0 || + ioctl(slave, I_PUSH, "ttcompat") < 0) { + 
close(master); + close(slave); + return -1; + } + + *amaster = master; + *aslave = slave; + return 0; + +#else + + static char line[] = "/dev/ptyXX"; + register const char *cp1, *cp2; + + for (cp1 = "pqrsPQRS"; *cp1; cp1++) { + line[8] = *cp1; + for (cp2 = "0123456789abcdefghijklmnopqrstuv"; *cp2; cp2++) { + line[9] = *cp2; + if ((master = open(line, O_RDWR, 0)) == -1) { + if (errno == ENOENT) + return (-1); /* out of ptys */ + } else { + line[5] = 't'; + /* These will fail */ + (void) chown(line, getuid(), 0); + (void) chmod(line, S_IRUSR|S_IWUSR|S_IWGRP); +#ifdef HAVE_REVOKE + (void) revoke(line); +#endif + if ((slave = open(line, O_RDWR, 0)) != -1) { + *amaster = master; + *aslave = slave; + return 0; + } + (void) close(master); + line[5] = 'p'; + } + } + } + errno = ENOENT; /* out of ptys */ + return (-1); +#endif +} + +/* + * XXX This is ugly + * We create and bind a socket, then fork off to another + * process, which connects to this socket, after which we + * exec the wanted program. If something (strange) happens, + * the accept() call could block us forever. 
+ * + * do_pty = 0 Fork/exec inetd style + * do_pty = 1 Fork/exec using slirp.telnetd + * do_ptr = 2 Fork/exec using pty + */ +int +fork_exec(so, ex, do_pty) + struct socket *so; + char *ex; + int do_pty; +{ + int s; + struct sockaddr_in addr; + int addrlen = sizeof(addr); + int opt; + int master; + char *argv[256]; + char buff[256]; + /* don't want to clobber the original */ + char *bptr; + char *curarg; + int c, i; + + DEBUG_CALL("fork_exec"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("ex = %lx", (long)ex); + DEBUG_ARG("do_pty = %lx", (long)do_pty); + + if (do_pty == 2) { + if (openpty(&master, &s) == -1) { + lprint("Error: openpty failed: %s\n", strerror(errno)); + return 0; + } + } else { + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = INADDR_ANY; + + if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0 || + bind(s, (struct sockaddr *)&addr, addrlen) < 0 || + listen(s, 1) < 0) { + lprint("Error: inet socket: %s\n", strerror(errno)); + close(s); + + return 0; + } + } + + switch(fork()) { + case -1: + lprint("Error: fork failed: %s\n", strerror(errno)); + close(s); + if (do_pty == 2) + close(master); + return 0; + + case 0: + /* Set the DISPLAY */ + if (do_pty == 2) { + (void) close(master); +#ifdef TIOCSCTTY /* XXXXX */ + (void) setsid(); + ioctl(s, TIOCSCTTY, (char *)NULL); +#endif + } else { + getsockname(s, (struct sockaddr *)&addr, &addrlen); + close(s); + /* + * Connect to the socket + * XXX If any of these fail, we're in trouble! 
+ */ + s = socket(AF_INET, SOCK_STREAM, 0); + addr.sin_addr = loopback_addr; + connect(s, (struct sockaddr *)&addr, addrlen); + } + + if (x_port >= 0) { +#ifdef HAVE_SETENV + sprintf(buff, "%s:%d.%d", inet_ntoa(our_addr), x_port, x_screen); + setenv("DISPLAY", buff, 1); +#else + sprintf(buff, "DISPLAY=%s:%d.%d", inet_ntoa(our_addr), x_port, x_screen); + putenv(buff); +#endif + } + + dup2(s, 0); + dup2(s, 1); + dup2(s, 2); + for (s = 3; s <= 255; s++) + close(s); + + i = 0; + bptr = strdup(ex); /* No need to free() this */ + if (do_pty == 1) { + /* Setup "slirp.telnetd -x" */ + argv[i++] = "slirp.telnetd"; + argv[i++] = "-x"; + argv[i++] = bptr; + } else + do { + /* Change the string into argv[] */ + curarg = bptr; + while (*bptr != ' ' && *bptr != (char)0) + bptr++; + c = *bptr; + *bptr++ = (char)0; + argv[i++] = strdup(curarg); + } while (c); + + argv[i] = 0; + execvp(argv[0], argv); + + /* Ooops, failed, let's tell the user why */ + { + char buff[256]; + + sprintf(buff, "Error: execvp of %s failed: %s\n", + argv[0], strerror(errno)); + write(2, buff, strlen(buff)+1); + } + close(0); close(1); close(2); /* XXX */ + exit(1); + + default: + if (do_pty == 2) { + close(s); + so->s = master; + } else { + /* + * XXX this could block us... 
+ * XXX Should set a timer here, and if accept() doesn't + * return after X seconds, declare it a failure + * The only reason this will block forever is if socket() + * of connect() fail in the child process + */ + so->s = accept(s, (struct sockaddr *)&addr, &addrlen); + close(s); + opt = 1; + setsockopt(so->s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)); + opt = 1; + setsockopt(so->s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int)); + } + fd_nonblock(so->s); + + /* Append the telnet options now */ + if (so->so_m != 0 && do_pty == 1) { + sbappend(so, so->so_m); + so->so_m = 0; + } + + return 1; + } +} +#endif + +#ifndef HAVE_STRDUP +char * +strdup(str) + const char *str; +{ + char *bptr; + + bptr = (char *)malloc(strlen(str)+1); + strcpy(bptr, str); + + return bptr; +} +#endif + +#if 0 +void +snooze_hup(num) + int num; +{ + int s, ret; +#ifndef NO_UNIX_SOCKETS + struct sockaddr_un sock_un; +#endif + struct sockaddr_in sock_in; + char buff[256]; + + ret = -1; + if (slirp_socket_passwd) { + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) + slirp_exit(1); + sock_in.sin_family = AF_INET; + sock_in.sin_addr.s_addr = slirp_socket_addr; + sock_in.sin_port = htons(slirp_socket_port); + if (connect(s, (struct sockaddr *)&sock_in, sizeof(sock_in)) != 0) + slirp_exit(1); /* just exit...*/ + sprintf(buff, "kill %s:%d", slirp_socket_passwd, slirp_socket_unit); + write(s, buff, strlen(buff)+1); + } +#ifndef NO_UNIX_SOCKETS + else { + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) + slirp_exit(1); + sock_un.sun_family = AF_UNIX; + strcpy(sock_un.sun_path, socket_path); + if (connect(s, (struct sockaddr *)&sock_un, + sizeof(sock_un.sun_family) + sizeof(sock_un.sun_path)) != 0) + slirp_exit(1); + sprintf(buff, "kill none:%d", slirp_socket_unit); + write(s, buff, strlen(buff)+1); + } +#endif + slirp_exit(0); +} + + +void +snooze() +{ + sigset_t s; + int i; + + /* Don't need our data anymore */ + /* XXX This makes SunOS barf */ +/* brk(0); */ + + /* Close all fd's */ + for 
(i = 255; i >= 0; i--) + close(i); + + signal(SIGQUIT, slirp_exit); + signal(SIGHUP, snooze_hup); + sigemptyset(&s); + + /* Wait for any signal */ + sigsuspend(&s); + + /* Just in case ... */ + exit(255); +} + +void +relay(s) + int s; +{ + char buf[8192]; + int n; + fd_set readfds; + struct ttys *ttyp; + + /* Don't need our data anymore */ + /* XXX This makes SunOS barf */ +/* brk(0); */ + + signal(SIGQUIT, slirp_exit); + signal(SIGHUP, slirp_exit); + signal(SIGINT, slirp_exit); + signal(SIGTERM, slirp_exit); + + /* Fudge to get term_raw and term_restore to work */ + if (NULL == (ttyp = tty_attach (0, slirp_tty))) { + lprint ("Error: tty_attach failed in misc.c:relay()\r\n"); + slirp_exit (1); + } + ttyp->fd = 0; + ttyp->flags |= TTY_CTTY; + term_raw(ttyp); + + while (1) { + FD_ZERO(&readfds); + + FD_SET(0, &readfds); + FD_SET(s, &readfds); + + n = select(s+1, &readfds, (fd_set *)0, (fd_set *)0, (struct timeval *)0); + + if (n <= 0) + slirp_exit(0); + + if (FD_ISSET(0, &readfds)) { + n = read(0, buf, 8192); + if (n <= 0) + slirp_exit(0); + n = writen(s, buf, n); + if (n <= 0) + slirp_exit(0); + } + + if (FD_ISSET(s, &readfds)) { + n = read(s, buf, 8192); + if (n <= 0) + slirp_exit(0); + n = writen(0, buf, n); + if (n <= 0) + slirp_exit(0); + } + } + + /* Just in case.... */ + exit(1); +} +#endif + +int (*lprint_print) _P((void *, const char *, va_list)); +char *lprint_ptr, *lprint_ptr2, **lprint_arg; + +void +#ifdef __STDC__ +lprint(const char *format, ...) +#else +lprint(va_alist) va_dcl +#endif +{ + va_list args; + +#ifdef __STDC__ + va_start(args, format); +#else + char *format; + va_start(args); + format = va_arg(args, char *); +#endif +#if 0 + /* If we're printing to an sbuf, make sure there's enough room */ + /* XXX +100? 
*/ + if (lprint_sb) { + if ((lprint_ptr - lprint_sb->sb_wptr) >= + (lprint_sb->sb_datalen - (strlen(format) + 100))) { + int deltaw = lprint_sb->sb_wptr - lprint_sb->sb_data; + int deltar = lprint_sb->sb_rptr - lprint_sb->sb_data; + int deltap = lprint_ptr - lprint_sb->sb_data; + + lprint_sb->sb_data = (char *)realloc(lprint_sb->sb_data, + lprint_sb->sb_datalen + TCP_SNDSPACE); + + /* Adjust all values */ + lprint_sb->sb_wptr = lprint_sb->sb_data + deltaw; + lprint_sb->sb_rptr = lprint_sb->sb_data + deltar; + lprint_ptr = lprint_sb->sb_data + deltap; + + lprint_sb->sb_datalen += TCP_SNDSPACE; + } + } +#endif + if (lprint_print) + lprint_ptr += (*lprint_print)(*lprint_arg, format, args); + + /* Check if they want output to be logged to file as well */ + if (lfd) { + /* + * Remove \r's + * otherwise you'll get ^M all over the file + */ + int len = strlen(format); + char *bptr1, *bptr2; + + bptr1 = bptr2 = strdup(format); + + while (len--) { + if (*bptr1 == '\r') + memcpy(bptr1, bptr1+1, len+1); + else + bptr1++; + } + vfprintf(lfd, bptr2, args); + free(bptr2); + } + va_end(args); +} + +void +add_emu(buff) + char *buff; +{ + u_int lport, fport; + u_int8_t tos = 0, emu = 0; + char buff1[256], buff2[256], buff4[128]; + char *buff3 = buff4; + struct emu_t *emup; + struct socket *so; + + if (sscanf(buff, "%256s %256s", buff2, buff1) != 2) { + lprint("Error: Bad arguments\r\n"); + return; + } + + if (sscanf(buff1, "%d:%d", &lport, &fport) != 2) { + lport = 0; + if (sscanf(buff1, "%d", &fport) != 1) { + lprint("Error: Bad first argument\r\n"); + return; + } + } + + if (sscanf(buff2, "%128[^:]:%128s", buff1, buff3) != 2) { + buff3 = 0; + if (sscanf(buff2, "%256s", buff1) != 1) { + lprint("Error: Bad second argument\r\n"); + return; + } + } + + if (buff3) { + if (strcmp(buff3, "lowdelay") == 0) + tos = IPTOS_LOWDELAY; + else if (strcmp(buff3, "throughput") == 0) + tos = IPTOS_THROUGHPUT; + else { + lprint("Error: Expecting \"lowdelay\"/\"throughput\"\r\n"); + return; + } + } 
+ + if (strcmp(buff1, "ftp") == 0) + emu = EMU_FTP; + else if (strcmp(buff1, "irc") == 0) + emu = EMU_IRC; + else if (strcmp(buff1, "none") == 0) + emu = EMU_NONE; /* ie: no emulation */ + else { + lprint("Error: Unknown service\r\n"); + return; + } + + /* First, check that it isn't already emulated */ + for (emup = tcpemu; emup; emup = emup->next) { + if (emup->lport == lport && emup->fport == fport) { + lprint("Error: port already emulated\r\n"); + return; + } + } + + /* link it */ + emup = (struct emu_t *)malloc(sizeof (struct emu_t)); + emup->lport = (u_int16_t)lport; + emup->fport = (u_int16_t)fport; + emup->tos = tos; + emup->emu = emu; + emup->next = tcpemu; + tcpemu = emup; + + /* And finally, mark all current sessions, if any, as being emulated */ + for (so = tcb.so_next; so != &tcb; so = so->so_next) { + if ((lport && lport == ntohs(so->so_lport)) || + (fport && fport == ntohs(so->so_fport))) { + if (emu) + so->so_emu = emu; + if (tos) + so->so_iptos = tos; + } + } + + lprint("Adding emulation for %s to port %d/%d\r\n", buff1, emup->lport, emup->fport); +} + +#ifdef BAD_SPRINTF + +#undef vsprintf +#undef sprintf + +/* + * Some BSD-derived systems have a sprintf which returns char * + */ + +int +vsprintf_len(string, format, args) + char *string; + const char *format; + va_list args; +{ + vsprintf(string, format, args); + return strlen(string); +} + +int +#ifdef __STDC__ +sprintf_len(char *string, const char *format, ...) 
+#else +sprintf_len(va_alist) va_dcl +#endif +{ + va_list args; +#ifdef __STDC__ + va_start(args, format); +#else + char *string; + char *format; + va_start(args); + string = va_arg(args, char *); + format = va_arg(args, char *); +#endif + vsprintf(string, format, args); + return strlen(string); +} + +#endif + +void +u_sleep(usec) + int usec; +{ + struct timeval t; + fd_set fdset; + + FD_ZERO(&fdset); + + t.tv_sec = 0; + t.tv_usec = usec * 1000; + + select(0, &fdset, &fdset, &fdset, &t); +} + +/* + * Set fd blocking and non-blocking + */ + +void +fd_nonblock(fd) + int fd; +{ +#ifdef FIONBIO + int opt = 1; + + ioctl(fd, FIONBIO, &opt); +#else + int opt; + + opt = fcntl(fd, F_GETFL, 0); + opt |= O_NONBLOCK; + fcntl(fd, F_SETFL, opt); +#endif +} + +void +fd_block(fd) + int fd; +{ +#ifdef FIONBIO + int opt = 0; + + ioctl(fd, FIONBIO, &opt); +#else + int opt; + + opt = fcntl(fd, F_GETFL, 0); + opt &= ~O_NONBLOCK; + fcntl(fd, F_SETFL, opt); +#endif +} + + +#if 0 +/* + * invoke RSH + */ +int +rsh_exec(so,ns, user, host, args) + struct socket *so; + struct socket *ns; + char *user; + char *host; + char *args; +{ + int fd[2]; + int fd0[2]; + int s; + char buff[256]; + + DEBUG_CALL("rsh_exec"); + DEBUG_ARG("so = %lx", (long)so); + + if (pipe(fd)<0) { + lprint("Error: pipe failed: %s\n", strerror(errno)); + return 0; + } +/* #ifdef HAVE_SOCKETPAIR */ +#if 1 + if (socketpair(PF_UNIX,SOCK_STREAM,0, fd0) == -1) { + close(fd[0]); + close(fd[1]); + lprint("Error: openpty failed: %s\n", strerror(errno)); + return 0; + } +#else + if (openpty(&fd0[0], &fd0[1]) == -1) { + close(fd[0]); + close(fd[1]); + lprint("Error: openpty failed: %s\n", strerror(errno)); + return 0; + } +#endif + + switch(fork()) { + case -1: + lprint("Error: fork failed: %s\n", strerror(errno)); + close(fd[0]); + close(fd[1]); + close(fd0[0]); + close(fd0[1]); + return 0; + + case 0: + close(fd[0]); + close(fd0[0]); + + /* Set the DISPLAY */ + if (x_port >= 0) { +#ifdef HAVE_SETENV + sprintf(buff, "%s:%d.%d", 
inet_ntoa(our_addr), x_port, x_screen); + setenv("DISPLAY", buff, 1); +#else + sprintf(buff, "DISPLAY=%s:%d.%d", inet_ntoa(our_addr), x_port, x_screen); + putenv(buff); +#endif + } + + dup2(fd0[1], 0); + dup2(fd0[1], 1); + dup2(fd[1], 2); + for (s = 3; s <= 255; s++) + close(s); + + execlp("rsh","rsh","-l", user, host, args, NULL); + + /* Ooops, failed, let's tell the user why */ + + sprintf(buff, "Error: execlp of %s failed: %s\n", + "rsh", strerror(errno)); + write(2, buff, strlen(buff)+1); + close(0); close(1); close(2); /* XXX */ + exit(1); + + default: + close(fd[1]); + close(fd0[1]); + ns->s=fd[0]; + so->s=fd0[0]; + + return 1; + } +} +#endif diff --git a/slirp/misc.h b/slirp/misc.h new file mode 100644 index 000000000..8e2819b99 --- /dev/null +++ b/slirp/misc.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _MISC_H_ +#define _MISC_H_ + +struct ex_list { + int ex_pty; /* Do we want a pty? 
*/ + int ex_addr; /* The last byte of the address */ + int ex_fport; /* Port to telnet to */ + char *ex_exec; /* Command line of what to exec */ + struct ex_list *ex_next; +}; + +extern struct ex_list *exec_list; +extern u_int curtime, time_fasttimo, last_slowtimo, detach_time, detach_wait; + +extern int (*lprint_print) _P((void *, const char *, va_list)); +extern char *lprint_ptr, *lprint_ptr2, **lprint_arg; +extern struct sbuf *lprint_sb; + +#ifndef HAVE_STRDUP +char *strdup _P((const char *)); +#endif + +void do_wait _P((int)); + +#define EMU_NONE 0x0 + +/* TCP emulations */ +#define EMU_CTL 0x1 +#define EMU_FTP 0x2 +#define EMU_KSH 0x3 +#define EMU_IRC 0x4 +#define EMU_REALAUDIO 0x5 +#define EMU_RLOGIN 0x6 +#define EMU_IDENT 0x7 +#define EMU_RSH 0x8 + +#define EMU_NOCONNECT 0x10 /* Don't connect */ + +/* UDP emulations */ +#define EMU_TALK 0x1 +#define EMU_NTALK 0x2 +#define EMU_CUSEEME 0x3 + +struct tos_t { + u_int16_t lport; + u_int16_t fport; + u_int8_t tos; + u_int8_t emu; +}; + +struct emu_t { + u_int16_t lport; + u_int16_t fport; + u_int8_t tos; + u_int8_t emu; + struct emu_t *next; +}; + +extern struct emu_t *tcpemu; + +extern int x_port, x_server, x_display; + +int show_x _P((char *, struct socket *)); +void redir_x _P((u_int32_t, int, int, int)); +void getouraddr _P((void)); +inline void slirp_insque _P((void *, void *)); +inline void slirp_remque _P((void *)); +int add_exec _P((struct ex_list **, int, char *, int, int)); +int openpty _P((int *, int *)); +int fork_exec _P((struct socket *, char *, int)); +void snooze_hup _P((int)); +void snooze _P((void)); +void relay _P((int)); +void add_emu _P((char *)); +void u_sleep _P((int)); +void fd_nonblock _P((int)); +void fd_block _P((int)); +int rsh_exec _P((struct socket *, struct socket *, char *, char *, char *)); + +#endif diff --git a/slirp/sbuf.c b/slirp/sbuf.c new file mode 100644 index 000000000..04fb97ddc --- /dev/null +++ b/slirp/sbuf.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 1995 Danny 
Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> + +/* Done as a macro in socket.h */ +/* int + * sbspace(struct sockbuff *sb) + * { + * return SB_DATALEN - sb->sb_cc; + * } + */ + +void +sbfree(sb) + struct sbuf *sb; +{ + free(sb->sb_data); +} + +void +sbdrop(sb, num) + struct sbuf *sb; + int num; +{ + /* + * We can only drop how much we have + * This should never succeed + */ + if(num > sb->sb_cc) + num = sb->sb_cc; + sb->sb_cc -= num; + sb->sb_rptr += num; + if(sb->sb_rptr >= sb->sb_data + sb->sb_datalen) + sb->sb_rptr -= sb->sb_datalen; + +} + +void +sbreserve(sb, size) + struct sbuf *sb; + int size; +{ + if (sb->sb_data) { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != size) { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)realloc(sb->sb_data, size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } + } else { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } +} + +/* + * Try and write() to the socket, whatever doesn't get written + * append to the buffer... for a host with a fast net connection, + * this prevents an unnecessary copy of the data + * (the socket is non-blocking, so we won't hang) + */ +void +sbappend(so, m) + struct socket *so; + struct mbuf *m; +{ + int ret = 0; + + DEBUG_CALL("sbappend"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m->m_len = %d", m->m_len); + + /* Shouldn't happen, but... e.g. 
foreign host closes connection */ + if (m->m_len <= 0) { + m_free(m); + return; + } + + /* + * If there is urgent data, call sosendoob + * if not all was sent, sowrite will take care of the rest + * (The rest of this function is just an optimisation) + */ + if (so->so_urgc) { + sbappendsb(&so->so_rcv, m); + m_free(m); + sosendoob(so); + return; + } + + /* + * We only write if there's nothing in the buffer, + * ottherwise it'll arrive out of order, and hence corrupt + */ + if (!so->so_rcv.sb_cc) + ret = write(so->s, m->m_data, m->m_len); + + if (ret <= 0) { + /* + * Nothing was written + * It's possible that the socket has closed, but + * we don't need to check because if it has closed, + * it will be detected in the normal way by soread() + */ + sbappendsb(&so->so_rcv, m); + } else if (ret != m->m_len) { + /* + * Something was written, but not everything.. + * sbappendsb the rest + */ + m->m_len -= ret; + m->m_data += ret; + sbappendsb(&so->so_rcv, m); + } /* else */ + /* Whatever happened, we free the mbuf */ + m_free(m); +} + +/* + * Copy the data from m into sb + * The caller is responsible to make sure there's enough room + */ +void +sbappendsb(sb, m) + struct sbuf *sb; + struct mbuf *m; +{ + int len, n, nn; + + len = m->m_len; + + if (sb->sb_wptr < sb->sb_rptr) { + n = sb->sb_rptr - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + } else { + /* Do the right edge first */ + n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + len -= n; + if (len) { + /* Now the left edge */ + nn = sb->sb_rptr - sb->sb_data; + if (nn > len) nn = len; + memcpy(sb->sb_data,m->m_data+n,nn); + n += nn; + } + } + + sb->sb_cc += n; + sb->sb_wptr += n; + if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) + sb->sb_wptr -= sb->sb_datalen; +} + +/* + * Copy data from sbuf to a normal, straight buffer + * Don't update the sbuf rptr, this will be + * done in sbdrop when the data is acked + */ +void 
+sbcopy(sb, off, len, to) + struct sbuf *sb; + int off; + int len; + char *to; +{ + char *from; + + from = sb->sb_rptr + off; + if (from >= sb->sb_data + sb->sb_datalen) + from -= sb->sb_datalen; + + if (from < sb->sb_wptr) { + if (len > sb->sb_cc) len = sb->sb_cc; + memcpy(to,from,len); + } else { + /* re-use off */ + off = (sb->sb_data + sb->sb_datalen) - from; + if (off > len) off = len; + memcpy(to,from,off); + len -= off; + if (len) + memcpy(to+off,sb->sb_data,len); + } +} + diff --git a/slirp/sbuf.h b/slirp/sbuf.h new file mode 100644 index 000000000..161e0bb76 --- /dev/null +++ b/slirp/sbuf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _SBUF_H_ +#define _SBUF_H_ + +#define sbflush(sb) sbdrop((sb),(sb)->sb_cc) +#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) + +struct sbuf { + u_int sb_cc; /* actual chars in buffer */ + u_int sb_datalen; /* Length of data */ + char *sb_wptr; /* write pointer. points to where the next + * bytes should be written in the sbuf */ + char *sb_rptr; /* read pointer. 
points to where the next + * byte should be read from the sbuf */ + char *sb_data; /* Actual data */ +}; + +void sbfree _P((struct sbuf *)); +void sbdrop _P((struct sbuf *, int)); +void sbreserve _P((struct sbuf *, int)); +void sbappend _P((struct socket *, struct mbuf *)); +void sbappendsb _P((struct sbuf *, struct mbuf *)); +void sbcopy _P((struct sbuf *, int, int, char *)); + +#endif diff --git a/slirp/slirp.c b/slirp/slirp.c new file mode 100644 index 000000000..48b45a39c --- /dev/null +++ b/slirp/slirp.c @@ -0,0 +1,550 @@ +#include "slirp.h" + +/* host address */ +struct in_addr our_addr; +/* host dns address */ +struct in_addr dns_addr; +/* host loopback address */ +struct in_addr loopback_addr; + +/* address for slirp virtual addresses */ +struct in_addr special_addr; + +const uint8_t special_ethaddr[6] = { + 0x52, 0x54, 0x00, 0x12, 0x35, 0x00 +}; + +uint8_t client_ethaddr[6]; + +int do_slowtimo; +int link_up; +struct timeval tt; +FILE *lfd; + +/* XXX: suppress those select globals */ +fd_set *global_readfds, *global_writefds, *global_xfds; + +#ifdef _WIN32 + +static int get_dns_addr(struct in_addr *pdns_addr) +{ + /* XXX: add it */ + return -1; +} + +#else + +static int get_dns_addr(struct in_addr *pdns_addr) +{ + char buff[512]; + char buff2[256]; + FILE *f; + int found = 0; + struct in_addr tmp_addr; + + f = fopen("/etc/resolv.conf", "r"); + if (!f) + return -1; + + lprint("IP address of your DNS(s): "); + while (fgets(buff, 512, f) != NULL) { + if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { + if (!inet_aton(buff2, &tmp_addr)) + continue; + if (tmp_addr.s_addr == loopback_addr.s_addr) + tmp_addr = our_addr; + /* If it's the first one, set it to dns_addr */ + if (!found) + *pdns_addr = tmp_addr; + else + lprint(", "); + if (++found > 3) { + lprint("(more)"); + break; + } else + lprint("%s", inet_ntoa(tmp_addr)); + } + } + if (!found) + return -1; + return 0; +} + +#endif + +void slirp_init(void) +{ + debug_init("/tmp/slirp.log", DEBUG_DEFAULT); 
+ + link_up = 1; + + if_init(); + ip_init(); + + /* Initialise mbufs *after* setting the MTU */ + m_init(); + + /* set default addresses */ + getouraddr(); + inet_aton("127.0.0.1", &loopback_addr); + + if (get_dns_addr(&dns_addr) < 0) { + fprintf(stderr, "Could not get DNS address\n"); + exit(1); + } + + inet_aton(CTL_SPECIAL, &special_addr); +} + +#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) +#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) +#define UPD_NFDS(x) if (nfds < (x)) nfds = (x) + +/* + * curtime kept to an accuracy of 1ms + */ +static void updtime(void) +{ + gettimeofday(&tt, 0); + + curtime = (u_int)tt.tv_sec * (u_int)1000; + curtime += (u_int)tt.tv_usec / (u_int)1000; + + if ((tt.tv_usec % 1000) >= 500) + curtime++; +} + +void slirp_select_fill(int *pnfds, + fd_set *readfds, fd_set *writefds, fd_set *xfds) +{ + struct socket *so, *so_next; + struct timeval timeout; + int nfds; + int tmp_time; + + /* fail safe */ + global_readfds = NULL; + global_writefds = NULL; + global_xfds = NULL; + + nfds = *pnfds; + /* + * First, TCP sockets + */ + do_slowtimo = 0; + if (link_up) { + /* + * *_slowtimo needs calling if there are IP fragments + * in the fragment queue, or there are TCP connections active + */ + do_slowtimo = ((tcb.so_next != &tcb) || + ((struct ipasfrag *)&ipq != (struct ipasfrag *)ipq.next)); + + for (so = tcb.so_next; so != &tcb; so = so_next) { + so_next = so->so_next; + + /* + * See if we need a tcp_fasttimo + */ + if (time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) + time_fasttimo = curtime; /* Flag when we want a fasttimo */ + + /* + * NOFDREF can include still connecting to local-host, + * newly socreated() sockets etc. Don't want to select these. 
+ */ + if (so->so_state & SS_NOFDREF || so->s == -1) + continue; + + /* + * Set for reading sockets which are accepting + */ + if (so->so_state & SS_FACCEPTCONN) { + FD_SET(so->s, readfds); + UPD_NFDS(so->s); + continue; + } + + /* + * Set for writing sockets which are connecting + */ + if (so->so_state & SS_ISFCONNECTING) { + FD_SET(so->s, writefds); + UPD_NFDS(so->s); + continue; + } + + /* + * Set for writing if we are connected, can send more, and + * we have something to send + */ + if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { + FD_SET(so->s, writefds); + UPD_NFDS(so->s); + } + + /* + * Set for reading (and urgent data) if we are connected, can + * receive more, and we have room for it XXX /2 ? + */ + if (CONN_CANFRCV(so) && (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2))) { + FD_SET(so->s, readfds); + FD_SET(so->s, xfds); + UPD_NFDS(so->s); + } + } + + /* + * UDP sockets + */ + for (so = udb.so_next; so != &udb; so = so_next) { + so_next = so->so_next; + + /* + * See if it's timed out + */ + if (so->so_expire) { + if (so->so_expire <= curtime) { + udp_detach(so); + continue; + } else + do_slowtimo = 1; /* Let socket expire */ + } + + /* + * When UDP packets are received from over the + * link, they're sendto()'d straight away, so + * no need for setting for writing + * Limit the number of packets queued by this session + * to 4. Note that even though we try and limit this + * to 4 packets, the session could have more queued + * if the packets needed to be fragmented + * (XXX <= 4 ?) + */ + if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { + FD_SET(so->s, readfds); + UPD_NFDS(so->s); + } + } + } + + /* + * Setup timeout to use minimum CPU usage, especially when idle + */ + + /* + * First, see the timeout needed by *timo + */ + timeout.tv_sec = 0; + timeout.tv_usec = -1; + /* + * If a slowtimo is needed, set timeout to 500ms from the last + * slow timeout. If a fast timeout is needed, set timeout within + * 200ms of when it was requested. 
+ */ + if (do_slowtimo) { + /* XXX + 10000 because some select()'s aren't that accurate */ + timeout.tv_usec = ((500 - (curtime - last_slowtimo)) * 1000) + 10000; + if (timeout.tv_usec < 0) + timeout.tv_usec = 0; + else if (timeout.tv_usec > 510000) + timeout.tv_usec = 510000; + + /* Can only fasttimo if we also slowtimo */ + if (time_fasttimo) { + tmp_time = (200 - (curtime - time_fasttimo)) * 1000; + if (tmp_time < 0) + tmp_time = 0; + + /* Choose the smallest of the 2 */ + if (tmp_time < timeout.tv_usec) + timeout.tv_usec = (u_int)tmp_time; + } + } + *pnfds = nfds; +} + +void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds) +{ + struct socket *so, *so_next; + int ret; + + global_readfds = readfds; + global_writefds = writefds; + global_xfds = xfds; + + /* Update time */ + updtime(); + + /* + * See if anything has timed out + */ + if (link_up) { + if (time_fasttimo && ((curtime - time_fasttimo) >= 199)) { + tcp_fasttimo(); + time_fasttimo = 0; + } + if (do_slowtimo && ((curtime - last_slowtimo) >= 499)) { + ip_slowtimo(); + tcp_slowtimo(); + last_slowtimo = curtime; + } + } + + /* + * Check sockets + */ + if (link_up) { + /* + * Check TCP sockets + */ + for (so = tcb.so_next; so != &tcb; so = so_next) { + so_next = so->so_next; + + /* + * FD_ISSET is meaningless on these sockets + * (and they can crash the program) + */ + if (so->so_state & SS_NOFDREF || so->s == -1) + continue; + + /* + * Check for URG data + * This will soread as well, so no need to + * test for readfds below if this succeeds + */ + if (FD_ISSET(so->s, xfds)) + sorecvoob(so); + /* + * Check sockets for reading + */ + else if (FD_ISSET(so->s, readfds)) { + /* + * Check for incoming connections + */ + if (so->so_state & SS_FACCEPTCONN) { + tcp_connect(so); + continue; + } /* else */ + ret = soread(so); + + /* Output it if we read something */ + if (ret > 0) + tcp_output(sototcpcb(so)); + } + + /* + * Check sockets for writing + */ + if (FD_ISSET(so->s, writefds)) { + /* + * 
Check for non-blocking, still-connecting sockets + */ + if (so->so_state & SS_ISFCONNECTING) { + /* Connected */ + so->so_state &= ~SS_ISFCONNECTING; + + ret = write(so->s, &ret, 0); + if (ret < 0) { + /* XXXXX Must fix, zero bytes is a NOP */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) + continue; + + /* else failed */ + so->so_state = SS_NOFDREF; + } + /* else so->so_state &= ~SS_ISFCONNECTING; */ + + /* + * Continue tcp_input + */ + tcp_input((struct mbuf *)NULL, sizeof(struct ip), so); + /* continue; */ + } else + ret = sowrite(so); + /* + * XXXXX If we wrote something (a lot), there + * could be a need for a window update. + * In the worst case, the remote will send + * a window probe to get things going again + */ + } + + /* + * Probe a still-connecting, non-blocking socket + * to check if it's still alive + */ +#ifdef PROBE_CONN + if (so->so_state & SS_ISFCONNECTING) { + ret = read(so->s, (char *)&ret, 0); + + if (ret < 0) { + /* XXX */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) + continue; /* Still connecting, continue */ + + /* else failed */ + so->so_state = SS_NOFDREF; + + /* tcp_input will take care of it */ + } else { + ret = write(so->s, &ret, 0); + if (ret < 0) { + /* XXX */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) + continue; + /* else failed */ + so->so_state = SS_NOFDREF; + } else + so->so_state &= ~SS_ISFCONNECTING; + + } + tcp_input((struct mbuf *)NULL, sizeof(struct ip),so); + } /* SS_ISFCONNECTING */ +#endif + } + + /* + * Now UDP sockets. + * Incoming packets are sent straight away, they're not buffered. + * Incoming UDP data isn't buffered either. 
+ */ + for (so = udb.so_next; so != &udb; so = so_next) { + so_next = so->so_next; + + if (so->s != -1 && FD_ISSET(so->s, readfds)) { + sorecvfrom(so); + } + } + } + + /* + * See if we can start outputting + */ + if (if_queued && link_up) + if_start(); +} + +#define ETH_ALEN 6 +#define ETH_HLEN 14 + +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ + +#define ARPOP_REQUEST 1 /* ARP request */ +#define ARPOP_REPLY 2 /* ARP reply */ + +struct ethhdr +{ + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; + +struct arphdr +{ + unsigned short ar_hrd; /* format of hardware address */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ + + /* + * Ethernet looks like this : This bit is variable sized however... 
+ */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +}; + +void arp_input(const uint8_t *pkt, int pkt_len) +{ + struct ethhdr *eh = (struct ethhdr *)pkt; + struct arphdr *ah = (struct arphdr *)(pkt + ETH_HLEN); + uint8_t arp_reply[ETH_HLEN + sizeof(struct arphdr)]; + struct ethhdr *reh = (struct ethhdr *)arp_reply; + struct arphdr *rah = (struct arphdr *)(arp_reply + ETH_HLEN); + int ar_op; + + ar_op = ntohs(ah->ar_op); + switch(ar_op) { + case ARPOP_REQUEST: + if (!memcmp(ah->ar_tip, &special_addr, 3) && + (ah->ar_tip[3] == CTL_DNS || ah->ar_tip[3] == CTL_ALIAS)) { + + /* XXX: make an ARP request to have the client address */ + memcpy(client_ethaddr, eh->h_source, ETH_ALEN); + + /* ARP request for alias/dns mac address */ + memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); + memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 1); + reh->h_source[5] = ah->ar_tip[3]; + reh->h_proto = htons(ETH_P_ARP); + + rah->ar_hrd = htons(1); + rah->ar_pro = htons(ETH_P_IP); + rah->ar_hln = ETH_ALEN; + rah->ar_pln = 4; + rah->ar_op = htons(ARPOP_REPLY); + memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); + memcpy(rah->ar_sip, ah->ar_tip, 4); + memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); + memcpy(rah->ar_tip, ah->ar_sip, 4); + slirp_output(arp_reply, sizeof(arp_reply)); + } + break; + default: + break; + } +} + +void slirp_input(const uint8_t *pkt, int pkt_len) +{ + struct mbuf *m; + int proto; + + if (pkt_len < ETH_HLEN) + return; + + proto = ntohs(*(uint16_t *)(pkt + 12)); + switch(proto) { + case ETH_P_ARP: + arp_input(pkt, pkt_len); + break; + case ETH_P_IP: + m = m_get(); + if (!m) + return; + m->m_len = pkt_len; + memcpy(m->m_data, pkt, pkt_len); + + m->m_data += ETH_HLEN; + m->m_len -= ETH_HLEN; + + ip_input(m); + break; + default: + break; + } +} + +/* output the IP packet to the ethernet device */ 
+void if_encap(const uint8_t *ip_data, int ip_data_len) +{ + uint8_t buf[1600]; + struct ethhdr *eh = (struct ethhdr *)buf; + + if (ip_data_len + ETH_HLEN > sizeof(buf)) + return; + + memcpy(eh->h_dest, client_ethaddr, ETH_ALEN); + memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 1); + eh->h_source[5] = CTL_ALIAS; + eh->h_proto = htons(ETH_P_IP); + memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len); + slirp_output(buf, ip_data_len + ETH_HLEN); +} diff --git a/slirp/slirp.h b/slirp/slirp.h new file mode 100644 index 000000000..c2fa86e99 --- /dev/null +++ b/slirp/slirp.h @@ -0,0 +1,308 @@ +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#define CONFIG_QEMU + +#define DEBUG 1 + +#ifndef CONFIG_QEMU +#include "version.h" +#endif +#include "config.h" +#include "slirp_config.h" + +#include <sys/types.h> +#ifdef HAVE_SYS_BITYPES_H +# include <sys/bitypes.h> +#endif + +#ifdef NEED_TYPEDEFS +typedef char int8_t; +typedef unsigned char u_int8_t; + +# if SIZEOF_SHORT == 2 + typedef short int16_t; + typedef unsigned short u_int16_t; +# else +# if SIZEOF_INT == 2 + typedef int int16_t; + typedef unsigned int u_int16_t; +# else + #error Cannot find a type with sizeof() == 2 +# endif +# endif + +# if SIZEOF_SHORT == 4 + typedef short int32_t; + typedef unsigned short u_int32_t; +# else +# if SIZEOF_INT == 4 + typedef int int32_t; + typedef unsigned int u_int32_t; +# else + #error Cannot find a type with sizeof() == 4 +# endif +# endif +#endif /* NEED_TYPEDEFS */ + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif + +#include <stdio.h> +#include <errno.h> + +#ifndef HAVE_MEMMOVE +#define memmove(x, y, z) bcopy(y, x, z) +#endif + +#if TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#include <sys/uio.h> + +#ifndef _P +#ifndef 
NO_PROTOTYPES +# define _P(x) x +#else +# define _P(x) () +#endif +#endif + +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifdef GETTIMEOFDAY_ONE_ARG +#define gettimeofday(x, y) gettimeofday(x) +#endif + +/* Systems lacking strdup() definition in <string.h>. */ +#if defined(ultrix) +char *strdup _P((const char *)); +#endif + +/* Systems lacking malloc() definition in <stdlib.h>. */ +#if defined(ultrix) || defined(hcx) +void *malloc _P((size_t arg)); +void free _P((void *ptr)); +#endif + +#ifndef HAVE_INET_ATON +int inet_aton _P((const char *cp, struct in_addr *ia)); +#endif + +#include <fcntl.h> +#ifndef NO_UNIX_SOCKETS +#include <sys/un.h> +#endif +#include <signal.h> +#ifdef HAVE_SYS_SIGNAL_H +# include <sys/signal.h> +#endif +#include <sys/socket.h> + +#if defined(WANT_SYS_IOCTL_H) && defined(HAVE_SYS_IOCTL_H) +# include <sys/ioctl.h> +#else +# define WANT_SYS_TERMIOS_H +#endif + +#ifdef WANT_SYS_TERMIOS_H +# ifndef INCLUDED_TERMIOS_H +# ifdef HAVE_TERMIOS_H +# include <termios.h> +# else +# include <termio.h> +# endif +# define INCLUDED_TERMIOS_H +# endif +#endif + + + +#ifdef HAVE_SYS_SELECT_H +# include <sys/select.h> +#endif + +#ifdef HAVE_SYS_WAIT_H +# include <sys/wait.h> +#endif + +#ifdef HAVE_SYS_FILIO_H +# include <sys/filio.h> +#endif + +#ifdef USE_PPP +#include <ppp/slirppp.h> +#endif + +#ifdef __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +#include <sys/stat.h> + +/* Avoid conflicting with the libc insque() and remque(), which + have different prototypes. 
*/ +#define insque slirp_insque +#define remque slirp_remque + +#ifdef HAVE_SYS_STROPTS_H +#include <sys/stropts.h> +#endif + +#include "debug.h" + +#include "ip.h" +#include "tcp.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "udp.h" +#include "icmp_var.h" +#include "mbuf.h" +#include "sbuf.h" +#include "socket.h" +#include "if.h" +#include "main.h" +#include "misc.h" +#include "ctl.h" +#ifdef USE_PPP +#include "ppp/pppd.h" +#include "ppp/ppp.h" +#endif + +#include "bootp.h" +#include "libslirp.h" + +extern struct ttys *ttys_unit[MAX_INTERFACES]; + +#ifndef NULL +#define NULL (void *)0 +#endif + +#ifndef FULL_BOLT +void if_start _P((void)); +#else +void if_start _P((struct ttys *)); +#endif + +#ifdef BAD_SPRINTF +# define vsprintf vsprintf_len +# define sprintf sprintf_len + extern int vsprintf_len _P((char *, const char *, va_list)); + extern int sprintf_len _P((char *, const char *, ...)); +#endif + +#ifdef DECLARE_SPRINTF +# ifndef BAD_SPRINTF + extern int vsprintf _P((char *, const char *, va_list)); +# endif + extern int vfprintf _P((FILE *, const char *, va_list)); +#endif + +#ifndef HAVE_STRERROR + extern char *strerror _P((int error)); +#endif + +#ifndef HAVE_INDEX + char *index _P((const char *, int)); +#endif + +#ifndef HAVE_GETHOSTID + long gethostid _P((void)); +#endif + +void lprint _P((const char *, ...)); + +extern int do_echo; + +#if SIZEOF_CHAR_P == 4 +# define insque_32 insque +# define remque_32 remque +#else + inline void insque_32 _P((void *, void *)); + inline void remque_32 _P((void *)); +#endif + +#include <pwd.h> +#include <netdb.h> + +#define DEFAULT_BAUD 115200 + +/* cksum.c */ +int cksum(struct mbuf *m, int len); + +/* if.c */ +void if_init _P((void)); +void if_output _P((struct socket *, struct mbuf *)); + +/* ip_input.c */ +void ip_init _P((void)); +void ip_input _P((struct mbuf *)); +struct ip * ip_reass _P((register struct ipasfrag *, register struct ipq *)); +void ip_freef _P((struct ipq *)); +void 
ip_enq _P((register struct ipasfrag *, register struct ipasfrag *)); +void ip_deq _P((register struct ipasfrag *)); +void ip_slowtimo _P((void)); +void ip_stripoptions _P((register struct mbuf *, struct mbuf *)); + +/* ip_output.c */ +int ip_output _P((struct socket *, struct mbuf *)); + +/* tcp_input.c */ +int tcp_reass _P((register struct tcpcb *, register struct tcpiphdr *, struct mbuf *)); +void tcp_input _P((register struct mbuf *, int, struct socket *)); +void tcp_dooptions _P((struct tcpcb *, u_char *, int, struct tcpiphdr *)); +void tcp_xmit_timer _P((register struct tcpcb *, int)); +int tcp_mss _P((register struct tcpcb *, u_int)); + +/* tcp_output.c */ +int tcp_output _P((register struct tcpcb *)); +void tcp_setpersist _P((register struct tcpcb *)); + +/* tcp_subr.c */ +void tcp_init _P((void)); +void tcp_template _P((struct tcpcb *)); +void tcp_respond _P((struct tcpcb *, register struct tcpiphdr *, register struct mbuf *, tcp_seq, tcp_seq, int)); +struct tcpcb * tcp_newtcpcb _P((struct socket *)); +struct tcpcb * tcp_close _P((register struct tcpcb *)); +void tcp_drain _P((void)); +void tcp_sockclosed _P((struct tcpcb *)); +int tcp_fconnect _P((struct socket *)); +void tcp_connect _P((struct socket *)); +int tcp_attach _P((struct socket *)); +u_int8_t tcp_tos _P((struct socket *)); +int tcp_emu _P((struct socket *, struct mbuf *)); +int tcp_ctl _P((struct socket *)); +struct tcpcb *tcp_drop(struct tcpcb *tp, int errno); + +#ifdef USE_PPP +#define MIN_MRU MINMRU +#define MAX_MRU MAXMRU +#else +#define MIN_MRU 128 +#define MAX_MRU 16384 +#endif + +#endif diff --git a/slirp/slirp_config.h b/slirp/slirp_config.h new file mode 100644 index 000000000..51fc95157 --- /dev/null +++ b/slirp/slirp_config.h @@ -0,0 +1,186 @@ +/* + * User definable configuration options + */ + +/* Undefine if you don't want talk emulation */ +#undef EMULATE_TALK + +/* Define if you want the connection to be probed */ +/* XXX Not working yet, so ignore this for now */ +#undef 
PROBE_CONN + +/* Define to 1 if you want KEEPALIVE timers */ +#define DO_KEEPALIVE 0 + +/* Define to MAX interfaces you expect to use at once */ +/* MAX_INTERFACES determines the max. TOTAL number of interfaces (SLIP and PPP) */ +/* MAX_PPP_INTERFACES determines max. number of PPP interfaces */ +#define MAX_INTERFACES 1 +#define MAX_PPP_INTERFACES 1 + +/* Define if you want slirp's socket in /tmp */ +/* XXXXXX Do this in ./configure */ +#undef USE_TMPSOCKET + +/* Define if you want slirp to use cfsetXspeed() on the terminal */ +#undef DO_CFSETSPEED + +/* Define this if you want slirp to write to the tty as fast as it can */ +/* This should only be set if you are using load-balancing, slirp does a */ +/* pretty good job on single modems already, and setting this will make */ +/* interactive sessions less responsive */ +/* XXXXX Talk about having fast modem as unit 0 */ +#undef FULL_BOLT + +/* + * Define if you want slirp to use less CPU + * You will notice a small lag in interactive sessions, but it's not that bad + * Things like Netscape/ftp/etc. 
are completely unaffected + * This is mainly for sysadmins who have many slirp users + */ +#undef USE_LOWCPU + +/* Define this if your compiler doesn't like prototypes */ +#ifndef __STDC__ +#define NO_PROTOTYPES +#endif + +/*********************************************************/ +/* + * Autoconf defined configuration options + * You shouldn't need to touch any of these + */ + +/* Ignore this */ +#undef DUMMY_PPP + +/* Define if you have unistd.h */ +#define HAVE_UNISTD_H + +/* Define if you have stdlib.h */ +#define HAVE_STDLIB_H + +/* Define if you have sys/ioctl.h */ +#undef HAVE_SYS_IOCTL_H + +/* Define if you have sys/filio.h */ +#undef HAVE_SYS_FILIO_H + +/* Define if you have strerror */ +#define HAVE_STRERROR + +/* Define if you have strdup() */ +#define HAVE_STRDUP + +/* Define according to how time.h should be included */ +#define TIME_WITH_SYS_TIME 0 +#undef HAVE_SYS_TIME_H + +/* Define if you have sys/bitypes.h */ +#undef HAVE_SYS_BITYPES_H + +/* Define if the machine is big endian */ +//#undef WORDS_BIGENDIAN + +/* Define if your sprintf returns char * instead of int */ +#undef BAD_SPRINTF + +/* Define if you have readv */ +#undef HAVE_READV + +/* Define if iovec needs to be declared */ +#undef DECLARE_IOVEC + +/* Define if a declaration of sprintf/fprintf is needed */ +#undef DECLARE_SPRINTF + +/* Define if you have a POSIX.1 sys/wait.h */ +#undef HAVE_SYS_WAIT_H + +/* Define if you have sys/select.h */ +#define HAVE_SYS_SELECT_H + +/* Define if you have strings.h */ +#define HAVE_STRING_H + +/* Define if you have arpa/inet.h */ +#define HAVE_ARPA_INET_H + +/* Define if you have sys/signal.h */ +#undef HAVE_SYS_SIGNAL_H + +/* Define if you have sys/stropts.h */ +#undef HAVE_SYS_STROPTS_H + +/* Define to whatever your compiler thinks inline should be */ +#define inline inline + +/* Define to whatever your compiler thinks const should be */ +#define const const + +/* Define if your compiler doesn't like prototypes */ +#undef NO_PROTOTYPES + +/* Define 
if you don't have u_int32_t etc. typedef'd */ +#undef NEED_TYPEDEFS + +/* Define to sizeof(char) */ +#define SIZEOF_CHAR 1 + +/* Define to sizeof(short) */ +#define SIZEOF_SHORT 2 + +/* Define to sizeof(int) */ +#define SIZEOF_INT 4 + +/* Define to sizeof(char *) */ +/* XXX: patch it */ +#define SIZEOF_CHAR_P 4 + +/* Define if you have random() */ +#undef HAVE_RANDOM + +/* Define if you have srandom() */ +#undef HAVE_SRANDOM + +/* Define if you have inet_aton */ +#define HAVE_INET_ATON + +/* Define if you have setenv */ +#undef HAVE_SETENV + +/* Define if you have index() */ +#undef HAVE_INDEX + +/* Define if you have bcmp() */ +#undef HAVE_BCMP + +/* Define if you have drand48 */ +#undef HAVE_DRAND48 + +/* Define if you have memmove */ +#define HAVE_MEMMOVE + +/* Define if you have <termios.h> */ +#undef HAVE_TERMIOS_H + +/* Define if you have gethostid */ +#undef HAVE_GETHOSTID + +/* Define if you DON'T have unix-domain sockets */ +#undef NO_UNIX_SOCKETS + +/* Define if gettimeofday only takes one argument */ +#undef GETTIMEOFDAY_ONE_ARG + +/* Define if you have revoke() */ +#undef HAVE_REVOKE + +/* Define if you have the sysv method of opening pty's (/dev/ptmx, etc.) */ +#undef HAVE_GRANTPT + +/* Define if you have fchmod */ +#undef HAVE_FCHMOD + +/* Define if you have <sys/type32.h> */ +#undef HAVE_SYS_TYPES32_H diff --git a/slirp/socket.c b/slirp/socket.c new file mode 100644 index 000000000..396fb4ac7 --- /dev/null +++ b/slirp/socket.c @@ -0,0 +1,696 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. 
+ */ + +#define WANT_SYS_IOCTL_H +#include <slirp.h> +#include "ip_icmp.h" +#include "main.h" + +void +so_init() +{ + /* Nothing yet */ +} + + +struct socket * +solookup(head, laddr, lport, faddr, fport) + struct socket *head; + struct in_addr laddr; + u_int lport; + struct in_addr faddr; + u_int fport; +{ + struct socket *so; + + for (so = head->so_next; so != head; so = so->so_next) { + if (so->so_lport == lport && + so->so_laddr.s_addr == laddr.s_addr && + so->so_faddr.s_addr == faddr.s_addr && + so->so_fport == fport) + break; + } + + if (so == head) + return (struct socket *)NULL; + return so; + +} + +/* + * Create a new socket, initialise the fields + * It is the responsibility of the caller to + * insque() it into the correct linked-list + */ +struct socket * +socreate() +{ + struct socket *so; + + so = (struct socket *)malloc(sizeof(struct socket)); + if(so) { + memset(so, 0, sizeof(struct socket)); + so->so_state = SS_NOFDREF; + so->s = -1; + } + return(so); +} + +/* + * remque and free a socket, clobber cache + */ +void +sofree(so) + struct socket *so; +{ + if (so->so_emu==EMU_RSH && so->extra) { + sofree(so->extra); + so->extra=NULL; + } + if (so == tcp_last_so) + tcp_last_so = &tcb; + else if (so == udp_last_so) + udp_last_so = &udb; + + m_free(so->so_m); + + if(so->so_next && so->so_prev) + remque(so); /* crashes if so is not in a queue */ + + free(so); +} + +/* + * Read from so's socket into sb_snd, updating all relevant sbuf fields + * NOTE: This will only be called if it is select()ed for reading, so + * a read() of 0 (or less) means it's disconnected + */ +int +soread(so) + struct socket *so; +{ + int n, nn, lss, total; + struct sbuf *sb = &so->so_snd; + int len = sb->sb_datalen - sb->sb_cc; + struct iovec iov[2]; + int mss = so->so_tcpcb->t_maxseg; + + DEBUG_CALL("soread"); + DEBUG_ARG("so = %lx", (long )so); + + /* + * No need to check if there's enough room to read. 
+ * soread wouldn't have been called if there weren't + */ + + len = sb->sb_datalen - sb->sb_cc; + + iov[0].iov_base = sb->sb_wptr; + if (sb->sb_wptr < sb->sb_rptr) { + iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_rptr - sb->sb_data; + if(iov[1].iov_len > len) + iov[1].iov_len = len; + total = iov[0].iov_len + iov[1].iov_len; + if (total > mss) { + lss = total%mss; + if (iov[1].iov_len > lss) { + iov[1].iov_len -= lss; + n = 2; + } else { + lss -= iov[1].iov_len; + iov[0].iov_len -= lss; + n = 1; + } + } else + n = 2; + } else { + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } + } + +#ifdef HAVE_READV + nn = readv(so->s, (struct iovec *)iov, n); + DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); +#else + nn = read(so->s, iov[0].iov_base, iov[0].iov_len); +#endif + if (nn <= 0) { + if (nn < 0 && (errno == EINTR || errno == EAGAIN)) + return 0; + else { + DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno))); + sofcantrcvmore(so); + tcp_sockclosed(sototcpcb(so)); + return -1; + } + } + +#ifndef HAVE_READV + /* + * If there was no error, try and read the second time round + * We read again if n = 2 (ie, there's another part of the buffer) + * and we read as much as we could in the first read + * We don't test for <= 0 this time, because there legitimately + * might not be any more data (since the socket is non-blocking), + * a close will be detected on next iteration. 
+ * A return of -1 wont (shouldn't) happen, since it didn't happen above + */ + if (n == 2 && nn == iov[0].iov_len) + nn += read(so->s, iov[1].iov_base, iov[1].iov_len); + + DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); +#endif + + /* Update fields */ + sb->sb_cc += nn; + sb->sb_wptr += nn; + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_wptr -= sb->sb_datalen; + return nn; +} + +/* + * Get urgent data + * + * When the socket is created, we set it SO_OOBINLINE, + * so when OOB data arrives, we soread() it and everything + * in the send buffer is sent as urgent data + */ +void +sorecvoob(so) + struct socket *so; +{ + struct tcpcb *tp = sototcpcb(so); + + DEBUG_CALL("sorecvoob"); + DEBUG_ARG("so = %lx", (long)so); + + /* + * We take a guess at how much urgent data has arrived. + * In most situations, when urgent data arrives, the next + * read() should get all the urgent data. This guess will + * be wrong however if more data arrives just after the + * urgent data, or the read() doesn't return all the + * urgent data. + */ + soread(so); + tp->snd_up = tp->snd_una + so->so_snd.sb_cc; + tp->t_force = 1; + tcp_output(tp); + tp->t_force = 0; +} + +/* + * Send urgent data + * There's a lot duplicated code here, but... 
+ */ +int +sosendoob(so) + struct socket *so; +{ + struct sbuf *sb = &so->so_rcv; + char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ + + int n, len; + + DEBUG_CALL("sosendoob"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); + + if (so->so_urgc > 2048) + so->so_urgc = 2048; /* XXXX */ + + if (sb->sb_rptr < sb->sb_wptr) { + /* We can send it directly */ + n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */ + so->so_urgc -= n; + + DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc)); + } else { + /* + * Since there's no sendv or sendtov like writev, + * we must copy all data to a linear buffer then + * send it all + */ + len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (len > so->so_urgc) len = so->so_urgc; + memcpy(buff, sb->sb_rptr, len); + so->so_urgc -= len; + if (so->so_urgc) { + n = sb->sb_wptr - sb->sb_data; + if (n > so->so_urgc) n = so->so_urgc; + memcpy((buff + len), sb->sb_data, n); + so->so_urgc -= n; + len += n; + } + n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ +#ifdef DEBUG + if (n != len) + DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n")); +#endif + DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc)); + } + + sb->sb_cc -= n; + sb->sb_rptr += n; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + return n; +} + +/* + * Write data from so_rcv to so's socket, + * updating all sbuf field as necessary + */ +int +sowrite(so) + struct socket *so; +{ + int n,nn; + struct sbuf *sb = &so->so_rcv; + int len = sb->sb_cc; + struct iovec iov[2]; + + DEBUG_CALL("sowrite"); + DEBUG_ARG("so = %lx", (long)so); + + if (so->so_urgc) { + sosendoob(so); + if (sb->sb_cc == 0) + return 0; + } + + /* + * No need to check if there's something to write, + * sowrite wouldn't have been called otherwise + */ + + len = sb->sb_cc; + + iov[0].iov_base = 
sb->sb_rptr; + if (sb->sb_rptr < sb->sb_wptr) { + iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_wptr - sb->sb_data; + if (iov[1].iov_len > len) iov[1].iov_len = len; + n = 2; + } else + n = 1; + } + /* Check if there's urgent data to send, and if so, send it */ + +#ifdef HAVE_READV + nn = writev(so->s, (const struct iovec *)iov, n); + + DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn)); +#else + nn = write(so->s, iov[0].iov_base, iov[0].iov_len); +#endif + /* This should never happen, but people tell me it does *shrug* */ + if (nn < 0 && (errno == EAGAIN || errno == EINTR)) + return 0; + + if (nn <= 0) { + DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n", + so->so_state, errno)); + sofcantsendmore(so); + tcp_sockclosed(sototcpcb(so)); + return -1; + } + +#ifndef HAVE_READV + if (n == 2 && nn == iov[0].iov_len) + nn += write(so->s, iov[1].iov_base, iov[1].iov_len); + DEBUG_MISC((dfd, " ... 
wrote nn = %d bytes\n", nn));
+#endif
+
+	/* Update sbuf */
+	sb->sb_cc -= nn;
+	sb->sb_rptr += nn;
+	if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
+		sb->sb_rptr -= sb->sb_datalen;
+
+	/*
+	 * If in DRAIN mode, and there's no more data, set
+	 * it CANTSENDMORE
+	 */
+	if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
+		sofcantsendmore(so);
+
+	return nn;
+}
+
+/*
+ * recvfrom() a UDP socket
+ *
+ * so_type == IPPROTO_ICMP ("ping" socket): the reply is read and thrown
+ * away.  If the read returned data, the request saved in so->so_m is
+ * turned around with icmp_reflect(); otherwise an ICMP_UNREACH error,
+ * with a code picked from errno, is sent for so->so_m.  Either way the
+ * socket is udp_detach()ed afterwards.
+ *
+ * Any other socket: the datagram is read into a new mbuf and handed to
+ * udp_output() together with the sender's address.  A failed read is
+ * answered with ICMP_UNREACH and the mbuf is freed.  Nothing is done if
+ * no mbuf can be allocated.
+ */
+void
+sorecvfrom(so)
+	struct socket *so;
+{
+	struct sockaddr_in addr;
+	int addrlen = sizeof(struct sockaddr_in);
+
+	DEBUG_CALL("sorecvfrom");
+	DEBUG_ARG("so = %lx", (long)so);
+
+	if (so->so_type == IPPROTO_ICMP) {	/* This is a "ping" reply */
+	  char buff[256];
+	  int len;
+
+	  len = recvfrom(so->s, buff, sizeof(buff), 0,
+			 (struct sockaddr *)&addr, &addrlen);
+	  /* XXX Check if reply is "correct"? */
+
+	  /*
+	   * NOTE(review): for len == 0 errno was not set by recvfrom(),
+	   * so the code below reports whatever errno held before.
+	   */
+	  if(len == -1 || len == 0) {
+	    u_char code=ICMP_UNREACH_PORT;
+
+	    if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
+	    else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
+
+	    DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
+			errno,strerror(errno)));
+	    icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
+	  } else {
+	    icmp_reflect(so->so_m);
+	    so->so_m = 0; /* Don't m_free() it again! */
+	  }
+	  /* No need for this socket anymore, udp_detach it */
+	  udp_detach(so);
+	} else {				/* A "normal" UDP packet */
+	  struct mbuf *m;
+	  int len;
+	  /*
+	   * Stays 0 when the FIONREAD query fails, so the size test
+	   * below never looks at an uninitialised value and we simply
+	   * read into the room the mbuf already has.
+	   */
+	  int n = 0;
+
+	  if (!(m = m_get())) return;
+	  m->m_data += if_maxlinkhdr;
+
+	  /*
+	   * XXX Shouldn't FIONREAD packets destined for port 53,
+	   * but I don't know the max packet size for DNS lookups
+	   */
+	  len = M_FREEROOM(m);
+	  /* if (so->so_fport != htons(53)) { */
+	  ioctl(so->s, FIONREAD, &n);
+
+	  /* More pending than fits: ask m_inc() for a bigger buffer */
+	  if (n > len) {
+	    n = (m->m_data - m->m_dat) + m->m_len + n + 1;
+	    m_inc(m, n);
+	    len = M_FREEROOM(m);
+	  }
+	  /* } */
+
+	  m->m_len = recvfrom(so->s, m->m_data, len, 0,
+			      (struct sockaddr *)&addr, &addrlen);
+	  DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
+		      m->m_len, errno,strerror(errno)));
+	  if(m->m_len<0) {
+	    u_char code=ICMP_UNREACH_PORT;
+
+	    if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
+	    else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
+
+	    DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
+	    icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
+	    m_free(m);
+	  } else {
+	  /*
+	   * Hack: domain name lookup will be used the most for UDP,
+	   * and since they'll only be used once there's no need
+	   * for the 4 minute (or whatever) timeout... So we time them
+	   * out much quicker (10 seconds for now...)
+	   */
+	    if (so->so_expire) {
+	      if (so->so_fport == htons(53))
+		so->so_expire = curtime + SO_EXPIREFAST;
+	      else
+		so->so_expire = curtime + SO_EXPIRE;
+	    }
+
+	    /*		if (m->m_len == len) {
+	     *			m_inc(m, MINCSIZE);
+	     *			m->m_len = 0;
+	     *		}
+	     */
+
+	    /*
+	     * If this packet was destined for CTL_ADDR,
+	     * make it look like that's where it came from, done by udp_output
+	     */
+	    udp_output(so, m, &addr);
+	  } /* rx error */
+	} /* if ping packet */
+}
+
+/*
+ * sendto() a socket
+ *
+ * Sends the data of mbuf m (m->m_data, m->m_len) to the socket's foreign
+ * address and port.  A foreign address inside the special_addr /24 is an
+ * alias: host CTL_DNS is redirected to dns_addr, every other host to
+ * loopback_addr.
+ *
+ * Returns 0 on success and -1 if sendto() failed.  On success the expiry
+ * of an expirable socket is pushed out by SO_EXPIRE and so_state is set
+ * to SS_ISFCONNECTED.
+ */
+int
+sosendto(so, m)
+	struct socket *so;
+	struct mbuf *m;
+{
+	int ret;
+	struct sockaddr_in addr;
+
+	DEBUG_CALL("sosendto");
+	DEBUG_ARG("so = %lx", (long)so);
+	DEBUG_ARG("m = %lx", (long)m);
+
+	addr.sin_family = AF_INET;
+	if ((so->so_faddr.s_addr & htonl(0xffffff00)) == special_addr.s_addr) {
+	  /* It's an alias */
+	  switch(ntohl(so->so_faddr.s_addr) & 0xff) {
+	  case CTL_DNS:
+	    addr.sin_addr = dns_addr;
+	    break;
+	  case CTL_ALIAS:
+	  default:
+	    addr.sin_addr = loopback_addr;
+	    break;
+	  }
+	} else
+	  addr.sin_addr = so->so_faddr;
+	addr.sin_port = so->so_fport;
+
+	DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n", ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
+
+	/* Don't care what port we get */
+	ret = sendto(so->s, m->m_data, m->m_len, 0,
+		     (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0)
+		return -1;
+
+	/*
+	 * Kill the socket if there's no reply in 4 minutes,
+	 * but only if it's an expirable socket
+	 */
+	if (so->so_expire)
+		so->so_expire = curtime + SO_EXPIRE;
+	so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
+	return 0;
+}
+
+/*
+ * XXX This should really be tcp_listen
+ *
+ * Creates a socket that listens on host port `port' (any local address)
+ * and returns it, already linked into the tcb list.
+ *
+ *   port   host port to bind; copied into sin_port as-is, so it has to
+ *          be in network byte order
+ *   laddr  stored in so_laddr, network byte order
+ *   lport  stored in so_lport, network byte order
+ *   flags  extra SS_* bits or'ed into so_state next to SS_FACCEPTCONN;
+ *          SS_FACCEPTONCE also arms the TCPT_KEEP timer so that the
+ *          socket times out
+ *
+ * so_fport/so_faddr are filled in from the address the host socket ended
+ * up bound to (our_addr replaces 0.0.0.0 and the loopback address).
+ *
+ * Returns NULL on failure; when socket(), bind() or listen() failed,
+ * errno is the one left by the failing call.
+ */
+struct socket *
+solisten(port, laddr, lport, flags)
+	u_int port;
+	u_int32_t laddr;
+	u_int lport;
+	int flags;
+{
+	struct sockaddr_in addr;
+	struct socket *so;
+	int s, addrlen = sizeof(addr), opt = 1;
+
+	DEBUG_CALL("solisten");
+	DEBUG_ARG("port = %d", port);
+	DEBUG_ARG("laddr = %x", laddr);
+	DEBUG_ARG("lport = %d", lport);
+	DEBUG_ARG("flags = %x", flags);
+
+	if ((so = socreate()) == NULL) {
+	  /* free(so);      Not sofree() ??? free(NULL) == NOP */
+	  return NULL;
+	}
+
+	/* Don't tcp_attach... we don't need so_snd nor so_rcv */
+	if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
+		free(so);
+		return NULL;
+	}
+	insque(so,&tcb);
+
+	/*
+	 * SS_FACCEPTONCE sockets must time out.
+	 */
+	if (flags & SS_FACCEPTONCE)
+	   so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
+
+	so->so_state = (SS_FACCEPTCONN|flags);
+	so->so_lport = lport; /* Kept in network format */
+	so->so_laddr.s_addr = laddr; /* Ditto */
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = INADDR_ANY;
+	addr.sin_port = port;
+
+	/*
+	 * SO_REUSEADDR is consulted by bind(), so it has to be set on the
+	 * fresh socket before binding; setting it afterwards does nothing
+	 * for this bind.
+	 */
+	s = socket(AF_INET,SOCK_STREAM,0);
+	if (s >= 0)
+		setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int));
+
+	if ((s < 0) ||
+	    (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
+	    (listen(s,1) < 0)) {
+		int tmperrno = errno; /* Don't clobber the real reason we failed */
+
+		/* No descriptor to close when socket() itself failed */
+		if (s >= 0)
+			close(s);
+		sofree(so);
+		/* Restore the real errno */
+		errno = tmperrno;
+		return NULL;
+	}
+	setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
+
+	getsockname(s,(struct sockaddr *)&addr,&addrlen);
+	so->so_fport = addr.sin_port;
+	if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
+	   so->so_faddr = our_addr;
+	else
+	   so->so_faddr = addr.sin_addr;
+
+	so->s = s;
+	return so;
+}
+
+/*
+ * Data is available in so_rcv
+ * Just write() the data to the socket
+ * XXX not yet...
+ */
+void
+sorwakeup(so)
+	struct socket *so;
+{
+/*	sowrite(so); */
+/*	FD_CLR(so->s,&writefds); */
+}
+
+/*
+ * Data has been freed in so_snd
+ * We have room for a read() if we want to
+ * For now, don't read, it'll be done in the main loop
+ */
+void
+sowwakeup(so)
+	struct socket *so;
+{
+	/* Nothing, yet */
+}
+
+/*
+ * Various session state calls
+ * XXX Should be #define's
+ * The socket state stuff needs work, these often get called 2 or 3
+ * times each when only 1 was needed
+ *
+ * soisfconnecting:   clears NOFDREF, ISFCONNECTED, both half-close bits
+ *                    and FWDRAIN, then sets SS_ISFCONNECTING
+ * soisfconnected:    clears ISFCONNECTING, FWDRAIN and NOFDREF, then
+ *                    sets SS_ISFCONNECTED
+ * sofcantrcvmore:    unless NOFDREF is set, shutdown(s,0) and drop s
+ *                    from global_writefds; the state becomes exactly
+ *                    SS_NOFDREF if sending was already shut down,
+ *                    otherwise SS_FCANTRCVMORE is added
+ * sofcantsendmore:   unless NOFDREF is set, shutdown(s,1) and drop s
+ *                    from global_readfds/global_xfds; the state becomes
+ *                    exactly SS_NOFDREF if receiving was already shut
+ *                    down, otherwise SS_FCANTSENDMORE is added
+ * soisfdisconnected: does nothing at the moment
+ */
+void
+soisfconnecting(so)
+	register struct socket *so;
+{
+	so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
+			  SS_FCANTSENDMORE|SS_FWDRAIN);
+	so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
+}
+
+void
+soisfconnected(so)
+	register struct socket *so;
+{
+	so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
+	so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
+}
+
+void
+sofcantrcvmore(so)
+	struct socket *so;
+{
+	if ((so->so_state & SS_NOFDREF) == 0) {
+		shutdown(so->s,0);
+		FD_CLR(so->s, global_writefds);
+	}
+	so->so_state &= ~(SS_ISFCONNECTING);
+	if (so->so_state & SS_FCANTSENDMORE)
+	   so->so_state = SS_NOFDREF; /* Don't select it */ /* XXX close() here as well? */
+	else
+	   so->so_state |= SS_FCANTRCVMORE;
+}
+
+void
+sofcantsendmore(so)
+	struct socket *so;
+{
+	if ((so->so_state & SS_NOFDREF) == 0) {
+		shutdown(so->s,1);           /* send FIN to fhost */
+		FD_CLR(so->s, global_readfds);
+		FD_CLR(so->s, global_xfds);
+	}
+	so->so_state &= ~(SS_ISFCONNECTING);
+	if (so->so_state & SS_FCANTRCVMORE)
+	   so->so_state = SS_NOFDREF; /* as above */
+	else
+	   so->so_state |= SS_FCANTSENDMORE;
+}
+
+void
+soisfdisconnected(so)
+	struct socket *so;
+{
+/*	so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED); */
+/*	close(so->s); */
+/*	so->so_state = SS_ISFDISCONNECTED; */
+	/*
+	 * XXX Do nothing ... ?
+	 */
+}
+
+/*
+ * Set write drain mode
+ * Set CANTSENDMORE once all data has been write()n
+ * (when so_rcv is already empty, sofcantsendmore() is called right away
+ * instead of setting SS_FWDRAIN)
+ */
+void
+sofwdrain(so)
+	struct socket *so;
+{
+	if (so->so_rcv.sb_cc)
+		so->so_state |= SS_FWDRAIN;
+	else
+		sofcantsendmore(so);
+}
+
diff --git a/slirp/socket.h b/slirp/socket.h
new file mode 100644
index 000000000..d05354c8c
--- /dev/null
+++ b/slirp/socket.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 1995 Danny Gasparovski.
+ *
+ * Please read the file COPYRIGHT for the
+ * terms and conditions of the copyright.
+ */
+
+/* MINE */
+
+#ifndef _SLIRP_SOCKET_H_
+#define _SLIRP_SOCKET_H_
+
+#define SO_EXPIRE 240000	/* the "4 minutes" of sosendto(); assumes curtime counts ms -- TODO confirm */
+#define SO_EXPIREFAST 10000	/* the "10 seconds" used by sorecvfrom() for port 53; same unit */
+
+/*
+ * Our socket structure
+ */
+
+struct socket {
+  struct socket *so_next,*so_prev;      /* For a linked list of sockets */
+
+  int s;                           /* The actual socket */
+
+			/* XXX union these with not-yet-used sbuf params */
+  struct mbuf *so_m;	           /* Pointer to the original SYN packet,
+				    * for non-blocking connect()'s, and
+				    * PING replies */
+  struct tcpiphdr *so_ti;	   /* Pointer to the original ti within
+				    * so_m, for non-blocking connections */
+  int so_urgc;
+  struct in_addr so_faddr;	   /* foreign host table entry */
+  struct in_addr so_laddr;	   /* local host table entry */
+  u_int16_t so_fport;		   /* foreign port */
+  u_int16_t so_lport;		   /* local port */
+
+  u_int8_t	so_iptos;	/* Type of service */
+  u_int8_t	so_emu;		/* Is the socket emulated? */
+
+  u_char	so_type;		/* Type of socket, UDP or TCP (sorecvfrom() also tests for IPPROTO_ICMP) */
+  int	so_state;		/* internal state flags SS_*, below */
+
+  struct 	tcpcb *so_tcpcb;	/* pointer to TCP protocol control block */
+  u_int	so_expire;		/* When the socket will expire */
+
+  int	so_queued;		/* Number of packets queued from this socket */
+  int	so_nqueued;		/* Number of packets queued in a row
+				 * Used to determine when to "downgrade" a session
+					 * from fastq to batchq */
+
+  struct sbuf so_rcv;		/* Receive buffer */
+  struct sbuf so_snd;		/* Send buffer */
+  void * extra;			/* Extra pointer */
+};
+
+
+/*
+ * Socket state bits. (peer means the host on the Internet,
+ * local host means the host on the other end of the modem)
+ */
+#define SS_NOFDREF		0x001	/* No fd reference */
+
+#define SS_ISFCONNECTING	0x002	/* Socket is connecting to peer (non-blocking connect()'s) */
+#define SS_ISFCONNECTED		0x004	/* Socket is connected to peer */
+#define SS_FCANTRCVMORE		0x008	/* Socket can't receive more from peer (for half-closes) */
+#define SS_FCANTSENDMORE	0x010	/* Socket can't send more to peer (for half-closes) */
+/* #define SS_ISFDISCONNECTED	0x020*/	/* Socket has disconnected from peer, in 2MSL state */
+#define SS_FWDRAIN		0x040	/* We received a FIN, drain data and set SS_FCANTSENDMORE */
+
+#define SS_CTL			0x080	/* NOTE(review): looks like "connection to the control address" -- confirm against tcp_input() */
+#define SS_FACCEPTCONN		0x100	/* Socket is accepting connections from a host on the internet */
+#define SS_FACCEPTONCE		0x200	/* If set, the SS_FACCEPTCONN socket will die after one accept */
+
+extern struct socket tcb;
+
+
+#if defined(DECLARE_IOVEC) && !defined(HAVE_READV)
+struct iovec {
+	char *iov_base;
+	size_t iov_len;
+};
+#endif
+
+void so_init _P((void));
+struct socket * solookup _P((struct socket *, struct in_addr, u_int, struct in_addr, u_int));
+struct socket * socreate _P((void));
+void sofree _P((struct socket *));
+int soread _P((struct socket *));
+void sorecvoob _P((struct socket *));
+int sosendoob _P((struct socket *));
+int sowrite _P((struct socket *));
+void sorecvfrom _P((struct socket *));
+int sosendto _P((struct socket *, struct mbuf *));
+struct socket * solisten _P((u_int, u_int32_t, u_int, int));
+void sorwakeup _P((struct socket *));
+void sowwakeup _P((struct socket *));
+void soisfconnecting _P((register struct socket *));
+void soisfconnected _P((register struct socket *));
+void sofcantrcvmore _P((struct socket *));
+void sofcantsendmore _P((struct socket *));
+void soisfdisconnected _P((struct socket *));
+void sofwdrain _P((struct socket *));
+
+#endif /* _SLIRP_SOCKET_H_ */
diff --git a/slirp/tcp.h b/slirp/tcp.h
new file mode 100644
index 
000000000..3e0b4dd8f --- /dev/null +++ b/slirp/tcp.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp + */ + +#ifndef _TCP_H_ +#define _TCP_H_ + +typedef u_int32_t tcp_seq; + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ + +extern int tcp_rcvspace; +extern int tcp_sndspace; +extern struct socket *tcp_last_so; + +#define TCP_SNDSPACE 8192 +#define TCP_RCVSPACE 8192 + +/* + * TCP header. + * Per RFC 793, September, 1981. + */ +struct tcphdr { + u_int16_t th_sport; /* source port */ + u_int16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#ifdef WORDS_BIGENDIAN + u_int th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#else + u_int th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif + u_int8_t th_flags; +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 + u_int16_t th_win; /* window */ + u_int16_t th_sum; /* checksum */ + u_int16_t th_urp; /* urgent pointer */ +}; + +#include "tcp_var.h" + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOLEN_MAXSEG 4 +#define TCPOPT_WINDOW 3 +#define TCPOLEN_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOPT_SACK 5 /* Experimental */ +#define TCPOPT_TIMESTAMP 8 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ + +#define TCPOPT_TSTAMP_HDR \ + (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) + +/* + * Default maximum segment size for TCP. + * With an IP MSS of 576, this is 536, + * but 512 is probably more convenient. + * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). 
+ */ +#define TCP_MSS 512 + +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ + +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +/* + * User-settable options (used with setsockopt). + */ +/* #define TCP_NODELAY 0x01 */ /* don't delay send to coalesce packets */ +/* #define TCP_MAXSEG 0x02 */ /* set maximum segment size */ + +/* + * TCP FSM state definitions. + * Per RFC793, September, 1981. + */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) + +/* + * TCP sequence numbers are 32 bit integers operated + * on with modular arithmetic. These macros can be + * used to compare such integers. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. 
+ */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss + +#define TCP_ISSINCR (125*1024) /* increment for tcp_iss each second */ + +extern tcp_seq tcp_iss; /* tcp initial send seq # */ + +extern char *tcpstates[]; + +#endif diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c new file mode 100644 index 000000000..eeee98597 --- /dev/null +++ b/slirp/tcp_input.c @@ -0,0 +1,1745 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 + * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include "ip_icmp.h" + +struct socket tcb; + +#define min(x,y) ((x) < (y) ? (x) : (y)) +#define max(x,y) ((x) > (y) ? (x) : (y)) + +int tcprexmtthresh = 3; +struct socket *tcp_last_so = &tcb; + +tcp_seq tcp_iss; /* tcp initial send seq # */ + +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + * Set DELACK for segments received in order, but ack immediately + * when segments are out of order (so fast retransmit can work). 
+ */ +#ifdef TCP_ACK_HACK +#define TCP_REASS(tp, ti, m, so, flags) {\ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + (tp)->seg_next == (tcpiphdrp_32)(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) {\ + if (ti->ti_flags & TH_PUSH) \ + tp->t_flags |= TF_ACKNOW; \ + else \ + tp->t_flags |= TF_DELACK; \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + tcpstat.tcps_rcvpack++;\ + tcpstat.tcps_rcvbyte += (ti)->ti_len;\ + if (so->so_emu) { \ + if (tcp_emu((so),(m))) sbappend((so), (m)); \ + } else \ + sbappend((so), (m)); \ +/* sorwakeup(so); */ \ + } else {\ + (flags) = tcp_reass((tp), (ti), (m)); \ + tp->t_flags |= TF_ACKNOW; \ + } \ +} +#else +#define TCP_REASS(tp, ti, m, so, flags) { \ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + (tp)->seg_next == (tcpiphdrp_32)(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) { \ + tp->t_flags |= TF_DELACK; \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + tcpstat.tcps_rcvpack++;\ + tcpstat.tcps_rcvbyte += (ti)->ti_len;\ + if (so->so_emu) { \ + if (tcp_emu((so),(m))) sbappend(so, (m)); \ + } else \ + sbappend((so), (m)); \ +/* sorwakeup(so); */ \ + } else { \ + (flags) = tcp_reass((tp), (ti), (m)); \ + tp->t_flags |= TF_ACKNOW; \ + } \ +} +#endif + +int +tcp_reass(tp, ti, m) + register struct tcpcb *tp; + register struct tcpiphdr *ti; + struct mbuf *m; +{ + register struct tcpiphdr *q; + struct socket *so = tp->t_socket; + int flags; + + /* + * Call with ti==0 after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (ti == 0) + goto present; + + /* + * Find a segment which begins after this one does. + */ + for (q = (struct tcpiphdr *)tp->seg_next; q != (struct tcpiphdr *)tp; + q = (struct tcpiphdr *)q->ti_next) + if (SEQ_GT(q->ti_seq, ti->ti_seq)) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. 
+ */ + if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { + register int i; + q = (struct tcpiphdr *)q->ti_prev; + /* conversion to int (in i) handles seq wraparound */ + i = q->ti_seq + q->ti_len - ti->ti_seq; + if (i > 0) { + if (i >= ti->ti_len) { + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += ti->ti_len; + m_freem(m); + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ + } + m_adj(m, i); + ti->ti_len -= i; + ti->ti_seq += i; + } + q = (struct tcpiphdr *)(q->ti_next); + } + tcpstat.tcps_rcvoopack++; + tcpstat.tcps_rcvoobyte += ti->ti_len; + REASS_MBUF(ti) = (mbufp_32) m; /* XXX */ + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q != (struct tcpiphdr *)tp) { + register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; + if (i <= 0) + break; + if (i < q->ti_len) { + q->ti_seq += i; + q->ti_len -= i; + m_adj((struct mbuf *) REASS_MBUF(q), i); + break; + } + q = (struct tcpiphdr *)q->ti_next; + m = (struct mbuf *) REASS_MBUF((struct tcpiphdr *)q->ti_prev); + remque_32((void *)(q->ti_prev)); + m_freem(m); + } + + /* + * Stick new segment in its place. + */ + insque_32(ti, (void *)(q->ti_prev)); + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. 
+ */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + return (0); + ti = (struct tcpiphdr *) tp->seg_next; + if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) + return (0); + do { + tp->rcv_nxt += ti->ti_len; + flags = ti->ti_flags & TH_FIN; + remque_32(ti); + m = (struct mbuf *) REASS_MBUF(ti); /* XXX */ + ti = (struct tcpiphdr *)ti->ti_next; +/* if (so->so_state & SS_FCANTRCVMORE) */ + if (so->so_state & SS_FCANTSENDMORE) + m_freem(m); + else { + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + } + } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); +/* sorwakeup(so); */ + return (flags); +} + +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +void +tcp_input(m, iphlen, inso) + register struct mbuf *m; + int iphlen; + struct socket *inso; +{ + struct ip save_ip, *ip; + register struct tcpiphdr *ti; + caddr_t optp = NULL; + int optlen = 0; + int len, tlen, off; + register struct tcpcb *tp = 0; + register int tiflags; + struct socket *so = 0; + int todrop, acked, ourfinisacked, needoutput = 0; +/* int dropsocket = 0; */ + int iss = 0; + u_long tiwin; + int ret; +/* int ts_present = 0; */ + + DEBUG_CALL("tcp_input"); + DEBUG_ARGS((dfd," m = %8lx iphlen = %2d inso = %lx\n", + (long )m, iphlen, (long )inso )); + + /* + * If called with m == 0, then we're continuing the connect + */ + if (m == NULL) { + so = inso; + + /* Re-set a few variables */ + tp = sototcpcb(so); + m = so->so_m; + so->so_m = 0; + ti = so->so_ti; + tiwin = ti->ti_win; + tiflags = ti->ti_flags; + + goto cont_conn; + } + + + tcpstat.tcps_rcvtotal++; + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. 
+ */ + ti = mtod(m, struct tcpiphdr *); + if (iphlen > sizeof(struct ip )) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen=sizeof(struct ip ); + } + /* XXX Check if too short */ + + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + ip=mtod(m, struct ip *); + save_ip = *ip; + save_ip.ip_len+= iphlen; + + /* + * Checksum extended TCP header and data. + */ + tlen = ((struct ip *)ti)->ip_len; + ti->ti_next = ti->ti_prev = 0; + ti->ti_x1 = 0; + ti->ti_len = htons((u_int16_t)tlen); + len = sizeof(struct ip ) + tlen; + /* keep checksum for ICMP reply + * ti->ti_sum = cksum(m, len); + * if (ti->ti_sum) { */ + if(cksum(m, len)) { + tcpstat.tcps_rcvbadsum++; + goto drop; + } + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = ti->ti_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { + tcpstat.tcps_rcvbadoff++; + goto drop; + } + tlen -= off; + ti->ti_len = tlen; + if (off > sizeof (struct tcphdr)) { + optlen = off - sizeof (struct tcphdr); + optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); + + /* + * Do quick retrieval of timestamp options ("options + * prediction?"). If timestamp is the only option and it's + * formatted as recommended in RFC 1323 appendix A, we + * quickly get the values now and not bother calling + * tcp_dooptions(), etc. + */ +/* if ((optlen == TCPOLEN_TSTAMP_APPA || + * (optlen > TCPOLEN_TSTAMP_APPA && + * optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) && + * *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) && + * (ti->ti_flags & TH_SYN) == 0) { + * ts_present = 1; + * ts_val = ntohl(*(u_int32_t *)(optp + 4)); + * ts_ecr = ntohl(*(u_int32_t *)(optp + 8)); + * optp = NULL; / * we've parsed the options * / + * } + */ + } + tiflags = ti->ti_flags; + + /* + * Convert TCP protocol specific fields to host format. 
+ */ + NTOHL(ti->ti_seq); + NTOHL(ti->ti_ack); + NTOHS(ti->ti_win); + NTOHS(ti->ti_urp); + + /* + * Drop TCP, IP headers and TCP options. + */ + m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + + /* + * Locate pcb for segment. + */ +findso: + so = tcp_last_so; + if (so->so_fport != ti->ti_dport || + so->so_lport != ti->ti_sport || + so->so_laddr.s_addr != ti->ti_src.s_addr || + so->so_faddr.s_addr != ti->ti_dst.s_addr) { + so = solookup(&tcb, ti->ti_src, ti->ti_sport, + ti->ti_dst, ti->ti_dport); + if (so) + tcp_last_so = so; + ++tcpstat.tcps_socachemiss; + } + + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. + * If the TCB exists but is in CLOSED state, it is embryonic, + * but should either do a listen or a connect soon. + * + * state == CLOSED means we've done socreate() but haven't + * attached it to a protocol yet... + * + * XXX If a TCB does not exist, and the TH_SYN flag is + * the only flag set, then create a session, mark it + * as if it was LISTENING, and continue... + */ + if (so == 0) { + if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN) + goto dropwithreset; + + if ((so = socreate()) == NULL) + goto dropwithreset; + if (tcp_attach(so) < 0) { + free(so); /* Not sofree (if it failed, it's not insqued) */ + goto dropwithreset; + } + + sbreserve(&so->so_snd, tcp_sndspace); + sbreserve(&so->so_rcv, tcp_rcvspace); + + /* tcp_last_so = so; */ /* XXX ? */ + /* tp = sototcpcb(so); */ + + so->so_laddr = ti->ti_src; + so->so_lport = ti->ti_sport; + so->so_faddr = ti->ti_dst; + so->so_fport = ti->ti_dport; + + if ((so->so_iptos = tcp_tos(so)) == 0) + so->so_iptos = ((struct ip *)ti)->ip_tos; + + tp = sototcpcb(so); + tp->t_state = TCPS_LISTEN; + } + + /* + * If this is a still-connecting socket, this probably + * a retransmit of the SYN. Whether it's a retransmit SYN + * or something else, we nuke it. 
+ */ + if (so->so_state & SS_ISFCONNECTING) + goto drop; + + tp = sototcpcb(so); + + /* XXX Should never fail */ + if (tp == 0) + goto dropwithreset; + if (tp->t_state == TCPS_CLOSED) + goto drop; + + /* Unscale the window into a 32-bit value. */ +/* if ((tiflags & TH_SYN) == 0) + * tiwin = ti->ti_win << tp->snd_scale; + * else + */ + tiwin = ti->ti_win; + + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + */ + tp->t_idle = 0; + if (so_options) + tp->t_timer[TCPT_KEEP] = tcp_keepintvl; + else + tp->t_timer[TCPT_KEEP] = tcp_keepidle; + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (optp && tp->t_state != TCPS_LISTEN) + tcp_dooptions(tp, (u_char *)optp, optlen, ti); +/* , */ +/* &ts_present, &ts_val, &ts_ecr); */ + + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * + * XXX Some of these tests are not needed + * eg: the tiwin == tp->snd_wnd prevents many more + * predictions.. with no *real* advantage.. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && +/* (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && */ + ti->ti_seq == tp->rcv_nxt && + tiwin && tiwin == tp->snd_wnd && + tp->snd_nxt == tp->snd_max) { + /* + * If last ACK falls within this segment's sequence numbers, + * record the timestamp. 
+ */ +/* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && + * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) { + * tp->ts_recent_age = tcp_now; + * tp->ts_recent = ts_val; + * } + */ + if (ti->ti_len == 0) { + if (SEQ_GT(ti->ti_ack, tp->snd_una) && + SEQ_LEQ(ti->ti_ack, tp->snd_max) && + tp->snd_cwnd >= tp->snd_wnd) { + /* + * this is a pure ack for outstanding data. + */ + ++tcpstat.tcps_predack; +/* if (ts_present) + * tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + * else + */ if (tp->t_rtt && + SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp, tp->t_rtt); + acked = ti->ti_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + sbdrop(&so->so_snd, acked); + tp->snd_una = ti->ti_ack; + m_freem(m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ + if (tp->snd_una == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + /* + * There's room in so_snd, sowwakup will read() + * from the socket if we can + */ +/* if (so->so_snd.sb_flags & SB_NOTIFY) + * sowwakeup(so); + */ + /* + * This is called because sowwakeup might have + * put data into so_snd. Since we don't so sowwakeup, + * we don't need this.. XXX??? + */ + if (so->so_snd.sb_cc) + (void) tcp_output(tp); + + return; + } + } else if (ti->ti_ack == tp->snd_una && + tp->seg_next == (tcpiphdrp_32)tp && + ti->ti_len <= sbspace(&so->so_rcv)) { + /* + * this is a pure, in-sequence data packet + * with nothing on the reassembly queue and + * we have enough buffer space to take it. + */ + ++tcpstat.tcps_preddat; + tp->rcv_nxt += ti->ti_len; + tcpstat.tcps_rcvpack++; + tcpstat.tcps_rcvbyte += ti->ti_len; + /* + * Add data to socket buffer. 
+ */ + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + + /* + * XXX This is called when data arrives. Later, check + * if we can actually write() to the socket + * XXX Need to check? It's be NON_BLOCKING + */ +/* sorwakeup(so); */ + + /* + * If this is a short packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * Here are 3 interpretations of what should happen. + * The best (for me) is to delay-ack everything except + * if it's a one-byte packet containing an ESC + * (this means it's an arrow key (or similar) sent using + * Nagel, hence there will be no echo) + * The first of these is the original, the second is the + * middle ground between the other 2 + */ +/* if (((unsigned)ti->ti_len < tp->t_maxseg)) { + */ +/* if (((unsigned)ti->ti_len < tp->t_maxseg && + * (so->so_iptos & IPTOS_LOWDELAY) == 0) || + * ((so->so_iptos & IPTOS_LOWDELAY) && + * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) { + */ + if ((unsigned)ti->ti_len == 1 && + ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { + tp->t_flags |= TF_ACKNOW; + tcp_output(tp); + } else { + tp->t_flags |= TF_DELACK; + } + return; + } + } /* header prediction */ + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ + { int win; + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + } + + switch (tp->t_state) { + + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. 
+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: { + + if (tiflags & TH_RST) + goto drop; + if (tiflags & TH_ACK) + goto dropwithreset; + if ((tiflags & TH_SYN) == 0) + goto drop; + + /* + * This has way too many gotos... + * But a bit of spaghetti code never hurt anybody :) + */ + + /* + * If this is destined for the control address, then flag to + * tcp_ctl once connected, otherwise connect + */ + if ((so->so_faddr.s_addr&htonl(0xffffff00)) == special_addr.s_addr) { + int lastbyte=ntohl(so->so_faddr.s_addr) & 0xff; + if (lastbyte!=CTL_ALIAS && lastbyte!=CTL_DNS) { +#if 0 + if(lastbyte==CTL_CMD || lastbyte==CTL_EXEC) { + /* Command or exec adress */ + so->so_state |= SS_CTL; + } else { + /* May be an add exec */ + struct ex_list *ex_ptr; + + for(ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if(ex_ptr->ex_fport == so->so_fport && + lastbyte == ex_ptr->ex_addr) { + so->so_state |= SS_CTL; + break; + } + } + } + if(so->so_state & SS_CTL) goto cont_input; +#endif + } + /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ + } + + if (so->so_emu & EMU_NOCONNECT) { + so->so_emu &= ~EMU_NOCONNECT; + goto cont_input; + } + + if(tcp_fconnect(so) == -1 && errno != EINPROGRESS) { + u_char code=ICMP_UNREACH_NET; + DEBUG_MISC((dfd," tcp fconnect errno = %d-%s\n", + errno,strerror(errno))); + if(errno == ECONNREFUSED) { + /* ACK the SYN, send RST to refuse the connection */ + tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0, + TH_RST|TH_ACK); + } else { + if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; + HTONL(ti->ti_seq); /* restore tcp header */ + HTONL(ti->ti_ack); + HTONS(ti->ti_win); + HTONS(ti->ti_urp); 
+ m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + *ip=save_ip; + icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno)); + } + tp = tcp_close(tp); + m_free(m); + } else { + /* + * Haven't connected yet, save the current mbuf + * and ti, and return + * XXX Some OS's don't tell us whether the connect() + * succeeded or not. So we must time it out. + */ + so->so_m = m; + so->so_ti = ti; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->t_state = TCPS_SYN_RECEIVED; + } + return; + + cont_conn: + /* m==NULL + * Check if the connect succeeded + */ + if (so->so_state & SS_NOFDREF) { + tp = tcp_close(tp); + goto dropwithreset; + } + cont_input: + tcp_template(tp); + + if (optp) + tcp_dooptions(tp, (u_char *)optp, optlen, ti); + /* , */ + /* &ts_present, &ts_val, &ts_ecr); */ + + if (iss) + tp->iss = iss; + else + tp->iss = tcp_iss; + tcp_iss += TCP_ISSINCR/2; + tp->irs = ti->ti_seq; + tcp_sendseqinit(tp); + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + tp->t_state = TCPS_SYN_RECEIVED; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tcpstat.tcps_accepts++; + goto trimthenstep6; + } /* case TCPS_LISTEN */ + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) + goto dropwithreset; + + if (tiflags & TH_RST) { + if (tiflags & TH_ACK) + tp = tcp_drop(tp,0); /* XXX Check t_softerror! 
*/ + goto drop; + } + + if ((tiflags & TH_SYN) == 0) + goto drop; + if (tiflags & TH_ACK) { + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + } + + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { + tcpstat.tcps_connects++; + soisfconnected(so); + tp->t_state = TCPS_ESTABLISHED; + + /* Do window scaling on this connection? */ +/* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + * (TF_RCVD_SCALE|TF_REQ_SCALE)) { + * tp->snd_scale = tp->requested_s_scale; + * tp->rcv_scale = tp->request_r_scale; + * } + */ + (void) tcp_reass(tp, (struct tcpiphdr *)0, + (struct mbuf *)0); + /* + * if we didn't have to retransmit the SYN, + * use its rtt as our initial srtt & rtt var. + */ + if (tp->t_rtt) + tcp_xmit_timer(tp, tp->t_rtt); + } else + tp->t_state = TCPS_SYN_RECEIVED; + +trimthenstep6: + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + ti->ti_seq++; + if (ti->ti_len > tp->rcv_wnd) { + todrop = ti->ti_len - tp->rcv_wnd; + m_adj(m, -todrop); + ti->ti_len = tp->rcv_wnd; + tiflags &= ~TH_FIN; + tcpstat.tcps_rcvpackafterwin++; + tcpstat.tcps_rcvbyteafterwin += todrop; + } + tp->snd_wl1 = ti->ti_seq - 1; + tp->rcv_up = ti->ti_seq; + goto step6; + } /* switch tp->t_state */ + /* + * States other than LISTEN or SYN_SENT. + * First check timestamp, if present. + * Then check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + * + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ +/* if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent && + * TSTMP_LT(ts_val, tp->ts_recent)) { + * + */ /* Check to see if ts_recent is over 24 days old. 
*/ +/* if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { + */ /* + * * Invalidate ts_recent. If this segment updates + * * ts_recent, the age will be reset later and ts_recent + * * will get a valid value. If it does not, setting + * * ts_recent to zero will at least satisfy the + * * requirement that zero be placed in the timestamp + * * echo reply when ts_recent isn't valid. The + * * age isn't reset until we get a valid ts_recent + * * because we don't want out-of-order segments to be + * * dropped when ts_recent is old. + * */ +/* tp->ts_recent = 0; + * } else { + * tcpstat.tcps_rcvduppack++; + * tcpstat.tcps_rcvdupbyte += ti->ti_len; + * tcpstat.tcps_pawsdrop++; + * goto dropafterack; + * } + * } + */ + + todrop = tp->rcv_nxt - ti->ti_seq; + if (todrop > 0) { + if (tiflags & TH_SYN) { + tiflags &= ~TH_SYN; + ti->ti_seq++; + if (ti->ti_urp > 1) + ti->ti_urp--; + else + tiflags &= ~TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if (todrop > ti->ti_len + || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + tiflags &= ~TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. + */ + tp->t_flags |= TF_ACKNOW; + todrop = ti->ti_len; + tcpstat.tcps_rcvduppack++; + tcpstat.tcps_rcvdupbyte += todrop; + } else { + tcpstat.tcps_rcvpartduppack++; + tcpstat.tcps_rcvpartdupbyte += todrop; + } + m_adj(m, todrop); + ti->ti_seq += todrop; + ti->ti_len -= todrop; + if (ti->ti_urp > todrop) + ti->ti_urp -= todrop; + else { + tiflags &= ~TH_URG; + ti->ti_urp = 0; + } + } + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. 
+ */ + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { + tp = tcp_close(tp); + tcpstat.tcps_rcvafterclose++; + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) { + tcpstat.tcps_rcvpackafterwin++; + if (todrop >= ti->ti_len) { + tcpstat.tcps_rcvbyteafterwin += ti->ti_len; + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if (tiflags & TH_SYN && + tp->t_state == TCPS_TIME_WAIT && + SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { + iss = tp->rcv_nxt + TCP_ISSINCR; + tp = tcp_close(tp); + goto findso; + } + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_rcvwinprobe++; + } else + goto dropafterack; + } else + tcpstat.tcps_rcvbyteafterwin += todrop; + m_adj(m, -todrop); + ti->ti_len -= todrop; + tiflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If last ACK falls within this segment's sequence numbers, + * record its timestamp. + */ +/* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) && + * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len + + * ((tiflags & (TH_SYN|TH_FIN)) != 0))) { + * tp->ts_recent_age = tcp_now; + * tp->ts_recent = ts_val; + * } + */ + + /* + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. 
+ * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. + */ + if (tiflags&TH_RST) switch (tp->t_state) { + + case TCPS_SYN_RECEIVED: +/* so->so_error = ECONNREFUSED; */ + goto close; + + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: +/* so->so_error = ECONNRESET; */ + close: + tp->t_state = TCPS_CLOSED; + tcpstat.tcps_drops++; + tp = tcp_close(tp); + goto drop; + + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tp = tcp_close(tp); + goto drop; + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. + */ + if (tiflags & TH_SYN) { + tp = tcp_drop(tp,0); + goto dropwithreset; + } + + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) goto drop; + + /* + * Ack processing. + */ + switch (tp->t_state) { + /* + * In SYN_RECEIVED state if the ack ACKs our SYN then enter + * ESTABLISHED state and continue processing, otherwise + * send an RST. una<=ack<=max + */ + case TCPS_SYN_RECEIVED: + + if (SEQ_GT(tp->snd_una, ti->ti_ack) || + SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropwithreset; + tcpstat.tcps_connects++; + tp->t_state = TCPS_ESTABLISHED; + /* + * The sent SYN is ack'ed with our sequence number +1 + * The first data byte already in the buffer will get + * lost if no correction is made. This is only needed for + * SS_CTL since the buffer is empty otherwise. + * tp->snd_una++; or: + */ + tp->snd_una=ti->ti_ack; + if (so->so_state & SS_CTL) { + /* So tcp_ctl reports the right state */ + ret = tcp_ctl(so); + if (ret == 1) { + soisfconnected(so); + so->so_state &= ~SS_CTL; /* success XXX */ + } else if (ret == 2) { + so->so_state = SS_NOFDREF; /* CTL_CMD */ + } else { + needoutput = 1; + tp->t_state = TCPS_FIN_WAIT_1; + } + } else { + soisfconnected(so); + } + + /* Do window scaling? 
*/ +/* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + * (TF_RCVD_SCALE|TF_REQ_SCALE)) { + * tp->snd_scale = tp->requested_s_scale; + * tp->rcv_scale = tp->request_r_scale; + * } + */ + (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); + tp->snd_wl1 = ti->ti_seq - 1; + /* Avoid ack processing; snd_una==ti_ack => dup ack */ + goto synrx_to_est; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. + */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + + if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { + if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { + tcpstat.tcps_rcvdupack++; + DEBUG_MISC((dfd," dup ack m = %lx so = %lx \n", + (long )m, (long )so)); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. 
+ */ + if (tp->t_timer[TCPT_REXMT] == 0 || + ti->ti_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks == tcprexmtthresh) { + tcp_seq onxt = tp->snd_nxt; + u_int win = + min(tp->snd_wnd, tp->snd_cwnd) / 2 / + tp->t_maxseg; + + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = ti->ti_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(tp); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (tp->t_dupacks > tcprexmtthresh) { + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } + } else + tp->t_dupacks = 0; + break; + } + synrx_to_est: + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ + if (tp->t_dupacks > tcprexmtthresh && + tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = 0; + if (SEQ_GT(ti->ti_ack, tp->snd_max)) { + tcpstat.tcps_rcvacktoomuch++; + goto dropafterack; + } + acked = ti->ti_ack - tp->snd_una; + tcpstat.tcps_rcvackpack++; + tcpstat.tcps_rcvackbyte += acked; + + /* + * If we have a timestamp reply, update smoothed + * round trip time. If no timestamp is present but + * transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. + */ +/* if (ts_present) + * tcp_xmit_timer(tp, tcp_now-ts_ecr+1); + * else + */ + if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp,tp->t_rtt); + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. 
+ */ + if (ti->ti_ack == tp->snd_max) { + tp->t_timer[TCPT_REXMT] = 0; + needoutput = 1; + } else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * When new data is acked, open the congestion window. + * If the window gives us less than ssthresh packets + * in flight, open exponentially (maxseg per packet). + * Otherwise open linearly: maxseg per window + * (maxseg^2 / cwnd per packet). + */ + { + register u_int cw = tp->snd_cwnd; + register u_int incr = tp->t_maxseg; + + if (cw > tp->snd_ssthresh) + incr = incr * incr / cw; + tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); + } + if (acked > so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + sbdrop(&so->so_snd, (int )so->so_snd.sb_cc); + ourfinisacked = 1; + } else { + sbdrop(&so->so_snd, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + /* + * XXX sowwakup is called when data is acked and there's room for + * for more data... it should read() the socket + */ +/* if (so->so_snd.sb_flags & SB_NOTIFY) + * sowwakeup(so); + */ + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_FCANTRCVMORE) { + soisfdisconnected(so); + tp->t_timer[TCPT_2MSL] = tcp_maxidle; + } + tp->t_state = TCPS_FIN_WAIT_2; + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. 
+ */ + case TCPS_CLOSING: + if (ourfinisacked) { + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisfdisconnected(so); + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { + tp = tcp_close(tp); + goto drop; + } + break; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + goto dropafterack; + } + } /* switch(tp->t_state) */ + +step6: + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((tiflags & TH_ACK) && + (SEQ_LT(tp->snd_wl1, ti->ti_seq) || + (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || + (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { + /* keep track of pure window updates */ + if (ti->ti_len == 0 && + tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) + tcpstat.tcps_rcvwinupd++; + tp->snd_wnd = tiwin; + tp->snd_wl1 = ti->ti_seq; + tp->snd_wl2 = ti->ti_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ + if ((tiflags & TH_URG) && ti->ti_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { + ti->ti_urp = 0; + tiflags &= ~TH_URG; + goto dodata; + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. 
This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { + tp->rcv_up = ti->ti_seq + ti->ti_urp; + so->so_urgc = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt); /* -1; */ + tp->rcv_up = ti->ti_seq + ti->ti_urp; + + } + } else + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. + */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; +dodata: + + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ((ti->ti_len || (tiflags&TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + TCP_REASS(tp, ti, m, so, tiflags); + /* + * Note the amount of data that peer has sent into + * our window, in order to estimate the sender's + * buffer size. + */ + len = so->so_rcv.sb_datalen - (tp->rcv_adv - tp->rcv_nxt); + } else { + m_free(m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (tiflags & TH_FIN) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * If we receive a FIN we can't send more data, + * set it SS_FDRAIN + * Shutdown the socket if there is no rx data in the + * buffer. 
+ * soread() is called on completion of shutdown() and + * will got to TCPS_LAST_ACK, and use tcp_output() + * to send the FIN. + */ +/* sofcantrcvmore(so); */ + sofwdrain(so); + + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) { + + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + if(so->so_emu == EMU_CTL) /* no shutdown on socket */ + tp->t_state = TCPS_LAST_ACK; + else + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisfdisconnected(so); + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + } + } + + /* + * If this is a small packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * See above. + */ +/* if (ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg) { + */ +/* if ((ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg && + * (so->so_iptos & IPTOS_LOWDELAY) == 0) || + * ((so->so_iptos & IPTOS_LOWDELAY) && + * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) { + */ + if (ti->ti_len && (unsigned)ti->ti_len <= 5 && + ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { + tp->t_flags |= TF_ACKNOW; + } + + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + (void) tcp_output(tp); + } + return; + +dropafterack: + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. 
+ */ + if (tiflags & TH_RST) + goto drop; + m_freem(m); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + return; + +dropwithreset: + /* reuses m if m!=NULL, m_free() unnecessary */ + if (tiflags & TH_ACK) + tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); + else { + if (tiflags & TH_SYN) ti->ti_len++; + tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, + TH_RST|TH_ACK); + } + + return; + +drop: + /* + * Drop space held by incoming segment and return. + */ + m_free(m); + + return; +} + + /* , ts_present, ts_val, ts_ecr) */ +/* int *ts_present; + * u_int32_t *ts_val, *ts_ecr; + */ +void +tcp_dooptions(tp, cp, cnt, ti) + struct tcpcb *tp; + u_char *cp; + int cnt; + struct tcpiphdr *ti; +{ + u_int16_t mss; + int opt, optlen; + + DEBUG_CALL("tcp_dooptions"); + DEBUG_ARGS((dfd," tp = %lx cnt=%i \n", (long )tp, cnt)); + + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + switch (opt) { + + default: + continue; + + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + memcpy((char *) &mss, (char *) cp + 2, sizeof(mss)); + NTOHS(mss); + (void) tcp_mss(tp, mss); /* sets t_maxseg */ + break; + +/* case TCPOPT_WINDOW: + * if (optlen != TCPOLEN_WINDOW) + * continue; + * if (!(ti->ti_flags & TH_SYN)) + * continue; + * tp->t_flags |= TF_RCVD_SCALE; + * tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); + * break; + */ +/* case TCPOPT_TIMESTAMP: + * if (optlen != TCPOLEN_TIMESTAMP) + * continue; + * *ts_present = 1; + * memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val)); + * NTOHL(*ts_val); + * memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr)); + * NTOHL(*ts_ecr); + * + */ /* + * * A timestamp received in a SYN makes + * * it ok to send timestamp requests and replies. 
+ * */ +/* if (ti->ti_flags & TH_SYN) { + * tp->t_flags |= TF_RCVD_TSTMP; + * tp->ts_recent = *ts_val; + * tp->ts_recent_age = tcp_now; + * } + */ break; + } + } +} + + +/* + * Pull out of band byte out of a segment so + * it doesn't appear in the user's data queue. + * It is still reflected in the segment length for + * sequencing purposes. + */ + +#ifdef notdef + +void +tcp_pulloutofband(so, ti, m) + struct socket *so; + struct tcpiphdr *ti; + register struct mbuf *m; +{ + int cnt = ti->ti_urp - 1; + + while (cnt >= 0) { + if (m->m_len > cnt) { + char *cp = mtod(m, caddr_t) + cnt; + struct tcpcb *tp = sototcpcb(so); + + tp->t_iobc = *cp; + tp->t_oobflags |= TCPOOB_HAVEDATA; + memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1)); + m->m_len--; + return; + } + cnt -= m->m_len; + m = m->m_next; /* XXX WRONG! Fix it! */ + if (m == 0) + break; + } + panic("tcp_pulloutofband"); +} + +#endif /* notdef */ + +/* + * Collect new round-trip time estimate + * and update averages and current timeout. + */ + +void +tcp_xmit_timer(tp, rtt) + register struct tcpcb *tp; + int rtt; +{ + register short delta; + + DEBUG_CALL("tcp_xmit_timer"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("rtt = %d", rtt); + + tcpstat.tcps_rttupdated++; + if (tp->t_srtt != 0) { + /* + * srtt is stored as fixed point with 3 bits after the + * binary point (i.e., scaled by 8). The following magic + * is equivalent to the smoothing algorithm in rfc793 with + * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed + * point). Adjust rtt to origin 0. + */ + delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); + if ((tp->t_srtt += delta) <= 0) + tp->t_srtt = 1; + /* + * We accumulate a smoothed rtt variance (actually, a + * smoothed mean difference), then set the retransmit + * timer to smoothed rtt + 4 times the smoothed variance. + * rttvar is stored as fixed point with 2 bits after the + * binary point (scaled by 4). 
The following is + * equivalent to rfc793 smoothing with an alpha of .75 + * (rttvar = rttvar*3/4 + |delta| / 4). This replaces + * rfc793's wired-in beta. + */ + if (delta < 0) + delta = -delta; + delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + if ((tp->t_rttvar += delta) <= 0) + tp->t_rttvar = 1; + } else { + /* + * No rtt measurement yet - use the unsmoothed rtt. + * Set the variance to half the rtt (so our first + * retransmit happens at 3*rtt). + */ + tp->t_srtt = rtt << TCP_RTT_SHIFT; + tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); + } + tp->t_rtt = 0; + tp->t_rxtshift = 0; + + /* + * the retransmit should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + */ + TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + + /* + * We received an ack for a packet that wasn't retransmitted; + * it is probably safe to discard any error indications we've + * received recently. This isn't quite right, but close enough + * for now (a route might have failed after we sent a segment, + * and the return path might not be symmetrical). + */ + tp->t_softerror = 0; +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing + * interface without forcing IP to fragment; if bigger than + * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES + * to utilize large mbufs. 
If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + */ + +int +tcp_mss(tp, offer) + register struct tcpcb *tp; + u_int offer; +{ + struct socket *so = tp->t_socket; + int mss; + + DEBUG_CALL("tcp_mss"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("offer = %d", offer); + + mss = min(if_mtu, if_mru) - sizeof(struct tcpiphdr); + if (offer) + mss = min(mss, offer); + mss = max(mss, 32); + if (mss < tp->t_maxseg || offer != 0) + tp->t_maxseg = mss; + + tp->snd_cwnd = mss; + + sbreserve(&so->so_snd, tcp_sndspace+((tcp_sndspace%mss)?(mss-(tcp_sndspace%mss)):0)); + sbreserve(&so->so_rcv, tcp_rcvspace+((tcp_rcvspace%mss)?(mss-(tcp_rcvspace%mss)):0)); + + DEBUG_MISC((dfd, " returning mss = %d\n", mss)); + + return mss; +} diff --git a/slirp/tcp_output.c b/slirp/tcp_output.c new file mode 100644 index 000000000..0f05dfadc --- /dev/null +++ b/slirp/tcp_output.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 + * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> + +#define max(x,y) ((x) > (y) ? (x) : (y)) +#define min(x,y) ((x) < (y) ? 
(x) : (y))
+
+/*
+ * Since this is only used in "stats socket", we give meaning
+ * names instead of the REAL names
+ * (indexed by tp->t_state, in the same order as tcp_outflags below)
+ */
+char *tcpstates[] = {
+/* "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", */
+	"REDIRECT", "LISTEN", "SYN_SENT", "SYN_RCVD",
+	"ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
+	"LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
+};
+
+u_char tcp_outflags[TCP_NSTATES] = {
+	TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK,
+	TH_ACK, TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK,
+	TH_FIN|TH_ACK, TH_ACK, TH_ACK,
+};
+
+
+#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */
+
+/*
+ * Tcp output routine: figure out what should be sent and send it.
+ *
+ * The header flags come from tcp_outflags[tp->t_state].  The routine
+ * first decides whether a segment is worth sending at all; if not it
+ * may start the persist timer and returns 0.  Otherwise it builds one
+ * segment in a fresh mbuf (template header, optional MSS option on a
+ * SYN, data copied out of so->so_snd), updates the send sequence
+ * variables and timers and hands the mbuf to ip_output().  When more
+ * than one segment's worth of data is pending (sendalot) it loops
+ * back to "again" and repeats.
+ *
+ * Returns 0 when nothing had to be sent or everything was handed to
+ * ip_output() without error, 1 when m_get() could not supply an mbuf,
+ * and otherwise the non-zero value returned by ip_output().
+ */
+int
+tcp_output(tp)
+	register struct tcpcb *tp;
+{
+	register struct socket *so = tp->t_socket;
+	register long len, win;
+	int off, flags, error;
+	register struct mbuf *m;
+	register struct tcpiphdr *ti;
+	u_char opt[MAX_TCPOPTLEN];
+	unsigned optlen, hdrlen;
+	int idle, sendalot;
+
+	DEBUG_CALL("tcp_output");
+	DEBUG_ARG("tp = %lx", (long )tp);
+
+	/*
+	 * Determine length of data that should be transmitted,
+	 * and flags that will be used.
+	 * If there is some data or critical controls (SYN, RST)
+	 * to send, then transmit; otherwise, investigate further.
+	 */
+	idle = (tp->snd_max == tp->snd_una);
+	if (idle && tp->t_idle >= tp->t_rxtcur)
+		/*
+		 * We have been idle for "a while" and no acks are
+		 * expected to clock out any data we send --
+		 * slow start to get ack "clock" running again.
+		 */
+		tp->snd_cwnd = tp->t_maxseg;
+again:
+	sendalot = 0;
+	off = tp->snd_nxt - tp->snd_una;
+	win = min(tp->snd_wnd, tp->snd_cwnd);
+
+	flags = tcp_outflags[tp->t_state];
+
+	DEBUG_MISC((dfd, " --- tcp_output flags = 0x%x\n",flags));
+
+	/*
+	 * If in persist timeout with window of 0, send 1 byte.
+	 * Otherwise, if window is small but nonzero
+	 * and timer expired, we will send what we can
+	 * and go to transmit state.
+	 */
+	if (tp->t_force) {
+		if (win == 0) {
+			/*
+			 * If we still have some data to send, then
+			 * clear the FIN bit. Usually this would
+			 * happen below when it realizes that we
+			 * aren't sending all the data. However,
+			 * if we have exactly 1 byte of unsent data,
+			 * then it won't clear the FIN bit below,
+			 * and if we are in persist state, we wind
+			 * up sending the packet without recording
+			 * that we sent the FIN bit.
+			 *
+			 * We can't just blindly clear the FIN bit,
+			 * because if we don't have any more data
+			 * to send then the probe will be the FIN
+			 * itself.
+			 */
+			if (off < so->so_snd.sb_cc)
+				flags &= ~TH_FIN;
+			win = 1;
+		} else {
+			tp->t_timer[TCPT_PERSIST] = 0;
+			tp->t_rxtshift = 0;
+		}
+	}
+
+	len = min(so->so_snd.sb_cc, win) - off;
+
+	if (len < 0) {
+		/*
+		 * If FIN has been sent but not acked,
+		 * but we haven't been called to retransmit,
+		 * len will be -1. Otherwise, window shrank
+		 * after we sent into it. If window shrank to 0,
+		 * cancel pending retransmit and pull snd_nxt
+		 * back to (closed) window. We will enter persist
+		 * state below. If the window didn't close completely,
+		 * just wait for an ACK.
+		 */
+		len = 0;
+		if (win == 0) {
+			tp->t_timer[TCPT_REXMT] = 0;
+			tp->snd_nxt = tp->snd_una;
+		}
+	}
+
+	if (len > tp->t_maxseg) {
+		len = tp->t_maxseg;
+		sendalot = 1;
+	}
+	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+		flags &= ~TH_FIN;
+
+	win = sbspace(&so->so_rcv);
+
+	/*
+	 * Sender silly window avoidance. If connection is idle
+	 * and can send all data, a maximum segment,
+	 * at least a maximum default-size segment do it,
+	 * or are forced, do it; otherwise don't bother.
+	 * If peer's buffer is tiny, then send
+	 * when window is at least half open.
+	 * If retransmitting (possibly after persist timer forced us
+	 * to send into a small window), then must resend.
+	 *
+	 * Note that the leading "1 ||" in the second test below makes
+	 * the idle/TF_NODELAY condition always true, so a segment that
+	 * carries everything left in the send buffer is always sent.
+	 */
+	if (len) {
+		if (len == tp->t_maxseg)
+			goto send;
+		if ((1 || idle || tp->t_flags & TF_NODELAY) &&
+		    len + off >= so->so_snd.sb_cc)
+			goto send;
+		if (tp->t_force)
+			goto send;
+		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+			goto send;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+			goto send;
+	}
+
+	/*
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input). If the difference is at least two
+	 * max size segments, or at least 50% of the maximum possible
+	 * window, then want to send a window update to peer.
+	 */
+	if (win > 0) {
+		/*
+		 * "adv" is the amount we can increase the window,
+		 * taking into account that we are limited by
+		 * TCP_MAXWIN << tp->rcv_scale.
+		 */
+		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+			(tp->rcv_adv - tp->rcv_nxt);
+
+		if (adv >= (long) (2 * tp->t_maxseg))
+			goto send;
+		if (2 * adv >= (long) so->so_rcv.sb_datalen)
+			goto send;
+	}
+
+	/*
+	 * Send if we owe peer an ACK.
+	 */
+	if (tp->t_flags & TF_ACKNOW)
+		goto send;
+	if (flags & (TH_SYN|TH_RST))
+		goto send;
+	if (SEQ_GT(tp->snd_up, tp->snd_una))
+		goto send;
+	/*
+	 * If our state indicates that FIN should be sent
+	 * and we have not yet done so, or we're retransmitting the FIN,
+	 * then we need to send.
+	 */
+	if (flags & TH_FIN &&
+	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+		goto send;
+
+	/*
+	 * TCP window updates are not reliable, rather a polling protocol
+	 * using ``persist'' packets is used to insure receipt of window
+	 * updates. The three ``states'' for the output side are:
+	 *	idle			not doing retransmits or persists
+	 *	persisting		to move a small or zero window
+	 *	(re)transmitting	and thereby not persisting
+	 *
+	 * tp->t_timer[TCPT_PERSIST]
+	 *	is set when we are in persist state.
+	 * tp->t_force
+	 *	is set when we are called to send a persist packet.
+	 * tp->t_timer[TCPT_REXMT]
+	 *	is set when we are retransmitting
+	 * The output side is idle when both timers are zero.
+	 *
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise force out a byte.
+	 */
+	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
+	    tp->t_timer[TCPT_PERSIST] == 0) {
+		tp->t_rxtshift = 0;
+		tcp_setpersist(tp);
+	}
+
+	/*
+	 * No reason to send a segment, just return.
+	 */
+	tcpstat.tcps_didnuttin++;
+
+	return (0);
+
+send:
+	/*
+	 * Before ESTABLISHED, force sending of initial options
+	 * unless TCP set not to do any options.
+	 * NOTE: we assume that the IP/TCP header plus TCP options
+	 * always fit in a single mbuf, leaving room for a maximum
+	 * link header, i.e.
+	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
+	 *
+	 * The only option built here is a 4-byte maximum segment size
+	 * option on segments carrying SYN.
+	 */
+	optlen = 0;
+	hdrlen = sizeof (struct tcpiphdr);
+	if (flags & TH_SYN) {
+		tp->snd_nxt = tp->iss;
+		if ((tp->t_flags & TF_NOOPT) == 0) {
+			u_int16_t mss;
+
+			opt[0] = TCPOPT_MAXSEG;
+			opt[1] = 4;
+			mss = htons((u_int16_t) tcp_mss(tp, 0));
+			memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss));
+			optlen = 4;
+
+/*			if ((tp->t_flags & TF_REQ_SCALE) &&
+ *			    ((flags & TH_ACK) == 0 ||
+ *			    (tp->t_flags & TF_RCVD_SCALE))) {
+ *				*((u_int32_t *) (opt + optlen)) = htonl(
+ *					TCPOPT_NOP << 24 |
+ *					TCPOPT_WINDOW << 16 |
+ *					TCPOLEN_WINDOW << 8 |
+ *					tp->request_r_scale);
+ *				optlen += 4;
+ *			}
+ */
+		}
+	}
+
+	/*
+	 * Send a timestamp and echo-reply if this is a SYN and our side
+	 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
+	 * and our peer have sent timestamps in our SYN's.
+	 * (The code for this is disabled below.)
+	 */
+/*	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ *	     (flags & TH_RST) == 0 &&
+ *	    ((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
+ *	     (tp->t_flags & TF_RCVD_TSTMP))) {
+ *		u_int32_t *lp = (u_int32_t *)(opt + optlen);
+ *
+ *		/ * Form timestamp option as shown in appendix A of RFC 1323. * /
+ *		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ *		*lp++ = htonl(tcp_now);
+ *		*lp = htonl(tp->ts_recent);
+ *		optlen += TCPOLEN_TSTAMP_APPA;
+ *	}
+ */
+	hdrlen += optlen;
+
+	/*
+	 * Adjust data length if insertion of options will
+	 * bump the packet length beyond the t_maxseg length.
+	 */
+	if (len > tp->t_maxseg - optlen) {
+		len = tp->t_maxseg - optlen;
+		sendalot = 1;
+	}
+
+	/*
+	 * Grab a header mbuf, attaching a copy of data to
+	 * be transmitted, and initialize the header from
+	 * the template for sends on this connection.
+	 */
+	if (len) {
+		if (tp->t_force && len == 1)
+			tcpstat.tcps_sndprobe++;
+		else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+			tcpstat.tcps_sndrexmitpack++;
+			tcpstat.tcps_sndrexmitbyte += len;
+		} else {
+			tcpstat.tcps_sndpack++;
+			tcpstat.tcps_sndbyte += len;
+		}
+
+		m = m_get();
+		if (m == NULL) {
+/*			error = ENOBUFS; */
+			error = 1;	/* no mbuf: "out" returns this 1 to the caller */
+			goto out;
+		}
+		m->m_data += if_maxlinkhdr;
+		m->m_len = hdrlen;
+
+		/*
+		 * This will always succeed, since we make sure our mbufs
+		 * are big enough to hold one MSS packet + header + ... etc.
+		 */
+/*		if (len <= MHLEN - hdrlen - max_linkhdr) { */
+
+			sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen);
+			m->m_len += len;
+
+/*		} else {
+ *			m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+ *			if (m->m_next == 0)
+ *				len = 0;
+ *		}
+ */
+		/*
+		 * If we're sending everything we've got, set PUSH.
+		 * (This will keep happy those implementations which only
+		 * give data to the user when a buffer fills or
+		 * a PUSH comes in.)
+		 */
+		if (off + len == so->so_snd.sb_cc)
+			flags |= TH_PUSH;
+	} else {
+		if (tp->t_flags & TF_ACKNOW)
+			tcpstat.tcps_sndacks++;
+		else if (flags & (TH_SYN|TH_FIN|TH_RST))
+			tcpstat.tcps_sndctrl++;
+		else if (SEQ_GT(tp->snd_up, tp->snd_una))
+			tcpstat.tcps_sndurg++;
+		else
+			tcpstat.tcps_sndwinup++;
+
+		m = m_get();
+		if (m == NULL) {
+/*			error = ENOBUFS; */
+			error = 1;	/* no mbuf: "out" returns this 1 to the caller */
+			goto out;
+		}
+		m->m_data += if_maxlinkhdr;
+		m->m_len = hdrlen;
+	}
+
+	ti = mtod(m, struct tcpiphdr *);
+
+	memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr));
+
+	/*
+	 * Fill in fields, remembering maximum advertised
+	 * window for use in delaying messages about window sizes.
+	 * If resending a FIN, be sure not to use a new sequence number.
+	 */
+	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+	    tp->snd_nxt == tp->snd_max)
+		tp->snd_nxt--;
+	/*
+	 * If we are doing retransmissions, then snd_nxt will
+	 * not reflect the first unsent octet. For ACK only
+	 * packets, we do not want the sequence number of the
+	 * retransmitted packet, we want the sequence number
+	 * of the next unsent octet. So, if there is no data
+	 * (and no SYN or FIN), use snd_max instead of snd_nxt
+	 * when filling in ti_seq. But if we are in persist
+	 * state, snd_max might reflect one byte beyond the
+	 * right edge of the window, so use snd_nxt in that
+	 * case, since we know we aren't doing a retransmission.
+	 * (retransmit and persist are mutually exclusive...)
+	 */
+	if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+		ti->ti_seq = htonl(tp->snd_nxt);
+	else
+		ti->ti_seq = htonl(tp->snd_max);
+	ti->ti_ack = htonl(tp->rcv_nxt);
+	if (optlen) {
+		memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen);
+		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
+	}
+	ti->ti_flags = flags;
+	/*
+	 * Calculate receive window. Don't shrink window,
+	 * but avoid silly window syndrome.
+	 */
+	if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg)
+		win = 0;
+	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+		win = (long)TCP_MAXWIN << tp->rcv_scale;
+	if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
+		win = (long)(tp->rcv_adv - tp->rcv_nxt);
+	ti->ti_win = htons((u_int16_t) (win>>tp->rcv_scale));
+
+	if (SEQ_GT(tp->snd_up, tp->snd_una)) {
+		ti->ti_urp = htons((u_int16_t)(tp->snd_up - ntohl(ti->ti_seq)));
+#ifdef notdef
+	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+		ti->ti_urp = htons((u_int16_t)(tp->snd_up - tp->snd_nxt));
+#endif
+		ti->ti_flags |= TH_URG;
+	} else
+		/*
+		 * If no urgent pointer to send, then we pull
+		 * the urgent pointer to the left edge of the send window
+		 * so that it doesn't drift into the send window on sequence
+		 * number wraparound.
+		 */
+		tp->snd_up = tp->snd_una;		/* drag it along */
+
+	/*
+	 * Put TCP length in extended header, and then
+	 * checksum extended header and data.
+	 */
+	if (len + optlen)
+		ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) +
+		    optlen + len));
+	ti->ti_sum = cksum(m, (int)(hdrlen + len));
+
+	/*
+	 * In transmit state, time the transmission and arrange for
+	 * the retransmit. In persist state, just set snd_max.
+	 */
+	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+		tcp_seq startseq = tp->snd_nxt;
+
+		/*
+		 * Advance snd_nxt over sequence space of this segment.
+		 */
+		if (flags & (TH_SYN|TH_FIN)) {
+			if (flags & TH_SYN)
+				tp->snd_nxt++;
+			if (flags & TH_FIN) {
+				tp->snd_nxt++;
+				tp->t_flags |= TF_SENTFIN;
+			}
+		}
+		tp->snd_nxt += len;
+		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+			tp->snd_max = tp->snd_nxt;
+			/*
+			 * Time this transmission if not a retransmission and
+			 * not currently timing anything.
+			 */
+			if (tp->t_rtt == 0) {
+				tp->t_rtt = 1;
+				tp->t_rtseq = startseq;
+				tcpstat.tcps_segstimed++;
+			}
+		}
+
+		/*
+		 * Set retransmit timer if not currently set,
+		 * and not doing an ack or a keep-alive probe.
+		 * Initial value for retransmit timer is smoothed
+		 * round-trip time + 2 * round-trip time variance.
+		 * Initialize shift counter which is used for backoff
+		 * of retransmit time.
+		 */
+		if (tp->t_timer[TCPT_REXMT] == 0 &&
+		    tp->snd_nxt != tp->snd_una) {
+			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			if (tp->t_timer[TCPT_PERSIST]) {
+				tp->t_timer[TCPT_PERSIST] = 0;
+				tp->t_rxtshift = 0;
+			}
+		}
+	} else
+		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
+			tp->snd_max = tp->snd_nxt + len;
+
+	/*
+	 * Fill in IP length and desired time to live and
+	 * send to IP level. There should be a better way
+	 * to handle ttl and tos; we could keep them in
+	 * the template, but need a way to checksum without them.
+	 */
+	m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */
+
+    {
+
+	((struct ip *)ti)->ip_len = m->m_len;
+
+	((struct ip *)ti)->ip_ttl = ip_defttl;
+	((struct ip *)ti)->ip_tos = so->so_iptos;
+
+/* #if BSD >= 43 */
+	/* Don't do IP options... */
+/*	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+ *	    so->so_options & SO_DONTROUTE, 0);
+ */
+	error = ip_output(so, m);
+
+/* #else
+ *	error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route,
+ *	    so->so_options & SO_DONTROUTE);
+ * #endif
+ */
+    }
+	if (error) {
+out:
+/*		if (error == ENOBUFS) {
+ *			tcp_quench(tp->t_inpcb, 0);
+ *			return (0);
+ *		}
+ */
+/*		if ((error == EHOSTUNREACH || error == ENETDOWN)
+ *		    && TCPS_HAVERCVDSYN(tp->t_state)) {
+ *			tp->t_softerror = error;
+ *			return (0);
+ *		}
+ */
+		return (error);	/* also reached by "goto out" after a failed m_get() */
+	}
+	tcpstat.tcps_sndtotal++;
+
+	/*
+	 * Data sent (as far as we can tell).
+	 * If this advertises a larger window than any other segment,
+	 * then remember the size of the advertised window.
+	 * Any pending ACK has now been sent.
+	 */
+	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+		tp->rcv_adv = tp->rcv_nxt + win;
+	tp->last_ack_sent = tp->rcv_nxt;
+	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
+	if (sendalot)
+		goto again;
+
+	return (0);
+}
+
+void
+tcp_setpersist(tp)
+	register struct tcpcb *tp;
+{
+    int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+
+/*	if (tp->t_timer[TCPT_REXMT])
+ *		panic("tcp_output REXMT");
+ */
+	/*
+	 * Start/restart persistence timer.
+	 * The interval is t (derived above from the smoothed rtt and its
+	 * variance) times the backoff factor for the current shift,
+	 * passed through TCPT_RANGESET with TCPTV_PERSMIN/TCPTV_PERSMAX
+	 * as bounds.  The shift is then advanced, up to TCP_MAXRXTSHIFT,
+	 * so that the next call uses the next tcp_backoff[] entry.
+	 */
+	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
+	    t * tcp_backoff[tp->t_rxtshift],
+	    TCPTV_PERSMIN, TCPTV_PERSMAX);
+	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+		tp->t_rxtshift++;
+}
diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c
new file mode 100644
index 000000000..bf8a2026f
--- /dev/null
+++ b/slirp/tcp_subr.c
@@ -0,0 +1,1325 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
+ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp
+ */
+
+/*
+ * Changes and additions relating to SLiRP
+ * Copyright (c) 1995 Danny Gasparovski.
+ *
+ * Please read the file COPYRIGHT for the
+ * terms and conditions of the copyright.
+ */
+
+#define WANT_SYS_IOCTL_H
+#include <slirp.h>
+
+/* patchable/settable parameters for tcp */
+int	tcp_mssdflt = TCP_MSS;
+int	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+int	tcp_do_rfc1323 = 0;	/* Don't do rfc1323 performance enhancements */
+int	tcp_rcvspace;	/* You may want to change this */
+int	tcp_sndspace;	/* Keep small if you have an error prone link */
+
+/*
+ * Tcp initialization
+ *
+ * Sets the initial send sequence number tcp_iss to 1, makes the tcb
+ * socket list empty (its head links to itself), and loads
+ * tcp_rcvspace and tcp_sndspace from TCP_RCVSPACE and TCP_SNDSPACE.
+ * tcp_sndspace is then raised, if necessary, to twice
+ * min(if_mtu, if_mru) minus the size of the TCP/IP header.
+ */
+void
+tcp_init()
+{
+	tcp_iss = 1;		/* wrong */
+	tcb.so_next = tcb.so_prev = &tcb;
+
+	/* tcp_rcvspace = our Window we advertise to the remote */
+	tcp_rcvspace = TCP_RCVSPACE;
+	tcp_sndspace = TCP_SNDSPACE;
+
+	/*
+	 * Make sure tcp_sndspace is at least 2*MSS
+	 * NOTE(review): the right-hand side involves sizeof, so the
+	 * comparison is presumably carried out in an unsigned type --
+	 * confirm this is intended if the MTU/MRU can be smaller than
+	 * the header.
+	 */
+	if (tcp_sndspace < 2*(min(if_mtu, if_mru) - sizeof(struct tcpiphdr)))
+		tcp_sndspace = 2*(min(if_mtu, if_mru) - sizeof(struct tcpiphdr));
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Call after host entry created, fills
+ * in a skeletal tcp/ip header, minimizing the amount of work
+ * necessary when the connection is used.
+ */
+/* struct tcpiphdr * */
+void
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	struct socket *so = tp->t_socket;
+	register struct tcpiphdr *hdr = &tp->t_template;
+
+	/* Overlay part of the header: no queue links, protocol, TCP length. */
+	hdr->ti_prev = 0;
+	hdr->ti_next = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+
+	/*
+	 * Endpoints: the source is taken from the socket's foreign
+	 * address/port and the destination from its local address/port.
+	 */
+	hdr->ti_src = so->so_faddr;
+	hdr->ti_sport = so->so_fport;
+	hdr->ti_dst = so->so_laddr;
+	hdr->ti_dport = so->so_lport;
+
+	/* TCP fields: all zero, except the header length field set to 5. */
+	hdr->ti_off = 5;
+	hdr->ti_x2 = 0;
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_urp = 0;
+	hdr->ti_sum = 0;
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template. If flags are given
+ * then we send a message back to the TCP which originated the
+ * segment ti, and discard the mbuf containing it and any other
+ * attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ */ +void +tcp_respond(tp, ti, m, ack, seq, flags) + struct tcpcb *tp; + register struct tcpiphdr *ti; + register struct mbuf *m; + tcp_seq ack, seq; + int flags; +{ + register int tlen; + int win = 0; + + DEBUG_CALL("tcp_respond"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("ti = %lx", (long)ti); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("ack = %u", ack); + DEBUG_ARG("seq = %u", seq); + DEBUG_ARG("flags = %x", flags); + + if (tp) + win = sbspace(&tp->t_socket->so_rcv); + if (m == 0) { + if ((m = m_get()) == NULL) + return; +#ifdef TCP_COMPAT_42 + tlen = 1; +#else + tlen = 0; +#endif + m->m_data += if_maxlinkhdr; + *mtod(m, struct tcpiphdr *) = *ti; + ti = mtod(m, struct tcpiphdr *); + flags = TH_ACK; + } else { + /* + * ti points into m so the next line is just making + * the mbuf point to ti + */ + m->m_data = (caddr_t)ti; + + m->m_len = sizeof (struct tcpiphdr); + tlen = 0; +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } + xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t); + xchg(ti->ti_dport, ti->ti_sport, u_int16_t); +#undef xchg + } + ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); + tlen += sizeof (struct tcpiphdr); + m->m_len = tlen; + + ti->ti_next = ti->ti_prev = 0; + ti->ti_x1 = 0; + ti->ti_seq = htonl(seq); + ti->ti_ack = htonl(ack); + ti->ti_x2 = 0; + ti->ti_off = sizeof (struct tcphdr) >> 2; + ti->ti_flags = flags; + if (tp) + ti->ti_win = htons((u_int16_t) (win >> tp->rcv_scale)); + else + ti->ti_win = htons((u_int16_t)win); + ti->ti_urp = 0; + ti->ti_sum = 0; + ti->ti_sum = cksum(m, tlen); + ((struct ip *)ti)->ip_len = tlen; + + if(flags & TH_RST) + ((struct ip *)ti)->ip_ttl = MAXTTL; + else + ((struct ip *)ti)->ip_ttl = ip_defttl; + + (void) ip_output((struct socket *)0, m); +} + +/* + * Create a new TCP control block, making an + * empty reassembly queue and hooking it to the argument + * protocol control block. 
+ */
+struct tcpcb *
+tcp_newtcpcb(so)
+	struct socket *so;
+{
+	register struct tcpcb *cb;
+
+	/* Allocate the control block already zero-filled. */
+	cb = (struct tcpcb *)calloc(1, sizeof(struct tcpcb));
+	if (cb == NULL)
+		return ((struct tcpcb *)0);
+
+	/* An empty reassembly queue links back to the control block itself. */
+	cb->seg_prev = (tcpiphdrp_32)cb;
+	cb->seg_next = (tcpiphdrp_32)cb;
+
+	cb->t_socket = so;
+	cb->t_maxseg = tcp_mssdflt;
+	if (tcp_do_rfc1323)
+		cb->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+	else
+		cb->t_flags = 0;
+
+	/*
+	 * A smoothed rtt of TCPTV_SRTTBASE marks "no estimate yet"; the
+	 * variance is seeded from the default rtt so that the first
+	 * retransmit timeout computed from the pair is reasonable.
+	 */
+	cb->t_srtt = TCPTV_SRTTBASE;
+	cb->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
+	cb->t_rttmin = TCPTV_MIN;
+
+	TCPT_RANGESET(cb->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+
+	/* Congestion window and slow-start threshold both start at the maximum. */
+	cb->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	cb->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	cb->t_state = TCPS_CLOSED;
+
+	/* Hook the new control block onto its socket. */
+	so->so_tcpcb = cb;
+
+	return (cb);
+}
+
+/*
+ * Drop a TCP connection, reporting
+ * the specified error. If connection is synchronized,
+ * then send a RST to peer.
+ */ +struct tcpcb *tcp_drop(struct tcpcb *tp, int errno) +{ +/* tcp_drop(tp, errno) + register struct tcpcb *tp; + int errno; +{ +*/ + + DEBUG_CALL("tcp_drop"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("errno = %d", errno); + + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_state = TCPS_CLOSED; + (void) tcp_output(tp); + tcpstat.tcps_drops++; + } else + tcpstat.tcps_conndrops++; +/* if (errno == ETIMEDOUT && tp->t_softerror) + * errno = tp->t_softerror; + */ +/* so->so_error = errno; */ + return (tcp_close(tp)); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(tp) + register struct tcpcb *tp; +{ + register struct tcpiphdr *t; + struct socket *so = tp->t_socket; + register struct mbuf *m; + + DEBUG_CALL("tcp_close"); + DEBUG_ARG("tp = %lx", (long )tp); + + /* free the reassembly queue, if any */ + t = (struct tcpiphdr *) tp->seg_next; + while (t != (struct tcpiphdr *)tp) { + t = (struct tcpiphdr *)t->ti_next; + m = (struct mbuf *) REASS_MBUF((struct tcpiphdr *)t->ti_prev); + remque_32((struct tcpiphdr *) t->ti_prev); + m_freem(m); + } + /* It's static */ +/* if (tp->t_template) + * (void) m_free(dtom(tp->t_template)); + */ +/* free(tp, M_PCB); */ + free(tp); + so->so_tcpcb = 0; + soisfdisconnected(so); + /* clobber input socket cache if we're closing the cached connection */ + if (so == tcp_last_so) + tcp_last_so = &tcb; + close(so->s); + sbfree(&so->so_rcv); + sbfree(&so->so_snd); + sofree(so); + tcpstat.tcps_closed++; + return ((struct tcpcb *)0); +} + +void +tcp_drain() +{ + /* XXX */ +} + +/* + * When a source quench is received, close congestion window + * to one segment. We will gradually open it again as we proceed. 
+ */ + +#ifdef notdef + +void +tcp_quench(i, errno) + + int errno; +{ + struct tcpcb *tp = intotcpcb(inp); + + if (tp) + tp->snd_cwnd = tp->t_maxseg; +} + +#endif /* notdef */ + +/* + * TCP protocol interface to socket abstraction. + */ + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +void +tcp_sockclosed(tp) + struct tcpcb *tp; +{ + + DEBUG_CALL("tcp_sockclosed"); + DEBUG_ARG("tp = %lx", (long)tp); + + switch (tp->t_state) { + + case TCPS_CLOSED: + case TCPS_LISTEN: + case TCPS_SYN_SENT: + tp->t_state = TCPS_CLOSED; + tp = tcp_close(tp); + break; + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_FIN_WAIT_1; + break; + + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_LAST_ACK; + break; + } +/* soisfdisconnecting(tp->t_socket); */ + if (tp && tp->t_state >= TCPS_FIN_WAIT_2) + soisfdisconnected(tp->t_socket); + if (tp) + tcp_output(tp); +} + +/* + * Connect to a host on the Internet + * Called by tcp_input + * Only do a connect, the tcp fields will be set in tcp_input + * return 0 if there's a result of the connect, + * else return -1 means we're still connecting + * The return value is almost always -1 since the socket is + * nonblocking. Connect returns after the SYN is sent, and does + * not wait for ACK+SYN. 
+ */
+int tcp_fconnect(so)
+	struct socket *so;
+{
+	struct sockaddr_in addr;
+	int s, opt, ret;
+
+	DEBUG_CALL("tcp_fconnect");
+	DEBUG_ARG("so = %lx", (long )so);
+
+	/* Without a descriptor there is nothing to connect; hand the failure back. */
+	so->s = socket(AF_INET,SOCK_STREAM,0);
+	ret = so->s;
+	if (ret < 0)
+		return(ret);
+	s = so->s;
+
+	fd_nonblock(s);
+	opt = 1;
+	setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(opt ));
+	setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(opt ));
+
+	/*
+	 * Pick the real destination.  Addresses inside the special
+	 * network are aliases: the CTL_DNS host maps to dns_addr, every
+	 * other host (CTL_ALIAS included) to loopback_addr.
+	 */
+	addr.sin_family = AF_INET;
+	if ((so->so_faddr.s_addr & htonl(0xffffff00)) != special_addr.s_addr)
+		addr.sin_addr = so->so_faddr;
+	else if ((ntohl(so->so_faddr.s_addr) & 0xff) == CTL_DNS)
+		addr.sin_addr = dns_addr;
+	else
+		addr.sin_addr = loopback_addr;
+	addr.sin_port = so->so_fport;
+
+	DEBUG_MISC((dfd, " connect()ing, addr.sin_port=%d, "
+		"addr.sin_addr.s_addr=%.16s\n",
+		ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
+	/* We don't care what port we get */
+	ret = connect(s,(struct sockaddr *)&addr,sizeof (addr));
+
+	/*
+	 * The socket is marked as connecting whatever connect() said;
+	 * its result is passed back to the caller unchanged.
+	 */
+	soisfconnecting(so);
+
+	return(ret);
+}
+
+/*
+ * Accept the socket and connect to the local-host
+ *
+ * We have a problem. The correct thing to do would be
+ * to first connect to the local-host, and only if the
+ * connection is accepted, then do an accept() here.
+ * But, a) we need to know who's trying to connect
+ * to the socket to be able to SYN the local-host, and
+ * b) we are already connected to the foreign host by
+ * the time it gets to accept(), so... We simply accept
+ * here and SYN the local-host.
+ */ +void +tcp_connect(inso) + struct socket *inso; +{ + struct socket *so; + struct sockaddr_in addr; + int addrlen = sizeof(struct sockaddr_in); + struct tcpcb *tp; + int s, opt; + + DEBUG_CALL("tcp_connect"); + DEBUG_ARG("inso = %lx", (long)inso); + + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. + */ + if (inso->so_state & SS_FACCEPTONCE) { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } else { + if ((so = socreate()) == NULL) { + /* If it failed, get rid of the pending connection */ + close(accept(inso->s,(struct sockaddr *)&addr,&addrlen)); + return; + } + if (tcp_attach(so) < 0) { + free(so); /* NOT sofree */ + return; + } + so->so_laddr = inso->so_laddr; + so->so_lport = inso->so_lport; + } + + (void) tcp_mss(sototcpcb(so), 0); + + if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) { + tcp_close(sototcpcb(so)); /* This will sofree() as well */ + return; + } + fd_nonblock(s); + opt = 1; + setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)); + opt = 1; + setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int)); + + so->so_fport = addr.sin_port; + so->so_faddr = addr.sin_addr; + /* Translate connections from localhost to the real hostname */ + if (so->so_faddr.s_addr == 0 || so->so_faddr.s_addr == loopback_addr.s_addr) + so->so_faddr = our_addr; + + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) { + close(so->s); /* If we only accept once, close the accept() socket */ + so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + } + so->s = s; + + so->so_iptos = tcp_tos(so); + tp = sototcpcb(so); + + tcp_template(tp); + + /* Compute window scaling to request. 
*/
+/*	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ *		(TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+ *		tp->request_r_scale++;
+ */
+
+/*	soisconnecting(so); */ /* NOFDREF used instead */
+	tcpstat.tcps_connattempt++;
+
+	tp->t_state = TCPS_SYN_SENT;
+	tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+	tp->iss = tcp_iss;
+	tcp_iss += TCP_ISSINCR/2;
+	tcp_sendseqinit(tp);
+	tcp_output(tp);
+}
+
+/*
+ * Attach a TCPCB to a socket.
+ *
+ * Stores the control block returned by tcp_newtcpcb() in so->so_tcpcb
+ * and queues the socket on the global tcb list.
+ *
+ * Returns 0 on success, -1 if tcp_newtcpcb() returned NULL (the socket
+ * is then not queued).
+ */
+int
+tcp_attach(so)
+	struct socket *so;
+{
+	if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL)
+	   return -1;
+
+	insque(so, &tcb);
+
+	return 0;
+}
+
+/*
+ * Set the socket's type of service field
+ *
+ * Built-in port -> (TOS, emulation) table consulted by tcp_tos().
+ * The scan stops at the first entry whose tos member is 0, so the
+ * all-zero entry must stay last.
+ */
+struct tos_t tcptos[] = {
+	  {0, 20, IPTOS_THROUGHPUT, 0},	/* ftp data */
+	  {21, 21, IPTOS_LOWDELAY,  EMU_FTP},	/* ftp control */
+	  {0, 23, IPTOS_LOWDELAY, 0},	/* telnet */
+	  {0, 80, IPTOS_THROUGHPUT, 0},	/* WWW */
+	  {0, 513, IPTOS_LOWDELAY, EMU_RLOGIN|EMU_NOCONNECT},	/* rlogin */
+	  {0, 514, IPTOS_LOWDELAY, EMU_RSH|EMU_NOCONNECT},	/* shell */
+	  {0, 544, IPTOS_LOWDELAY, EMU_KSH},		/* kshell */
+	  {0, 543, IPTOS_LOWDELAY, 0},	/* klogin */
+	  {0, 6667, IPTOS_THROUGHPUT, EMU_IRC},	/* IRC */
+	  {0, 6668, IPTOS_THROUGHPUT, EMU_IRC},	/* IRC undernet */
+	  {0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */
+	  {0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */
+	  {0, 0, 0, 0}
+};
+
+/* Head of the list of user-added emulation entries (empty by default) */
+struct emu_t *tcpemu = 0;
+
+/*
+ * Return TOS according to the above table
+ *
+ * The socket's foreign and local ports (converted to host order) are
+ * compared first against tcptos[] and then against the tcpemu list.
+ * On the first match so->so_emu is set from the matching entry and
+ * that entry's TOS is returned.  If nothing matches, 0 is returned
+ * and so->so_emu is left untouched.
+ */
+u_int8_t
+tcp_tos(so)
+	struct socket *so;
+{
+	int i = 0;
+	struct emu_t *emup;
+
+	while(tcptos[i].tos) {
+		if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) ||
+		    (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) {
+			so->so_emu = tcptos[i].emu;
+			return tcptos[i].tos;
+		}
+		i++;
+	}
+
+	/* Nope, let's see if there's a user-added one */
+	for (emup = tcpemu; emup; emup = emup->next) {
+		if ((emup->fport && (ntohs(so->so_fport) == emup->fport)) ||
+		    (emup->lport && (ntohs(so->so_lport) == emup->lport))) {
+			so->so_emu = emup->emu;
+			return emup->tos;
+		}
+	}
+
+	return 0;
+}
+
+/* Echo mode of the (compiled-out) control connection; -1 = not yet decided */
+int do_echo = -1;
+
+/*
+ * Emulate programs that try and connect to us
+ * This includes ftp (the data connection is
+ * initiated by the server) and IRC (DCC CHAT and
+ * DCC SEND) for now
+ *
+ * The emulation is selected by so->so_emu.  EMU_IDENT, EMU_FTP,
+ * EMU_KSH, EMU_IRC and EMU_REALAUDIO are handled; the EMU_RLOGIN,
+ * EMU_RSH and EMU_CTL cases are compiled out (#if 0), so those values
+ * take the default branch, which clears so->so_emu.
+ *
+ * NOTE: It's possible to crash SLiRP by sending it
+ * nonstandard strings to emulate... if this is a problem,
+ * more checks are needed here
+ *
+ * XXX Assumes the whole command came in one packet
+ *
+ * XXX Some ftp clients will have their TOS set to
+ * LOWDELAY and so Nagle will kick in.  Because of this,
+ * we'll get the first letter, followed by the rest, so
+ * we simply scan for ORT instead of PORT...
+ * DCC doesn't have this problem because there's other stuff
+ * in the packet before the DCC command.
+ *
+ * Return 1 if the mbuf m is still valid and should be
+ * sbappend()ed
+ *
+ * NOTE: if you return 0 you MUST m_free() the mbuf!
+ */
+int
+tcp_emu(so, m)
+	struct socket *so;
+	struct mbuf *m;
+{
+	u_int n1, n2, n3, n4, n5, n6;
+	/*
+	 * 257, not 256: the "%256s" and "%256[^\177]" conversions below
+	 * may store 256 characters plus the terminating NUL.
+	 */
+	char buff[257];
+	u_int32_t laddr;
+	u_int lport;
+	char *bptr;
+
+	DEBUG_CALL("tcp_emu");
+	DEBUG_ARG("so = %lx", (long)so);
+	DEBUG_ARG("m = %lx", (long)m);
+
+	switch(so->so_emu) {
+		int x, i;
+
+	 case EMU_IDENT:
+		/*
+		 * Identification protocol as per rfc-1413
+		 */
+
+		{
+			struct socket *tmpso;
+			struct sockaddr_in addr;
+			int addrlen = sizeof(struct sockaddr_in);
+			struct sbuf *so_rcv = &so->so_rcv;
+
+			/*
+			 * Do not copy more than the free tail of the receive
+			 * buffer can hold; returning 1 hands the untouched
+			 * mbuf back to the caller.
+			 */
+			if (m->m_len > so_rcv->sb_datalen
+					- (so_rcv->sb_wptr - so_rcv->sb_data))
+				return 1;
+
+			memcpy(so_rcv->sb_wptr, m->m_data, m->m_len);
+			so_rcv->sb_wptr += m->m_len;
+			so_rcv->sb_rptr += m->m_len;
+			/* NOTE(review): writes one byte past the mbuf payload; room is not checked here */
+			m->m_data[m->m_len] = 0; /* NULL terminate */
+			if (strchr(m->m_data, '\r') || strchr(m->m_data, '\n')) {
+				if (sscanf(so_rcv->sb_data, "%d%*[ ,]%d", &n1, &n2) == 2) {
+					HTONS(n1);
+					HTONS(n2);
+					/* n2 is the one on our host */
+					for (tmpso = tcb.so_next; tmpso != &tcb; tmpso = tmpso->so_next) {
+						if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr &&
+						    tmpso->so_lport == n2 &&
+						    tmpso->so_faddr.s_addr == so->so_faddr.s_addr &&
+						    tmpso->so_fport == n1) {
+							if (getsockname(tmpso->s,
+								(struct sockaddr *)&addr, &addrlen) == 0)
+							   n2 = ntohs(addr.sin_port);
+							break;
+						}
+					}
+				}
+				so_rcv->sb_cc = sprintf(so_rcv->sb_data, "%d,%d\r\n", n1, n2);
+				so_rcv->sb_rptr = so_rcv->sb_data;
+				so_rcv->sb_wptr = so_rcv->sb_data + so_rcv->sb_cc;
+			}
+			m_free(m);
+			return 0;
+		}
+
+#if 0
+	 case EMU_RLOGIN:
+		/*
+		 * Rlogin emulation
+		 * First we accumulate all the initial option negotiation,
+		 * then fork_exec() rlogin according to the options
+		 */
+		{
+			int i, i2, n;
+			char *ptr;
+			char args[100];
+			char term[100];
+			struct sbuf *so_snd = &so->so_snd;
+			struct sbuf *so_rcv = &so->so_rcv;
+
+			/* First check if they have a privileged port, or too much data has arrived */
+			if (ntohs(so->so_lport) > 1023 || ntohs(so->so_lport) < 512 ||
+			    (m->m_len + so_rcv->sb_wptr) > (so_rcv->sb_data + so_rcv->sb_datalen)) {
+				memcpy(so_snd->sb_wptr, "Permission denied\n", 18);
+				so_snd->sb_wptr += 18;
+				so_snd->sb_cc += 18;
+				tcp_sockclosed(sototcpcb(so));
+				m_free(m);
+				return 0;
+			}
+
+			/* Append the current data */
+			memcpy(so_rcv->sb_wptr, m->m_data, m->m_len);
+			so_rcv->sb_wptr += m->m_len;
+			so_rcv->sb_rptr += m->m_len;
+			m_free(m);
+
+			/*
+			 * Check if we have all the initial options,
+			 * and build argument list to rlogin while we're here
+			 */
+			n = 0;
+			ptr = so_rcv->sb_data;
+			args[0] = 0;
+			term[0] = 0;
+			while (ptr < so_rcv->sb_wptr) {
+				if (*ptr++ == 0) {
+					n++;
+					if (n == 2) {
+						sprintf(args, "rlogin -l %s %s",
+							ptr, inet_ntoa(so->so_faddr));
+					} else if (n == 3) {
+						i2 = so_rcv->sb_wptr - ptr;
+						for (i = 0; i < i2; i++) {
+							if (ptr[i] == '/') {
+								ptr[i] = 0;
+#ifdef HAVE_SETENV
+								sprintf(term, "%s", ptr);
+#else
+								sprintf(term, "TERM=%s", ptr);
+#endif
+								ptr[i] = '/';
+								break;
+							}
+						}
+					}
+				}
+			}
+
+			if (n != 4)
+			   return 0;
+
+			/* We have it, set our term variable and fork_exec() */
+#ifdef HAVE_SETENV
+			setenv("TERM", term, 1);
+#else
+			putenv(term);
+#endif
+			fork_exec(so, args, 2);
+			term[0] = 0;
+			so->so_emu = 0;
+
+			/* And finally, send the client a 0 character */
+			so_snd->sb_wptr[0] = 0;
+			so_snd->sb_wptr++;
+			so_snd->sb_cc++;
+
+			return 0;
+		}
+
+	 case EMU_RSH:
+		/*
+		 * rsh emulation
+		 * First we accumulate all the initial option negotiation,
+		 * then rsh_exec() rsh according to the options
+		 */
+		{
+			int  n;
+			char *ptr;
+			char *user;
+			char *args;
+			struct sbuf *so_snd = &so->so_snd;
+			struct sbuf *so_rcv = &so->so_rcv;
+
+			/* First check if they have a privileged port, or too much data has arrived */
+			if (ntohs(so->so_lport) > 1023 || ntohs(so->so_lport) < 512 ||
+			    (m->m_len + so_rcv->sb_wptr) > (so_rcv->sb_data + so_rcv->sb_datalen)) {
+				memcpy(so_snd->sb_wptr, "Permission denied\n", 18);
+				so_snd->sb_wptr += 18;
+				so_snd->sb_cc += 18;
+				tcp_sockclosed(sototcpcb(so));
+				m_free(m);
+				return 0;
+			}
+
+			/* Append the current data */
+			memcpy(so_rcv->sb_wptr, m->m_data, m->m_len);
+			so_rcv->sb_wptr += m->m_len;
+			so_rcv->sb_rptr += m->m_len;
+			m_free(m);
+
+			/*
+			 * Check if we have all the initial options,
+			 * and build argument list to rlogin while we're here
+			 */
+			n = 0;
+			ptr = so_rcv->sb_data;
+			user="";
+			args="";
+			if (so->extra==NULL) {
+				struct socket *ns;
+				struct tcpcb* tp;
+				int port=atoi(ptr);
+				if (port <= 0) return 0;
+				if (port > 1023 || port < 512) {
+					memcpy(so_snd->sb_wptr, "Permission denied\n", 18);
+					so_snd->sb_wptr += 18;
+					so_snd->sb_cc += 18;
+					tcp_sockclosed(sototcpcb(so));
+					return 0;
+				}
+				if ((ns=socreate()) == NULL)
+					return 0;
+				if (tcp_attach(ns)<0) {
+					free(ns);
+					return 0;
+				}
+
+				ns->so_laddr=so->so_laddr;
+				ns->so_lport=htons(port);
+
+				(void) tcp_mss(sototcpcb(ns), 0);
+
+				ns->so_faddr=so->so_faddr;
+				ns->so_fport=htons(IPPORT_RESERVED-1); /* Use a fake port. */
+
+				if (ns->so_faddr.s_addr == 0 ||
+					ns->so_faddr.s_addr == loopback_addr.s_addr)
+					ns->so_faddr = our_addr;
+
+				ns->so_iptos = tcp_tos(ns);
+				tp = sototcpcb(ns);
+
+				tcp_template(tp);
+
+				/* Compute window scaling to request.  */
+				/*	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+				 *		(TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+				 *		tp->request_r_scale++;
+				 */
+
+				/*soisfconnecting(ns);*/
+
+				tcpstat.tcps_connattempt++;
+
+				tp->t_state = TCPS_SYN_SENT;
+				tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+				tp->iss = tcp_iss;
+				tcp_iss += TCP_ISSINCR/2;
+				tcp_sendseqinit(tp);
+				tcp_output(tp);
+				so->extra=ns;
+			}
+			while (ptr < so_rcv->sb_wptr) {
+				if (*ptr++ == 0) {
+					n++;
+					if (n == 2) {
+						user=ptr;
+					} else if (n == 3) {
+						args=ptr;
+					}
+				}
+			}
+
+			if (n != 4)
+			   return 0;
+
+			rsh_exec(so,so->extra, user, inet_ntoa(so->so_faddr), args);
+			so->so_emu = 0;
+			so->extra=NULL;
+
+			/* And finally, send the client a 0 character */
+			so_snd->sb_wptr[0] = 0;
+			so_snd->sb_wptr++;
+			so_snd->sb_cc++;
+
+			return 0;
+		}
+
+	 case EMU_CTL:
+		{
+			int num;
+			struct sbuf *so_snd = &so->so_snd;
+			struct sbuf *so_rcv = &so->so_rcv;
+
+			/*
+			 * If there is binary data here, we save it in so->so_m
+			 */
+			if (!so->so_m) {
+				int rxlen;
+				char *rxdata;
+				rxdata=mtod(m, char *);
+				for (rxlen=m->m_len; rxlen; rxlen--) {
+					if (*rxdata++ & 0x80) {
+						so->so_m = m;
+						return 0;
+					}
+				}
+			} /* if(so->so_m==NULL) */
+
+			/*
+			 * Append the line
+			 */
+			sbappendsb(so_rcv, m);
+
+			/* To avoid going over the edge of the buffer, we reset it */
+			if (so_snd->sb_cc == 0)
+			   so_snd->sb_wptr = so_snd->sb_rptr = so_snd->sb_data;
+
+			/*
+			 * A bit of a hack:
+			 * If the first packet we get here is 1 byte long, then it
+			 * was done in telnet character mode, therefore we must echo
+			 * the characters as they come.  Otherwise, we echo nothing,
+			 * because in linemode, the line is already echoed
+			 * XXX two or more control connections won't work
+			 */
+			if (do_echo == -1) {
+				if (m->m_len == 1) do_echo = 1;
+				else do_echo = 0;
+			}
+			if (do_echo) {
+				sbappendsb(so_snd, m);
+				m_free(m);
+				tcp_output(sototcpcb(so)); /* XXX */
+			} else
+			   m_free(m);
+
+			num = 0;
+			while (num < so->so_rcv.sb_cc) {
+				if (*(so->so_rcv.sb_rptr + num) == '\n' ||
+				    *(so->so_rcv.sb_rptr + num) == '\r') {
+					int n;
+
+					*(so_rcv->sb_rptr + num) = 0;
+					if (ctl_password && !ctl_password_ok) {
+						/* Need a password */
+						if (sscanf(so_rcv->sb_rptr, "pass %256s", buff) == 1) {
+							if (strcmp(buff, ctl_password) == 0) {
+								ctl_password_ok = 1;
+								n = sprintf(so_snd->sb_wptr,
+									    "Password OK.\r\n");
+								goto do_prompt;
+							}
+						}
+						n = sprintf(so_snd->sb_wptr,
+					 "Error: Password required, log on with \"pass PASSWORD\"\r\n");
+						goto do_prompt;
+					}
+					cfg_quitting = 0;
+					n = do_config(so_rcv->sb_rptr, so, PRN_SPRINTF);
+					if (!cfg_quitting) {
+						/* Register the printed data */
+do_prompt:
+						so_snd->sb_cc += n;
+						so_snd->sb_wptr += n;
+						/* Add prompt */
+						n = sprintf(so_snd->sb_wptr, "Slirp> ");
+						so_snd->sb_cc += n;
+						so_snd->sb_wptr += n;
+					}
+					/* Drop so_rcv data */
+					so_rcv->sb_cc = 0;
+					so_rcv->sb_wptr = so_rcv->sb_rptr = so_rcv->sb_data;
+					tcp_output(sototcpcb(so)); /* Send the reply */
+				}
+				num++;
+			}
+			return 0;
+		}
+#endif
+        case EMU_FTP: /* ftp */
+		*(m->m_data+m->m_len) = 0; /* NULL terminate for strstr */
+		if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) {
+			/*
+			 * Need to emulate the PORT command
+			 */
+			x = sscanf(bptr, "ORT %d,%d,%d,%d,%d,%d\r\n%256[^\177]",
+				   &n1, &n2, &n3, &n4, &n5, &n6, buff);
+			if (x < 6)
+			   return 1;
+
+			laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
+			lport = htons((n5 << 8) | (n6));
+
+			if ((so = solisten(0, laddr, lport, SS_FACCEPTONCE)) == NULL)
+			   return 1;
+
+			n6 = ntohs(so->so_fport);
+
+			n5 = (n6 >> 8) & 0xff;
+			n6 &= 0xff;
+
+			laddr = ntohl(so->so_faddr.s_addr);
+
+			n1 = ((laddr >> 24) & 0xff);
+			n2 = ((laddr >> 16) & 0xff);
+			n3 = ((laddr >> 8)  & 0xff);
+			n4 =  (laddr & 0xff);
+
+			m->m_len = bptr - m->m_data; /* Adjust length */
+			m->m_len += sprintf(bptr,"ORT %d,%d,%d,%d,%d,%d\r\n%s",
+					    n1, n2, n3, n4, n5, n6, x==7?buff:"");
+			return 1;
+		} else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) {
+			/*
+			 * Need to emulate the PASV response
+			 */
+			x = sscanf(bptr, "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%256[^\177]",
+				   &n1, &n2, &n3, &n4, &n5, &n6, buff);
+			if (x < 6)
+			   return 1;
+
+			laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
+			lport = htons((n5 << 8) | (n6));
+
+			if ((so = solisten(0, laddr, lport, SS_FACCEPTONCE)) == NULL)
+			   return 1;
+
+			n6 = ntohs(so->so_fport);
+
+			n5 = (n6 >> 8) & 0xff;
+			n6 &= 0xff;
+
+			laddr = ntohl(so->so_faddr.s_addr);
+
+			n1 = ((laddr >> 24) & 0xff);
+			n2 = ((laddr >> 16) & 0xff);
+			n3 = ((laddr >> 8)  & 0xff);
+			n4 =  (laddr & 0xff);
+
+			m->m_len = bptr - m->m_data; /* Adjust length */
+			m->m_len += sprintf(bptr,"27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s",
+					    n1, n2, n3, n4, n5, n6, x==7?buff:"");
+
+			return 1;
+		}
+
+		return 1;
+
+	 case EMU_KSH:
+		/*
+		 * The kshell (Kerberos rsh) and shell services both pass
+		 * a local port number to carry signals to the server
+		 * and stderr to the client.  It is passed at the beginning
+		 * of the connection as a NUL-terminated decimal ASCII string.
+		 *
+		 * NOTE(review): an empty mbuf (m_len == 0) would make the
+		 * terminator test below read m_data[-1] -- confirm callers
+		 * never pass one.
+		 */
+		so->so_emu = 0;
+		for (lport = 0, i = 0; i < m->m_len-1; ++i) {
+			if (m->m_data[i] < '0' || m->m_data[i] > '9')
+				return 1;       /* invalid number */
+			lport *= 10;
+			lport += m->m_data[i] - '0';
+		}
+		if (m->m_data[m->m_len-1] == '\0' && lport != 0 &&
+		    (so = solisten(0, so->so_laddr.s_addr, htons(lport), SS_FACCEPTONCE)) != NULL)
+			m->m_len = sprintf(m->m_data, "%d", ntohs(so->so_fport))+1;
+		return 1;
+
+	 case EMU_IRC:
+		/*
+		 * Need to emulate DCC CHAT, DCC SEND and DCC MOVE
+		 */
+		*(m->m_data+m->m_len) = 0; /* NULL terminate the string for strstr */
+		if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL)
+			 return 1;
+
+		/* The %256s is for the broken mIRC */
+		if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) {
+			if ((so = solisten(0, htonl(laddr), htons(lport), SS_FACCEPTONCE)) == NULL)
+				return 1;
+
+			m->m_len = bptr - m->m_data; /* Adjust length */
+			m->m_len += sprintf(bptr, "DCC CHAT chat %lu %u%c\n",
+			     (unsigned long)ntohl(so->so_faddr.s_addr),
+			     ntohs(so->so_fport), 1);
+		} else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) {
+			if ((so = solisten(0, htonl(laddr), htons(lport), SS_FACCEPTONCE)) == NULL)
+				return 1;
+
+			m->m_len = bptr - m->m_data; /* Adjust length */
+			m->m_len += sprintf(bptr, "DCC SEND %s %lu %u %u%c\n",
+			      buff, (unsigned long)ntohl(so->so_faddr.s_addr),
+			      ntohs(so->so_fport), n1, 1);
+		} else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) {
+			if ((so = solisten(0, htonl(laddr), htons(lport), SS_FACCEPTONCE)) == NULL)
+				return 1;
+
+			m->m_len = bptr - m->m_data; /* Adjust length */
+			m->m_len += sprintf(bptr, "DCC MOVE %s %lu %u %u%c\n",
+			      buff, (unsigned long)ntohl(so->so_faddr.s_addr),
+			      ntohs(so->so_fport), n1, 1);
+		}
+		return 1;
+
+	 case EMU_REALAUDIO:
+                /*
+		 * RealAudio emulation - JP. We must try to parse the incoming
+		 * data and try to find the two characters that contain the
+		 * port number. Then we redirect an udp port and replace the
+		 * number with the real port we got.
+		 *
+		 * The 1.0 beta versions of the player are not supported
+		 * any more.
+		 *
+		 * A typical packet for player version 1.0 (release version):
+		 *
+		 * 0000:50 4E 41 00 05
+		 * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 .....×..gælÜc..P
+		 * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH
+		 * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v
+		 * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB
+		 *
+		 * Now the port number 0x1BD7 is found at offset 0x04 of the
+		 * second packet. This time we received five bytes first and
+		 * then the rest. You never know how many bytes you get.
+		 *
+		 * A typical packet for player version 2.0 (beta):
+		 *
+		 * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA...........Á.
+		 * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .guxõc..Win2.0.0
+		 * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/
+		 * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas
+		 * 0040:65 2E 72 61 79 53 00 00 06 36 42                e.rayS...6B
+		 *
+		 * Port number 0x1BC1 is found at offset 0x0d.
+		 *
+		 * This is just a horrible switch statement. Variable ra tells
+		 * us where we're going.
+		 *
+		 * NOTE(review): ra is static, so the parse position is
+		 * shared by every RealAudio connection.  States 5 and 6
+		 * look at bytes beyond bptr without checking that they
+		 * still lie inside the mbuf.
+		 */
+
+		bptr = m->m_data;
+		while (bptr < m->m_data + m->m_len) {
+			u_short p;
+			static int ra = 0;
+			char ra_tbl[4];
+
+			ra_tbl[0] = 0x50;
+			ra_tbl[1] = 0x4e;
+			ra_tbl[2] = 0x41;
+			ra_tbl[3] = 0;
+
+			switch (ra) {
+			 case 0:
+			 case 2:
+			 case 3:
+				if (*bptr++ != ra_tbl[ra]) {
+					ra = 0;
+					continue;
+				}
+				break;
+
+			 case 1:
+				/*
+				 * We may get 0x50 several times, ignore them
+				 */
+				if (*bptr == 0x50) {
+					ra = 1;
+					bptr++;
+					continue;
+				} else if (*bptr++ != ra_tbl[ra]) {
+					ra = 0;
+					continue;
+				}
+				break;
+
+			 case 4:
+				/*
+				 * skip version number
+				 */
+				bptr++;
+				break;
+
+			 case 5:
+				/*
+				 * The difference between versions 1.0 and
+				 * 2.0 is here. For future versions of
+				 * the player this may need to be modified.
+				 */
+				if (*(bptr + 1) == 0x02)
+				   bptr += 8;
+				else
+				   bptr += 4;
+				break;
+
+			 case 6:
+				/* This is the field containing the port
+				 * number that RA-player is listening to.
+				 */
+				lport = (((u_char*)bptr)[0] << 8)
+				+ ((u_char *)bptr)[1];
+				if (lport < 6970)
+				   lport += 256;   /* don't know why */
+				if (lport < 6970 || lport > 7170)
+				   return 1;       /* failed */
+
+				/* try to get udp port between 6970 - 7070 */
+				for (p = 6970; p < 7071; p++) {
+					if (udp_listen( htons(p),
+						       so->so_laddr.s_addr,
+						       htons(lport),
+						       SS_FACCEPTONCE)) {
+						break;
+					}
+				}
+				if (p == 7071)
+				   p = 0;
+				*(u_char *)bptr++ = (p >> 8) & 0xff;
+				*(u_char *)bptr++ = p & 0xff;
+				ra = 0;
+				return 1;   /* port redirected, we're done */
+				break;
+
+			 default:
+				ra = 0;
+			}
+			ra++;
+		}
+		return 1;
+
+	 default:
+		/* Ooops, not emulated, won't call tcp_emu again */
+		so->so_emu = 0;
+		return 1;
+	}
+}
+
+/*
+ * Do misc. config of SLiRP while its running.
+ * Return 0 if this connections is to be closed, 1 otherwise,
+ * return 2 if this is a command-line connection
+ */
+int
+tcp_ctl(so)
+	struct socket *so;
+{
+#if 0
+	struct sbuf *sb = &so->so_snd;
+	int command;
+	struct ex_list *ex_ptr;
+	int do_pty;
+	struct socket *tmpso;
+
+	DEBUG_CALL("tcp_ctl");
+	DEBUG_ARG("so = %lx", (long )so);
+
+	/*
+	 * Check if they're authorised
+	 */
+	if (ctl_addr.s_addr && (ctl_addr.s_addr == -1 || (so->so_laddr.s_addr != ctl_addr.s_addr))) {
+		sb->sb_cc = sprintf(sb->sb_wptr,"Error: Permission denied.\r\n");
+		sb->sb_wptr += sb->sb_cc;
+		return 0;
+	}
+
+	/* The low byte of the foreign address selects the control function */
+	command = (ntohl(so->so_faddr.s_addr) & 0xff);
+
+	switch(command) {
+	default: /* Check for exec's */
+
+		/*
+		 * Check if it's pty_exec
+		 */
+		for (ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
+			if (ex_ptr->ex_fport == so->so_fport &&
+			    command == ex_ptr->ex_addr) {
+				do_pty = ex_ptr->ex_pty;
+				goto do_exec;
+			}
+		}
+
+		/*
+		 * Nothing bound..
+		 */
+		/* tcp_fconnect(so); */
+
+		/* FALLTHROUGH */
+	case CTL_ALIAS:
+	  sb->sb_cc = sprintf(sb->sb_wptr,
+			      "Error: No application configured.\r\n");
+	  sb->sb_wptr += sb->sb_cc;
+	  return(0);
+
+	do_exec:
+		DEBUG_MISC((dfd, " executing %s \n",ex_ptr->ex_exec));
+		return(fork_exec(so, ex_ptr->ex_exec, do_pty));
+
+	case CTL_CMD:
+	   for (tmpso = tcb.so_next; tmpso != &tcb; tmpso = tmpso->so_next) {
+	     if (tmpso->so_emu == EMU_CTL &&
+		 !(tmpso->so_tcpcb?
+		   (tmpso->so_tcpcb->t_state & (TCPS_TIME_WAIT|TCPS_LAST_ACK))
+		   :0)) {
+	       /* Ooops, control connection already active */
+	       sb->sb_cc = sprintf(sb->sb_wptr,"Sorry, already connected.\r\n");
+	       sb->sb_wptr += sb->sb_cc;
+	       return 0;
+	     }
+	   }
+	   so->so_emu = EMU_CTL;
+	   ctl_password_ok = 0;
+	   sb->sb_cc = sprintf(sb->sb_wptr, "Slirp command-line ready (type \"help\" for help).\r\nSlirp> ");
+	   sb->sb_wptr += sb->sb_cc;
+	   do_echo=-1;
+	   return(2);
+	}
+#else
+        return 0;	/* everything above is compiled out (#if 0): always report "close this connection" */
+#endif
+}
diff --git a/slirp/tcp_timer.c b/slirp/tcp_timer.c new file mode 100644 index 000000000..166979a3e --- /dev/null +++ b/slirp/tcp_timer.c @@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.c	8.1 (Berkeley) 6/10/93
+ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp
+ */
+
+#include <slirp.h>
+
+#define max(x,y) ((x) > (y) ? (x) : (y))	/* NB: the larger argument is evaluated twice */
+#define min(x,y) ((x) < (y) ? (x) : (y))	/* NB: the smaller argument is evaluated twice */
+
+int	tcp_keepidle = TCPTV_KEEP_IDLE;
+int	tcp_keepintvl = TCPTV_KEEPINTVL;
+int	tcp_maxidle;	/* recomputed by every tcp_slowtimo() call */
+int	so_options = DO_KEEPALIVE;
+
+struct   tcpstat tcpstat;        /* tcp statistics */
+u_int32_t        tcp_now;                /* for RFC 1323 timestamps */
+
+/*
+ * Fast timeout routine for processing delayed acks
+ *
+ * Walks the tcb list and, for every socket that has a control block
+ * with TF_DELACK set, replaces that flag with TF_ACKNOW, counts the
+ * delayed ack in tcpstat and calls tcp_output().
+ */
+void
+tcp_fasttimo()
+{
+	register struct socket *so;
+	register struct tcpcb *tp;
+
+	DEBUG_CALL("tcp_fasttimo");
+
+	so = tcb.so_next;
+	if (so)
+	for (; so != &tcb; so = so->so_next)
+		if ((tp = (struct tcpcb *)so->so_tcpcb) &&
+		    (tp->t_flags & TF_DELACK)) {
+			tp->t_flags &= ~TF_DELACK;
+			tp->t_flags |= TF_ACKNOW;
+			tcpstat.tcps_delack++;
+			(void) tcp_output(tp);
+		}
+}
+
+/*
+ * Tcp protocol timeout routine called every 500 ms.
+ * Updates the timers in all active tcb's and
+ * causes finite state machine actions if timers expire.
+ *
+ * Also recomputes tcp_maxidle from tcp_keepintvl, and on every call
+ * that reaches the end advances tcp_iss and tcp_now.  For each
+ * connection that is still present after its timers ran, t_idle is
+ * incremented, and t_rtt too when it is non-zero.
+ */
+void
+tcp_slowtimo()
+{
+	register struct socket *ip, *ipnxt;
+	register struct tcpcb *tp;
+	register int i;
+
+	DEBUG_CALL("tcp_slowtimo");
+
+	tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
+	/*
+	 * Search through tcb's and update active timers.
+	 */
+	ip = tcb.so_next;
+	if (ip == 0)
+	   return;
+	for (; ip != &tcb; ip = ipnxt) {
+		ipnxt = ip->so_next;	/* saved first: ip may be gone after tcp_timers() */
+		tp = sototcpcb(ip);
+		if (tp == 0)
+			continue;
+		for (i = 0; i < TCPT_NTIMERS; i++) {
+			if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
+				tcp_timers(tp,i);
+				if (ipnxt->so_prev != ip)	/* ip is no longer linked in front of ipnxt; presumably tcp_timers() tore the connection down */
+					goto tpgone;
+			}
+		}
+		tp->t_idle++;
+		if (tp->t_rtt)
+		   tp->t_rtt++;
+tpgone:
+		;
+	}
+	tcp_iss += TCP_ISSINCR/PR_SLOWHZ;		/* increment iss */
+#ifdef TCP_COMPAT_42
+	if ((int)tcp_iss < 0)
+		tcp_iss = 0;				/* XXX */
+#endif
+	tcp_now++;					/* for timestamps */
+}
+
+/*
+ * Cancel all timers for TCP tp.
+ * (zeroes every one of the TCPT_NTIMERS slots of tp->t_timer)
+ */
+void
+tcp_canceltimers(tp)
+	struct tcpcb *tp;
+{
+	register int i;
+
+	for (i = 0; i < TCPT_NTIMERS; i++)
+		tp->t_timer[i] = 0;
+}
+
+int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =	/* retransmit multiplier, indexed by tp->t_rxtshift */
+   { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+/*
+ * TCP timer processing.
+ *
+ * tp is the connection whose timer expired and timer is the expired
+ * slot (TCPT_2MSL, TCPT_REXMT, TCPT_PERSIST or TCPT_KEEP); any other
+ * value is ignored.
+ *
+ * Returns tp, or whatever tcp_close()/tcp_drop() returned when the
+ * timer caused the connection to be closed or dropped.
+ */
+struct tcpcb *
+tcp_timers(tp, timer)
+	register struct tcpcb *tp;
+	int timer;
+{
+	register int rexmt;
+
+	DEBUG_CALL("tcp_timers");
+
+	switch (timer) {
+
+	/*
+	 * 2 MSL timeout in shutdown went off.  If we're closed but
+	 * still waiting for peer to close and connection has been idle
+	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+	 * control block.  Otherwise, check again in a bit.
+	 */
+	case TCPT_2MSL:
+		if (tp->t_state != TCPS_TIME_WAIT &&
+		    tp->t_idle <= tcp_maxidle)
+			tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+		else
+			tp = tcp_close(tp);
+		break;
+
+	/*
+	 * Retransmission timer went off.  Message has not
+	 * been acked within retransmit interval.  Back off
+	 * to a longer retransmit interval and retransmit one segment.
+	 */
+	case TCPT_REXMT:
+
+		/*
+		 * XXXXX If a packet has timed out, then remove all the queued
+		 * packets for that session.
+		 */
+
+		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+			/*
+			 * This is a hack to suit our terminal server here at the uni of canberra
+			 * since they have trouble with zeroes... It usually lets them through
+			 * unharmed, but under some conditions, it'll eat the zeros.  If we
+			 * keep retransmitting it, it'll keep eating the zeroes, so we keep
+			 * retransmitting, and eventually the connection dies...
+			 * (this only happens on incoming data)
+			 *
+			 * So, if we were gonna drop the connection from too many retransmits,
+			 * don't... instead halve the t_maxseg, which might break up the NULLs and
+			 * let them through
+			 *
+			 * *sigh*
+			 */
+
+			tp->t_maxseg >>= 1;
+			if (tp->t_maxseg < 32) {
+				/*
+				 * We tried our best, now the connection must die!
+				 */
+				tp->t_rxtshift = TCP_MAXRXTSHIFT;
+				tcpstat.tcps_timeoutdrop++;
+				tp = tcp_drop(tp, tp->t_softerror);
+				/* tp->t_softerror : ETIMEDOUT); */ /* XXX */
+				return (tp); /* XXX */
+			}
+
+			/*
+			 * Set rxtshift to 6, which is still at the maximum
+			 * backoff time
+			 */
+			tp->t_rxtshift = 6;
+		}
+		tcpstat.tcps_rexmttimeo++;
+		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+		TCPT_RANGESET(tp->t_rxtcur, rexmt,
+		    (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
+		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+		/*
+		 * If losing, let the lower level know and try for
+		 * a better route.  Also, if we backed off this far,
+		 * our srtt estimate is probably bogus.  Clobber it
+		 * so we'll take the next rtt measurement as our srtt;
+		 * move the current srtt into rttvar to keep the current
+		 * retransmit times until then.
+		 */
+		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+/*			in_losing(tp->t_inpcb); */
+			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+			tp->t_srtt = 0;
+		}
+		tp->snd_nxt = tp->snd_una;
+		/*
+		 * If timing a segment in this window, stop the timer.
+		 */
+		tp->t_rtt = 0;
+		/*
+		 * Close the congestion window down to one segment
+		 * (we'll open it by one segment for each ack we get).
+		 * Since we probably have a window's worth of unacked
+		 * data accumulated, this "slow start" keeps us from
+		 * dumping all that data as back-to-back packets (which
+		 * might overwhelm an intermediate gateway).
+		 *
+		 * There are two phases to the opening: Initially we
+		 * open by one mss on each ack.  This makes the window
+		 * size increase exponentially with time.  If the
+		 * window is larger than the path can handle, this
+		 * exponential growth results in dropped packet(s)
+		 * almost immediately.  To get more time between
+		 * drops but still "push" the network to take advantage
+		 * of improving conditions, we switch from exponential
+		 * to linear window opening at some threshold size.
+		 * For a threshold, we use half the current window
+		 * size, truncated to a multiple of the mss.
+		 *
+		 * (the minimum cwnd that will give us exponential
+		 * growth is 2 mss.  We don't allow the threshold
+		 * to go below this.)
+		 */
+		{
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_dupacks = 0;
+		}
+		(void) tcp_output(tp);
+		break;
+
+	/*
+	 * Persistence timer into zero window.
+	 * Force a byte to be output, if possible.
+	 */
+	case TCPT_PERSIST:
+		tcpstat.tcps_persisttimeo++;
+		tcp_setpersist(tp);
+		tp->t_force = 1;
+		(void) tcp_output(tp);
+		tp->t_force = 0;
+		break;
+
+	/*
+	 * Keep-alive timer went off; send something
+	 * or drop connection if idle for too long.
+	 */
+	case TCPT_KEEP:
+		tcpstat.tcps_keeptimeo++;
+		if (tp->t_state < TCPS_ESTABLISHED)
+			goto dropit;
+
+/*		if (tp->t_socket->so_options & SO_KEEPALIVE && */
+		if ((so_options) && tp->t_state <= TCPS_CLOSE_WAIT) {
+		    	if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
+				goto dropit;
+			/*
+			 * Send a packet designed to force a response
+			 * if the peer is up and reachable:
+			 * either an ACK if the connection is still alive,
+			 * or an RST if the peer has closed the connection
+			 * due to timeout or reboot.
+			 * Using sequence number tp->snd_una-1
+			 * causes the transmitted zero-length segment
+			 * to lie outside the receive window;
+			 * by the protocol spec, this requires the
+			 * correspondent TCP to respond.
+			 */
+			tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+			/*
+			 * The keepalive packet must have nonzero length
+			 * to get a 4.2 host to respond.
+			 */
+			tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+			tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+			tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+		} else
+			tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+		break;
+
+	dropit:
+		tcpstat.tcps_keepdrops++;
+		tp = tcp_drop(tp, 0); /* ETIMEDOUT); */
+		break;
+	}
+
+	return (tp);
+}
diff --git a/slirp/tcp_timer.h b/slirp/tcp_timer.h new file mode 100644 index 000000000..59933bc1b --- /dev/null +++ b/slirp/tcp_timer.h @@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
+ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp
+ */
+
+#ifndef _TCP_TIMER_H_
+#define _TCP_TIMER_H_
+
+/*
+ * Definitions of the TCP timers.  These timers are counted
+ * down PR_SLOWHZ times a second.
+ */
+#define	TCPT_NTIMERS	4
+
+#define	TCPT_REXMT	0		/* retransmit */
+#define	TCPT_PERSIST	1		/* retransmit persistence */
+#define	TCPT_KEEP	2		/* keep alive */
+#define	TCPT_2MSL	3		/* 2*msl quiet time timer */
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received.  If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected).  Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut.  If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
+ * we go to transmit state.  Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPTV_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive.  If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection.  Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection.  We force the peer to send us a segment by sending:
+ *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer.  If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in tcp_maxidle
+ * amount of time probing, then we drop the connection.
+ */
+
+/*
+ * Time constants.
+ * All values below are expressed in slow-timer ticks (multiples of
+ * PR_SLOWHZ), except TCPTV_KEEPCNT, which is a probe count.
+ */
+#define	TCPTV_MSL	( 5*PR_SLOWHZ)		/* max seg lifetime (hah!) */
+
+#define	TCPTV_SRTTBASE	0			/* base roundtrip time;
+						   if 0, no idea yet */
+#define	TCPTV_SRTTDFLT	(  3*PR_SLOWHZ)		/* assumed RTT if no info */
+
+#define	TCPTV_PERSMIN	(  5*PR_SLOWHZ)		/* retransmit persistence */
+#define	TCPTV_PERSMAX	( 60*PR_SLOWHZ)		/* maximum persist interval */
+
+#define	TCPTV_KEEP_INIT	( 75*PR_SLOWHZ)		/* initial connect keep alive */
+#define	TCPTV_KEEP_IDLE	(120*60*PR_SLOWHZ)	/* dflt time before probing */
+#define	TCPTV_KEEPINTVL	( 75*PR_SLOWHZ)		/* default probe interval */
+#define	TCPTV_KEEPCNT	8			/* max probes before drop */
+
+#define	TCPTV_MIN	(  1*PR_SLOWHZ)		/* minimum allowable value */
+/* #define	TCPTV_REXMTMAX	( 64*PR_SLOWHZ)	*/	/* max allowable REXMT value */
+#define TCPTV_REXMTMAX  ( 12*PR_SLOWHZ)		/* max allowable REXMT value */
+
+#define	TCP_LINGERTIME	120			/* linger at most 2 minutes */
+
+#define TCP_MAXRXTSHIFT 12                      /* maximum retransmits */
+
+
+#ifdef	TCPTIMERS
+/* Printable timer names, indexed by the TCPT_* slot numbers above */
+char *tcptimers[] =
+    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value to be in a certain range.
+ *
+ * Assigns value to tv and then clamps tv to [tvmin, tvmax].
+ * tv is evaluated several times, so it must be free of side effects.
+ * The do { } while (0) wrapper makes the expansion a single statement,
+ * so "if (c) TCPT_RANGESET(...); else ..." compiles as written; the
+ * invocation must be followed by a semicolon.
+ */
+#define	TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+	(tv) = (value); \
+	if ((tv) < (tvmin)) \
+		(tv) = (tvmin); \
+	else if ((tv) > (tvmax)) \
+		(tv) = (tvmax); \
+} while (0)
+
+extern int tcp_keepidle;		/* time before keepalive probes begin */
+extern int tcp_keepintvl;		/* time between keepalive probes */
+extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_ttl;			/* time to live for TCP segs */
+extern int tcp_backoff[];
+
+struct tcpcb;
+
+void tcp_fasttimo _P((void));
+void tcp_slowtimo _P((void));
+void tcp_canceltimers _P((struct tcpcb *));
+struct tcpcb * tcp_timers _P((register struct tcpcb *, int));
+
+#endif
diff --git a/slirp/tcp_var.h b/slirp/tcp_var.h new file mode 100644 index 000000000..0d6cd245e --- /dev/null +++ b/slirp/tcp_var.h @@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 + * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp + */ + +#ifndef _TCP_VAR_H_ +#define _TCP_VAR_H_ + +#include "tcpip.h" +#include "tcp_timer.h" + +#if SIZEOF_CHAR_P == 4 + typedef struct tcpiphdr *tcpiphdrp_32; +#else + typedef u_int32_t tcpiphdrp_32; +#endif + +/* + * Tcp control block, one per tcp connection; fields: + */ +struct tcpcb { + tcpiphdrp_32 seg_next; /* sequencing queue */ + tcpiphdrp_32 seg_prev; + short t_state; /* state of this connection */ + short t_timer[TCPT_NTIMERS]; /* tcp timers */ + short t_rxtshift; /* log(2) of rexmt exp. backoff */ + short t_rxtcur; /* current retransmit value */ + short t_dupacks; /* consecutive dup acks recd */ + u_short t_maxseg; /* maximum segment size */ + char t_force; /* 1 if forcing out a byte */ + u_short t_flags; /* TF_* bits, defined just below */ +#define TF_ACKNOW 0x0001 /* ack peer immediately */ +#define TF_DELACK 0x0002 /* ack, but try to delay it */ +#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x0008 /* don't use tcp options */ +#define TF_SENTFIN 0x0010 /* have sent FIN */ +#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ + + /* Make it static for now */ +/* struct tcpiphdr *t_template; / * skeletal packet for transmit */ + struct tcpiphdr t_template; + + struct socket *t_socket; /* back pointer to socket */ +/* + * The following fields are used as in the protocol specification. + * See RFC 793, Sept. 1981 (cited as "RFC783, Dec. 1981, page 21" + * in the BSD original). 
+ */ +/* send sequence variables */ + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + u_int32_t snd_wnd; /* send window */ +/* receive sequence variables */ + u_int32_t rcv_wnd; /* receive window */ + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_up; /* receive urgent pointer */ + tcp_seq irs; /* initial receive sequence number */ +/* + * Additional variables for this implementation. + */ +/* receive variables */ + tcp_seq rcv_adv; /* advertised window */ +/* retransmit variables */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ +/* congestion control (for slow start, source quench, retransmit after loss) */ + u_int32_t snd_cwnd; /* congestion-controlled window */ + u_int32_t snd_ssthresh; /* snd_cwnd size threshold for + * slow start exponential to + * linear switch + */ +/* + * transmit timing stuff. See below for scale of srtt and rttvar. + * "Variance" is actually smoothed difference. 
+ */ + short t_idle; /* inactivity time */ + short t_rtt; /* round trip time */ + tcp_seq t_rtseq; /* sequence number being timed */ + short t_srtt; /* smoothed round-trip time */ + short t_rttvar; /* variance in round-trip time */ + u_short t_rttmin; /* minimum rtt allowed */ + u_int32_t max_sndwnd; /* largest window peer has offered */ + +/* out-of-band data */ + char t_oobflags; /* have some (TCPOOB_* bits below) */ + char t_iobc; /* input character */ +#define TCPOOB_HAVEDATA 0x01 +#define TCPOOB_HADDATA 0x02 + short t_softerror; /* possible error not yet reported */ + +/* RFC 1323 variables */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char request_r_scale; /* pending window scaling */ + u_char requested_s_scale; + u_int32_t ts_recent; /* timestamp echo data */ + u_int32_t ts_recent_age; /* when last updated */ + tcp_seq last_ack_sent; + +}; + +#define sototcpcb(so) ((so)->so_tcpcb) /* the so_tcpcb member of a socket */ + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. 
When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This macro assumes that the value of TCP_RTTVAR_SCALE + * is the same as the multiplier for rttvar. + */ +#define TCP_REXMTVAL(tp) \ + (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) + +/* XXX + * We want to avoid doing m_pullup on incoming packets but that + * means avoiding dtom on the tcp reassembly code. That in turn means + * keeping an mbuf pointer in the reassembly queue (since we might + * have a cluster). As a quick hack, the source & destination + * port numbers (which are no longer needed once we've located the + * tcpcb) are overlayed with an mbuf pointer. + */ +#if SIZEOF_CHAR_P == 4 +typedef struct mbuf *mbufp_32; +#else +typedef u_int32_t mbufp_32; +#endif +#define REASS_MBUF(ti) (*(mbufp_32 *)&((ti)->ti_t)) + +/* + * TCP statistics. + * Many of these should be kept per connection, + * but that's inconvenient at the moment. + */ +struct tcpstat { + u_long tcps_connattempt; /* connections initiated */ + u_long tcps_accepts; /* connections accepted */ + u_long tcps_connects; /* connections established */ + u_long tcps_drops; /* connections dropped */ + u_long tcps_conndrops; /* embryonic connections dropped */ + u_long tcps_closed; /* conn. closed (includes drops) */ + u_long tcps_segstimed; /* segs where we tried to get rtt */ + u_long tcps_rttupdated; /* times we succeeded */ + u_long tcps_delack; /* delayed acks sent */ + u_long tcps_timeoutdrop; /* conn. 
dropped in rxmt timeout */ + u_long tcps_rexmttimeo; /* retransmit timeouts */ + u_long tcps_persisttimeo; /* persist timeouts */ + u_long tcps_keeptimeo; /* keepalive timeouts */ + u_long tcps_keepprobe; /* keepalive probes sent */ + u_long tcps_keepdrops; /* connections dropped in keepalive */ + + u_long tcps_sndtotal; /* total packets sent */ + u_long tcps_sndpack; /* data packets sent */ + u_long tcps_sndbyte; /* data bytes sent */ + u_long tcps_sndrexmitpack; /* data packets retransmitted */ + u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ + u_long tcps_sndacks; /* ack-only packets sent */ + u_long tcps_sndprobe; /* window probes sent */ + u_long tcps_sndurg; /* packets sent with URG only */ + u_long tcps_sndwinup; /* window update-only packets sent */ + u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ + + u_long tcps_rcvtotal; /* total packets received */ + u_long tcps_rcvpack; /* packets received in sequence */ + u_long tcps_rcvbyte; /* bytes received in sequence */ + u_long tcps_rcvbadsum; /* packets received with cksum errs */ + u_long tcps_rcvbadoff; /* packets received with bad offset */ +/* u_long tcps_rcvshort; */ /* packets received too short */ + u_long tcps_rcvduppack; /* duplicate-only packets received */ + u_long tcps_rcvdupbyte; /* duplicate-only bytes received */ + u_long tcps_rcvpartduppack; /* packets with some duplicate data */ + u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. 
packets */ + u_long tcps_rcvoopack; /* out-of-order packets received */ + u_long tcps_rcvoobyte; /* out-of-order bytes received */ + u_long tcps_rcvpackafterwin; /* packets with data after window */ + u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */ + u_long tcps_rcvafterclose; /* packets rcvd after "close" */ + u_long tcps_rcvwinprobe; /* rcvd window probe packets */ + u_long tcps_rcvdupack; /* rcvd duplicate acks */ + u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */ + u_long tcps_rcvackpack; /* rcvd ack packets */ + u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ + u_long tcps_rcvwinupd; /* rcvd window update packets */ +/* u_long tcps_pawsdrop; */ /* segments dropped due to PAWS */ + u_long tcps_predack; /* times hdr predict ok for acks */ + u_long tcps_preddat; /* times hdr predict ok for data pkts */ + u_long tcps_socachemiss; /* tcp_last_so misses */ + u_long tcps_didnuttin; /* Times tcp_output didn't do anything XXX */ +}; + +extern struct tcpstat tcpstat; /* tcp statistics */ +extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ + +#endif diff --git a/slirp/tcpip.h b/slirp/tcpip.h new file mode 100644 index 000000000..82708b09c --- /dev/null +++ b/slirp/tcpip.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 + * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp + */ + +#ifndef _TCPIP_H_ +#define _TCPIP_H_ + +/* + * Tcp+ip header, after ip options removed. 
+ */ +struct tcpiphdr { + struct ipovly ti_i; /* overlaid ip structure */ + struct tcphdr ti_t; /* tcp header */ +}; /* the ti_* names below are shorthand for members of ti_i and ti_t */ +#define ti_next ti_i.ih_next +#define ti_prev ti_i.ih_prev +#define ti_x1 ti_i.ih_x1 +#define ti_pr ti_i.ih_pr +#define ti_len ti_i.ih_len +#define ti_src ti_i.ih_src +#define ti_dst ti_i.ih_dst +#define ti_sport ti_t.th_sport +#define ti_dport ti_t.th_dport +#define ti_seq ti_t.th_seq +#define ti_ack ti_t.th_ack +#define ti_x2 ti_t.th_x2 +#define ti_off ti_t.th_off +#define ti_flags ti_t.th_flags +#define ti_win ti_t.th_win +#define ti_sum ti_t.th_sum +#define ti_urp ti_t.th_urp + +/* + * Just a clean way to get to the first byte + * of the packet: first_char is the member declared + * directly after the tcpiphdr. + */ +struct tcpiphdr_2 { + struct tcpiphdr dummy; + char first_char; +}; + +#endif diff --git a/slirp/udp.c b/slirp/udp.c new file mode 100644 index 000000000..76a4fcc97 --- /dev/null +++ b/slirp/udp.c @@ -0,0 +1,654 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 + * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include <slirp.h> +#include "ip_icmp.h" + +struct udpstat udpstat; + +struct socket udb; + +/* + * UDP protocol implementation. + * Per RFC 768, August, 1980. 
+ */ +#ifndef COMPAT_42 +int udpcksum = 1; +#else +int udpcksum = 0; /* XXX */ +#endif + +struct socket *udp_last_so = &udb; + +void +udp_init() +{ + udb.so_next = udb.so_prev = &udb; +} +/* m->m_data points at ip packet header + * m->m_len length ip packet + * ip->ip_len length data (IPDU) + */ +void +udp_input(m, iphlen) + register struct mbuf *m; + int iphlen; +{ + register struct ip *ip; + register struct udphdr *uh; +/* struct mbuf *opts = 0;*/ + int len; + struct ip save_ip; + struct socket *so; + + DEBUG_CALL("udp_input"); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("iphlen = %d", iphlen); + + udpstat.udps_ipackets++; + + /* + * Strip IP options, if any; should skip this, + * make available to user, and use on returned packets, + * but we don't yet have a way to check the checksum + * with options still present. + */ + if(iphlen > sizeof(struct ip)) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Get IP and UDP header together in first mbuf. + */ + ip = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)ip + iphlen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. + */ + len = ntohs((u_int16_t)uh->uh_ulen); + + if (ip->ip_len != len) { + if (len > ip->ip_len) { + udpstat.udps_badlen++; + goto bad; + } + m_adj(m, len - ip->ip_len); + ip->ip_len = len; + } + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ + + /* + * Checksum extended UDP header and data. 
+ */ + if (udpcksum && uh->uh_sum) { + ((struct ipovly *)ip)->ih_next = 0; + ((struct ipovly *)ip)->ih_prev = 0; + ((struct ipovly *)ip)->ih_x1 = 0; + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; + /* keep uh_sum for ICMP reply + * uh->uh_sum = cksum(m, len + sizeof (struct ip)); + * if (uh->uh_sum) { + */ + if(cksum(m, len + sizeof(struct ip))) { + udpstat.udps_badsum++; + goto bad; + } + } + + /* + * handle DHCP/BOOTP + */ + if (ntohs(uh->uh_dport) == BOOTP_SERVER) { + bootp_input(m); + goto bad; + } + + /* + * Locate pcb for datagram. + */ + so = udp_last_so; + if (so->so_lport != uh->uh_sport || + so->so_laddr.s_addr != ip->ip_src.s_addr) { + struct socket *tmp; + + for (tmp = udb.so_next; tmp != &udb; tmp = tmp->so_next) { + if (tmp->so_lport == uh->uh_sport && + tmp->so_laddr.s_addr == ip->ip_src.s_addr) { + tmp->so_faddr.s_addr = ip->ip_dst.s_addr; + tmp->so_fport = uh->uh_dport; + so = tmp; + break; + } + } + if (tmp == &udb) { + so = NULL; + } else { + udpstat.udpps_pcbcachemiss++; + udp_last_so = so; + } + } + + if (so == NULL) { + /* + * If there's no socket for this packet, + * create one + */ + if ((so = socreate()) == NULL) goto bad; + if(udp_attach(so) == -1) { + DEBUG_MISC((dfd," udp_attach errno = %d-%s\n", + errno,strerror(errno))); + sofree(so); + goto bad; + } + + /* + * Setup fields + */ + /* udp_last_so = so; */ + so->so_laddr = ip->ip_src; + so->so_lport = uh->uh_sport; + so->so_faddr = ip->ip_dst; /* XXX */ + so->so_fport = uh->uh_dport; /* XXX */ + + if ((so->so_iptos = udp_tos(so)) == 0) + so->so_iptos = ip->ip_tos; + + /* + * XXXXX Here, check if it's in udpexec_list, + * and if it is, do the fork_exec() etc. + */ + } + + iphlen += sizeof(struct udphdr); + m->m_len -= iphlen; + m->m_data += iphlen; + + /* + * Now we sendto() the packet. 
+ */ + if (so->so_emu) + udp_emu(so, m); + + if(sosendto(so,m) == -1) { + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + DEBUG_MISC((dfd,"udp tx errno = %d-%s\n",errno,strerror(errno))); + icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); + } + + m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ + + /* restore the orig mbuf packet */ + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + so->so_m=m; /* ICMP backup */ + + return; +bad: + m_freem(m); + /* if (opts) m_freem(opts); */ + return; +} + +int udp_output2(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos) +{ + register struct udpiphdr *ui; + int error = 0; + + DEBUG_CALL("udp_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("saddr = %lx", (long)saddr->sin_addr.s_addr); + DEBUG_ARG("daddr = %lx", (long)daddr->sin_addr.s_addr); + + /* + * Adjust for header + */ + m->m_data -= sizeof(struct udpiphdr); + m->m_len += sizeof(struct udpiphdr); + + /* + * Fill in mbuf with extended UDP header + * and addresses and length put into network format. + */ + ui = mtod(m, struct udpiphdr *); + ui->ui_next = ui->ui_prev = 0; + ui->ui_x1 = 0; + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons(m->m_len - sizeof(struct ip)); /* + sizeof (struct udphdr)); */ + /* XXXXX Check for from-one-location sockets, or from-any-location sockets */ + ui->ui_src = saddr->sin_addr; + ui->ui_dst = daddr->sin_addr; + ui->ui_sport = saddr->sin_port; + ui->ui_dport = daddr->sin_port; + ui->ui_ulen = ui->ui_len; + + /* + * Stuff checksum and output datagram. 
+ */ + ui->ui_sum = 0; + if (udpcksum) { + if ((ui->ui_sum = cksum(m, /* sizeof (struct udpiphdr) + */ m->m_len)) == 0) + ui->ui_sum = 0xffff; + } + ((struct ip *)ui)->ip_len = m->m_len; + + ((struct ip *)ui)->ip_ttl = ip_defttl; + ((struct ip *)ui)->ip_tos = iptos; + + udpstat.udps_opackets++; + + error = ip_output(so, m); + + return (error); +} + +int udp_output(struct socket *so, struct mbuf *m, + struct sockaddr_in *addr) + +{ + struct sockaddr_in saddr, daddr; + + saddr = *addr; + if ((so->so_faddr.s_addr & htonl(0xffffff00)) == special_addr.s_addr) + saddr.sin_addr.s_addr = so->so_faddr.s_addr; + daddr.sin_addr = so->so_laddr; + daddr.sin_port = so->so_lport; + + return udp_output2(so, m, &saddr, &daddr, so->so_iptos); +} + +int +udp_attach(so) + struct socket *so; +{ + struct sockaddr_in addr; + + if((so->s = socket(AF_INET,SOCK_DGRAM,0)) != -1) { + /* + * Here, we bind() the socket. Although not really needed + * (sendto() on an unbound socket will bind it), it's done + * here so that emulation of ytalk etc. 
don't have to do it + */ + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = INADDR_ANY; + if(bind(so->s, (struct sockaddr *)&addr, sizeof(addr))<0) { + int lasterrno=errno; + close(so->s); + so->s=-1; + errno=lasterrno; + } else { + /* success, insert in queue */ + so->so_expire = curtime + SO_EXPIRE; + insque(so,&udb); + } + } + return(so->s); +} + +void +udp_detach(so) + struct socket *so; +{ + close(so->s); + /* if (so->so_m) m_free(so->so_m); done by sofree */ + + sofree(so); +} + +struct tos_t udptos[] = { + {0, 53, IPTOS_LOWDELAY, 0}, /* DNS */ + {517, 517, IPTOS_LOWDELAY, EMU_TALK}, /* talk */ + {518, 518, IPTOS_LOWDELAY, EMU_NTALK}, /* ntalk */ + {0, 7648, IPTOS_LOWDELAY, EMU_CUSEEME}, /* Cu-Seeme */ + {0, 0, 0, 0} +}; + +u_int8_t +udp_tos(so) + struct socket *so; +{ + int i = 0; + + while(udptos[i].tos) { + if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || + (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { + so->so_emu = udptos[i].emu; + return udptos[i].tos; + } + i++; + } + + return 0; +} + +#ifdef EMULATE_TALK +#include "talkd.h" +#endif + +/* + * Here, talk/ytalk/ntalk requests must be emulated + */ +void +udp_emu(so, m) + struct socket *so; + struct mbuf *m; +{ + struct sockaddr_in addr; + int addrlen = sizeof(addr); +#ifdef EMULATE_TALK + CTL_MSG_OLD *omsg; + CTL_MSG *nmsg; + char buff[sizeof(CTL_MSG)]; + u_char type; + +struct talk_request { + struct talk_request *next; + struct socket *udp_so; + struct socket *tcp_so; +} *req; + + static struct talk_request *req_tbl = 0; + +#endif + +struct cu_header { + char dest[8]; + short family; + u_short port; + u_long addr; +} *cu_head; + + switch(so->so_emu) { + +#ifdef EMULATE_TALK + case EMU_TALK: + case EMU_NTALK: + /* + * Talk emulation. We always change the ctl_addr to get + * some answers from the daemon. When an ANNOUNCE comes, + * we send LEAVE_INVITE to the local daemons. Also when a + * DELETE comes, we send copies to the local daemons. 
+ */ + if (getsockname(so->s, (struct sockaddr *)&addr, &addrlen) < 0) + return; + +#define IS_OLD (so->so_emu == EMU_TALK) + +#define COPY_MSG(dest, src) { dest->type = src->type; \ + dest->id_num = src->id_num; \ + dest->pid = src->pid; \ + dest->addr = src->addr; \ + dest->ctl_addr = src->ctl_addr; \ + memcpy(&dest->l_name, &src->l_name, NAME_SIZE_OLD); \ + memcpy(&dest->r_name, &src->r_name, NAME_SIZE_OLD); \ + memcpy(&dest->r_tty, &src->r_tty, TTY_SIZE); } + +#define OTOSIN(ptr, field) ((struct sockaddr_in *)&ptr->field) +/* old_sockaddr to sockaddr_in */ + + + if (IS_OLD) { /* old talk */ + omsg = mtod(m, CTL_MSG_OLD*); + nmsg = (CTL_MSG *) buff; + type = omsg->type; + OTOSIN(omsg, ctl_addr)->sin_port = addr.sin_port; + OTOSIN(omsg, ctl_addr)->sin_addr = our_addr; + strncpy(omsg->l_name, getlogin(), NAME_SIZE_OLD); + } else { /* new talk */ + omsg = (CTL_MSG_OLD *) buff; + nmsg = mtod(m, CTL_MSG *); + type = nmsg->type; + OTOSIN(nmsg, ctl_addr)->sin_port = addr.sin_port; + OTOSIN(nmsg, ctl_addr)->sin_addr = our_addr; + strncpy(nmsg->l_name, getlogin(), NAME_SIZE_OLD); + } + + if (type == LOOK_UP) + return; /* for LOOK_UP this is enough */ + + if (IS_OLD) { /* make a copy of the message */ + COPY_MSG(nmsg, omsg); + nmsg->vers = 1; + nmsg->answer = 0; + } else + COPY_MSG(omsg, nmsg); + + /* + * If if is an ANNOUNCE message, we go through the + * request table to see if a tcp port has already + * been redirected for this socket. If not, we solisten() + * a new socket and add this entry to the table. + * The port number of the tcp socket and our IP + * are put to the addr field of the message structures. + * Then a LEAVE_INVITE is sent to both local daemon + * ports, 517 and 518. This is why we have two copies + * of the message, one in old talk and one in new talk + * format. 
+ */ + + if (type == ANNOUNCE) { + int s; + u_short temp_port; + + for(req = req_tbl; req; req = req->next) + if (so == req->udp_so) + break; /* found it */ + + if (!req) { /* no entry for so, create new */ + req = (struct talk_request *) + malloc(sizeof(struct talk_request)); + req->udp_so = so; + req->tcp_so = solisten(0, + OTOSIN(omsg, addr)->sin_addr.s_addr, + OTOSIN(omsg, addr)->sin_port, + SS_FACCEPTONCE); + req->next = req_tbl; + req_tbl = req; + } + + /* replace port number in addr field */ + addrlen = sizeof(addr); + getsockname(req->tcp_so->s, + (struct sockaddr *) &addr, + &addrlen); + OTOSIN(omsg, addr)->sin_port = addr.sin_port; + OTOSIN(omsg, addr)->sin_addr = our_addr; + OTOSIN(nmsg, addr)->sin_port = addr.sin_port; + OTOSIN(nmsg, addr)->sin_addr = our_addr; + + /* send LEAVE_INVITEs */ + temp_port = OTOSIN(omsg, ctl_addr)->sin_port; + OTOSIN(omsg, ctl_addr)->sin_port = 0; + OTOSIN(nmsg, ctl_addr)->sin_port = 0; + omsg->type = nmsg->type = LEAVE_INVITE; + + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); + addr.sin_addr = our_addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(517); + sendto(s, (char *)omsg, sizeof(*omsg), 0, + (struct sockaddr *)&addr, sizeof(addr)); + addr.sin_port = htons(518); + sendto(s, (char *)nmsg, sizeof(*nmsg), 0, + (struct sockaddr *) &addr, sizeof(addr)); + close(s) ; + + omsg->type = nmsg->type = ANNOUNCE; + OTOSIN(omsg, ctl_addr)->sin_port = temp_port; + OTOSIN(nmsg, ctl_addr)->sin_port = temp_port; + } + + /* + * If it is a DELETE message, we send a copy to the + * local daemons. Then we delete the entry corresponding + * to our socket from the request table. 
+ */ + + if (type == DELETE) { + struct talk_request *temp_req, *req_next; + int s; + u_short temp_port; + + temp_port = OTOSIN(omsg, ctl_addr)->sin_port; + OTOSIN(omsg, ctl_addr)->sin_port = 0; + OTOSIN(nmsg, ctl_addr)->sin_port = 0; + + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); + addr.sin_addr = our_addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(517); + sendto(s, (char *)omsg, sizeof(*omsg), 0, + (struct sockaddr *)&addr, sizeof(addr)); + addr.sin_port = htons(518); + sendto(s, (char *)nmsg, sizeof(*nmsg), 0, + (struct sockaddr *)&addr, sizeof(addr)); + close(s); + + OTOSIN(omsg, ctl_addr)->sin_port = temp_port; + OTOSIN(nmsg, ctl_addr)->sin_port = temp_port; + + /* delete table entry */ + if (so == req_tbl->udp_so) { + temp_req = req_tbl; + req_tbl = req_tbl->next; + free(temp_req); + } else { + temp_req = req_tbl; + for(req = req_tbl->next; req; req = req_next) { + req_next = req->next; + if (so == req->udp_so) { + temp_req->next = req_next; + free(req); + break; + } else { + temp_req = req; + } + } + } + } + + return; +#endif + + case EMU_CUSEEME: + + /* + * Cu-SeeMe emulation. + * Hopefully the packet is more that 16 bytes long. We don't + * do any other tests, just replace the address and port + * fields. 
+ */ + if (m->m_len >= sizeof (*cu_head)) { + if (getsockname(so->s, (struct sockaddr *)&addr, &addrlen) < 0) + return; + cu_head = mtod(m, struct cu_header *); + cu_head->port = addr.sin_port; + cu_head->addr = (u_long) our_addr.s_addr; + } + + return; + } +} + +struct socket * +udp_listen(port, laddr, lport, flags) + u_int port; + u_int32_t laddr; + u_int lport; + int flags; +{ + struct sockaddr_in addr; + struct socket *so; + int addrlen = sizeof(struct sockaddr_in), opt = 1; + + if ((so = socreate()) == NULL) { + free(so); + return NULL; + } + so->s = socket(AF_INET,SOCK_DGRAM,0); + so->so_expire = curtime + SO_EXPIRE; + insque(so,&udb); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + addr.sin_port = port; + + if (bind(so->s,(struct sockaddr *)&addr, addrlen) < 0) { + udp_detach(so); + return NULL; + } + setsockopt(so->s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)); +/* setsockopt(so->s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int)); */ + + getsockname(so->s,(struct sockaddr *)&addr,&addrlen); + so->so_fport = addr.sin_port; + if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr) + so->so_faddr = our_addr; + else + so->so_faddr = addr.sin_addr; + + so->so_lport = lport; + so->so_laddr.s_addr = laddr; + if (flags != SS_FACCEPTONCE) + so->so_expire = 0; + + so->so_state = SS_ISFCONNECTED; + + return so; +} diff --git a/slirp/udp.h b/slirp/udp.h new file mode 100644 index 000000000..195b1bfff --- /dev/null +++ b/slirp/udp.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp + */ + +#ifndef _UDP_H_ +#define _UDP_H_ + +#define UDP_TTL 0x60 /* 0x60 == 96; NOTE(review): presumably the IP time-to-live for outgoing UDP datagrams - confirm at the use site */ +#define UDP_UDPDATALEN 16192 /* NOTE(review): looks like a UDP payload size limit in bytes - confirm at the use site */ + +extern struct socket *udp_last_so; /* defined outside this header; NOTE(review): name suggests the most recently used UDP socket - confirm */ + +/* + * Udp protocol header. + * Per RFC 768, September, 1981. + * Four 16-bit members. Note that uh_ulen is declared signed (int16_t) + * while the other three members are unsigned (u_int16_t). + */ +struct udphdr { + u_int16_t uh_sport; /* source port */ + u_int16_t uh_dport; /* destination port */ + int16_t uh_ulen; /* udp length */ + u_int16_t uh_sum; /* udp checksum */ +}; + +/* + * UDP kernel structures and variables. 
+ */ +struct udpiphdr { + struct ipovly ui_i; /* overlaid ip structure */ + struct udphdr ui_u; /* udp header */ +}; /* the ui_* macros that follow are shorthand for the members nested inside ui_i (ih_*) and ui_u (uh_*) */ +#define ui_next ui_i.ih_next +#define ui_prev ui_i.ih_prev +#define ui_x1 ui_i.ih_x1 +#define ui_pr ui_i.ih_pr +#define ui_len ui_i.ih_len +#define ui_src ui_i.ih_src +#define ui_dst ui_i.ih_dst +#define ui_sport ui_u.uh_sport +#define ui_dport ui_u.uh_dport +#define ui_ulen ui_u.uh_ulen +#define ui_sum ui_u.uh_sum + +struct udpstat { /* UDP packet counters: input counters first, then the single output counter */ + /* input statistics: */ + u_long udps_ipackets; /* total input packets */ + u_long udps_hdrops; /* packet shorter than header */ + u_long udps_badsum; /* checksum error */ + u_long udps_badlen; /* data length larger than packet */ + u_long udps_noport; /* no socket on port */ + u_long udps_noportbcast; /* of above, arrived as broadcast */ + u_long udps_fullsock; /* not delivered, input socket full */ + u_long udpps_pcbcachemiss; /* input packets missing pcb cache; note the member is spelled "udpps_", unlike the "udps_" prefix of the others */ + /* output statistics: */ + u_long udps_opackets; /* total output packets */ +}; + +/* + * Names for UDP sysctl objects + */ +#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ +#define UDPCTL_MAXID 2 + +extern struct udpstat udpstat; +extern struct socket udb; /* list head: udp_listen() in udp.c links each new socket onto it with insque() */ + /* Prototypes below are wrapped in _P(()), which is defined outside this header (NOTE(review): presumably a K&R/ANSI prototype shim - confirm); udp_output2 is the only one declared with a plain ANSI prototype. */ +void udp_init _P((void)); +void udp_input _P((register struct mbuf *, int)); +int udp_output _P((struct socket *, struct mbuf *, struct sockaddr_in *)); +int udp_attach _P((struct socket *)); +void udp_detach _P((struct socket *)); +u_int8_t udp_tos _P((struct socket *)); +void udp_emu _P((struct socket *, struct mbuf *)); +struct socket * udp_listen _P((u_int, u_int32_t, u_int, int)); +int udp_output2(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos); +#endif /* _UDP_H_ */ |