lguest: Tell Guest net not to notify us on every packet xmit
virtio_ring has the ability to suppress notifications. This prevents a guest exit for every packet, but we need to set a timer on packet receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout settings (it matters because the TCP window doesn't grow big enough to fill the entire buffer):

Timeout value   Seconds   Xmit/Recv/Timeout
None (before)   25.3784   xmit 7750233 recv 1
2500 usec       62.5119   xmit 207020 recv 2 timeout 207020
1000 usec       34.5379   xmit 207003 recv 2 timeout 207003
750 usec        29.2305   xmit 207002 recv 1 timeout 207002
500 usec        19.1887   xmit 561141 recv 1 timeout 559657
250 usec        20.0465   xmit 214128 recv 2 timeout 214110
100 usec        19.2583   xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come later, and probably other traffic-related variables, so take with a large grain of salt).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
5dae785a82
commit
a161883a29
1 changed files with 93 additions and 13 deletions
|
@ -36,6 +36,7 @@
|
|||
#include <sched.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <signal.h>
|
||||
#include "linux/lguest_launcher.h"
|
||||
#include "linux/virtio_config.h"
|
||||
#include "linux/virtio_net.h"
|
||||
|
@ -81,6 +82,8 @@ static int waker_fd;
|
|||
static void *guest_base;
|
||||
/* The maximum guest physical address allowed, and maximum possible. */
|
||||
static unsigned long guest_limit, guest_max;
|
||||
/* The pipe for the signal handler to write to. */
|
||||
static int timeoutpipe[2];
|
||||
|
||||
/* a per-cpu variable indicating whose vcpu is currently running */
|
||||
static unsigned int __thread cpu_id;
|
||||
|
@ -156,11 +159,14 @@ struct virtqueue
|
|||
/* Last available index we saw. */
|
||||
u16 last_avail_idx;
|
||||
|
||||
/* The routine to call when the Guest pings us. */
|
||||
void (*handle_output)(int fd, struct virtqueue *me);
|
||||
/* The routine to call when the Guest pings us, or timeout. */
|
||||
void (*handle_output)(int fd, struct virtqueue *me, bool timeout);
|
||||
|
||||
/* Outstanding buffers */
|
||||
unsigned int inflight;
|
||||
|
||||
/* Is this blocked awaiting a timer? */
|
||||
bool blocked;
|
||||
};
|
||||
|
||||
/* Remember the arguments to the program so we can "reboot" */
|
||||
|
@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)
|
|||
|
||||
/* Handling output for console is simple: we just get all the output buffers
|
||||
* and write them to stdout. */
|
||||
static void handle_console_output(int fd, struct virtqueue *vq)
|
||||
static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
|
||||
{
|
||||
unsigned int head, out, in;
|
||||
int len;
|
||||
|
@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq)
|
|||
}
|
||||
}
|
||||
|
||||
static void block_vq(struct virtqueue *vq)
|
||||
{
|
||||
struct itimerval itm;
|
||||
|
||||
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
|
||||
vq->blocked = true;
|
||||
|
||||
itm.it_interval.tv_sec = 0;
|
||||
itm.it_interval.tv_usec = 0;
|
||||
itm.it_value.tv_sec = 0;
|
||||
itm.it_value.tv_usec = 500;
|
||||
|
||||
setitimer(ITIMER_REAL, &itm, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* The Network
|
||||
*
|
||||
|
@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq)
|
|||
* and write them (ignoring the first element) to this device's file descriptor
|
||||
* (/dev/net/tun).
|
||||
*/
|
||||
static void handle_net_output(int fd, struct virtqueue *vq)
|
||||
static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
|
||||
{
|
||||
unsigned int head, out, in;
|
||||
unsigned int head, out, in, num = 0;
|
||||
int len;
|
||||
struct iovec iov[vq->vring.num];
|
||||
|
||||
|
@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
|
|||
(void)convert(&iov[0], struct virtio_net_hdr);
|
||||
len = writev(vq->dev->fd, iov+1, out-1);
|
||||
add_used_and_trigger(fd, vq, head, len);
|
||||
num++;
|
||||
}
|
||||
|
||||
/* Block further kicks and set up a timer if we saw anything. */
|
||||
if (!timeout && num)
|
||||
block_vq(vq);
|
||||
}
|
||||
|
||||
/* This is where we handle a packet coming in from the tun device to our
|
||||
|
@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
|
|||
/*L:215 This is the callback attached to the network and console input
|
||||
* virtqueues: it ensures we try again, in case we stopped console or net
|
||||
* delivery because Guest didn't have any buffers. */
|
||||
static void enable_fd(int fd, struct virtqueue *vq)
|
||||
static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
|
||||
{
|
||||
add_device_fd(vq->dev->fd);
|
||||
/* Tell waker to listen to it again */
|
||||
write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
|
||||
}
|
||||
|
||||
static void net_enable_fd(int fd, struct virtqueue *vq)
|
||||
static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
|
||||
{
|
||||
/* We don't need to know again when Guest refills receive buffer. */
|
||||
vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
|
||||
enable_fd(fd, vq);
|
||||
enable_fd(fd, vq, timeout);
|
||||
}
|
||||
|
||||
/* When the Guest tells us they updated the status field, we handle it. */
|
||||
|
@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
|
|||
if (strcmp(vq->dev->name, "console") != 0)
|
||||
verbose("Output to %s\n", vq->dev->name);
|
||||
if (vq->handle_output)
|
||||
vq->handle_output(fd, vq);
|
||||
vq->handle_output(fd, vq, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
|
|||
strnlen(from_guest_phys(addr), guest_limit - addr));
|
||||
}
|
||||
|
||||
static void handle_timeout(int fd)
|
||||
{
|
||||
char buf[32];
|
||||
struct device *i;
|
||||
struct virtqueue *vq;
|
||||
|
||||
/* Clear the pipe */
|
||||
read(timeoutpipe[0], buf, sizeof(buf));
|
||||
|
||||
/* Check each device and virtqueue: flush blocked ones. */
|
||||
for (i = devices.dev; i; i = i->next) {
|
||||
for (vq = i->vq; vq; vq = vq->next) {
|
||||
if (!vq->blocked)
|
||||
continue;
|
||||
|
||||
vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
|
||||
vq->blocked = false;
|
||||
if (vq->handle_output)
|
||||
vq->handle_output(fd, vq, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* This is called when the Waker wakes us up: check for incoming file
|
||||
* descriptors. */
|
||||
static void handle_input(int fd)
|
||||
|
@ -1071,9 +1120,14 @@ static void handle_input(int fd)
|
|||
for (;;) {
|
||||
struct device *i;
|
||||
fd_set fds = devices.infds;
|
||||
int num;
|
||||
|
||||
num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
|
||||
/* Could get interrupted */
|
||||
if (num < 0)
|
||||
continue;
|
||||
/* If nothing is ready, we're done. */
|
||||
if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
|
||||
if (num == 0)
|
||||
break;
|
||||
|
||||
/* Otherwise, call the device(s) which have readable file
|
||||
|
@ -1097,6 +1151,10 @@ static void handle_input(int fd)
|
|||
write(waker_fd, &dev_fd, sizeof(dev_fd));
|
||||
}
|
||||
}
|
||||
|
||||
/* Is this the timeout fd? */
|
||||
if (FD_ISSET(timeoutpipe[0], &fds))
|
||||
handle_timeout(fd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
|
|||
/* Each device descriptor is followed by the description of its virtqueues. We
|
||||
* specify how many descriptors the virtqueue is to have. */
|
||||
static void add_virtqueue(struct device *dev, unsigned int num_descs,
|
||||
void (*handle_output)(int fd, struct virtqueue *me))
|
||||
void (*handle_output)(int, struct virtqueue *, bool))
|
||||
{
|
||||
unsigned int pages;
|
||||
struct virtqueue **i, *vq = malloc(sizeof(*vq));
|
||||
|
@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
|
|||
vq->last_avail_idx = 0;
|
||||
vq->dev = dev;
|
||||
vq->inflight = 0;
|
||||
vq->blocked = false;
|
||||
|
||||
/* Initialize the configuration. */
|
||||
vq->config.num = num_descs;
|
||||
|
@ -1293,6 +1352,24 @@ static void setup_console(void)
|
|||
}
|
||||
/*:*/
|
||||
|
||||
static void timeout_alarm(int sig)
|
||||
{
|
||||
write(timeoutpipe[1], "", 1);
|
||||
}
|
||||
|
||||
static void setup_timeout(void)
|
||||
{
|
||||
if (pipe(timeoutpipe) != 0)
|
||||
err(1, "Creating timeout pipe");
|
||||
|
||||
if (fcntl(timeoutpipe[1], F_SETFL,
|
||||
fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
|
||||
err(1, "Making timeout pipe nonblocking");
|
||||
|
||||
add_device_fd(timeoutpipe[0]);
|
||||
signal(SIGALRM, timeout_alarm);
|
||||
}
|
||||
|
||||
/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
|
||||
* --sharenet=<name> option which opens or creates a named pipe. This can be
|
||||
* used to send packets to another guest in a 1:1 manner.
|
||||
|
@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
|
|||
}
|
||||
|
||||
/* When the Guest submits some I/O, we just need to wake the I/O thread. */
|
||||
static void handle_virtblk_output(int fd, struct virtqueue *vq)
|
||||
static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
|
||||
{
|
||||
struct vblk_info *vblk = vq->dev->priv;
|
||||
char c = 0;
|
||||
|
@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
|
|||
/* ERESTART means that we need to reboot the guest */
|
||||
} else if (errno == ERESTART) {
|
||||
restart_guest();
|
||||
/* EAGAIN means the Waker wanted us to look at some input.
|
||||
/* EAGAIN means a signal (timeout).
|
||||
* Anything else means a bug or incompatible change. */
|
||||
} else if (errno != EAGAIN)
|
||||
err(1, "Running guest failed");
|
||||
|
@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
|
|||
/* We always have a console device */
|
||||
setup_console();
|
||||
|
||||
/* We can timeout waiting for Guest network transmit. */
|
||||
setup_timeout();
|
||||
|
||||
/* Now we load the kernel */
|
||||
start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
|
||||
|
||||
|
|
Loading…
Reference in a new issue