summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '0050-tools-xenstore-limit-outstanding-requests.patch')
-rw-r--r--0050-tools-xenstore-limit-outstanding-requests.patch453
1 files changed, 453 insertions, 0 deletions
diff --git a/0050-tools-xenstore-limit-outstanding-requests.patch b/0050-tools-xenstore-limit-outstanding-requests.patch
new file mode 100644
index 0000000..bb10180
--- /dev/null
+++ b/0050-tools-xenstore-limit-outstanding-requests.patch
@@ -0,0 +1,453 @@
+From 49344fb86ff040bae1107e236592c2d4dc4607f3 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:08 +0200
+Subject: [PATCH 50/87] tools/xenstore: limit outstanding requests
+
+Add another quota for limiting the number of outstanding requests of a
+guest. As the way to specify quotas on the command line is becoming
+rather nasty, switch to a new scheme using [--quota|-Q] <what>=<val>
+allowing to add more quotas in future easily.
+
+Set the default value to 20 (basically a random value not seeming to
+be too high or too low).
+
+A request is said to be outstanding if any message generated by this
+request (the direct response plus potential watch events) is not yet
+completely stored into a ring buffer. The initial watch event sent as
+a result of registering a watch is an exception.
+
+Note that across a live update the relation to buffered watch events
+for other domains is lost.
+
+Use talloc_zero() for allocating the domain structure in order to have
+all per-domain quota zeroed initially.
+
+This is part of XSA-326 / CVE-2022-42312.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 36de433a273f55d614c83b89c9a8972287a1e475)
+---
+ tools/xenstore/xenstored_core.c | 88 +++++++++++++++++++++++++++++--
+ tools/xenstore/xenstored_core.h | 20 ++++++-
+ tools/xenstore/xenstored_domain.c | 38 ++++++++++---
+ tools/xenstore/xenstored_domain.h | 3 ++
+ tools/xenstore/xenstored_watch.c | 15 ++++--
+ 5 files changed, 150 insertions(+), 14 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 45244c021cd3..488d540f3a32 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -107,6 +107,7 @@ int quota_max_entry_size = 2048; /* 2K */
+ int quota_max_transaction = 10;
+ int quota_nb_perms_per_node = 5;
+ int quota_max_path_len = XENSTORE_REL_PATH_MAX;
++int quota_req_outstanding = 20;
+
+ unsigned int timeout_watch_event_msec = 20000;
+
+@@ -223,12 +224,24 @@ static uint64_t get_now_msec(void)
+ return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
+ }
+
++/*
++ * Remove a struct buffered_data from the list of outgoing data.
++ * A struct buffered_data related to a request having caused watch events to be
++ * sent is kept until all those events have been written out.
++ * Each watch event is referencing the related request via pend.req, while the
++ * number of watch events caused by a request is kept in pend.ref.event_cnt
++ * (those two cases are mutually exclusive, so the two fields can share memory
++ * via a union).
++ * The struct buffered_data is freed only if no related watch event is
++ * referencing it. The related return data can be freed right away.
++ */
+ static void free_buffered_data(struct buffered_data *out,
+ struct connection *conn)
+ {
+ struct buffered_data *req;
+
+ list_del(&out->list);
++ out->on_out_list = false;
+
+ /*
+ * Update conn->timeout_msec with the next found timeout value in the
+@@ -244,6 +257,30 @@ static void free_buffered_data(struct buffered_data *out,
+ }
+ }
+
++ if (out->hdr.msg.type == XS_WATCH_EVENT) {
++ req = out->pend.req;
++ if (req) {
++ req->pend.ref.event_cnt--;
++ if (!req->pend.ref.event_cnt && !req->on_out_list) {
++ if (req->on_ref_list) {
++ domain_outstanding_domid_dec(
++ req->pend.ref.domid);
++ list_del(&req->list);
++ }
++ talloc_free(req);
++ }
++ }
++ } else if (out->pend.ref.event_cnt) {
++ /* Hang out off from conn. */
++ talloc_steal(NULL, out);
++ if (out->buffer != out->default_buffer)
++ talloc_free(out->buffer);
++ list_add(&out->list, &conn->ref_list);
++ out->on_ref_list = true;
++ return;
++ } else
++ domain_outstanding_dec(conn);
++
+ talloc_free(out);
+ }
+
+@@ -405,6 +442,7 @@ int delay_request(struct connection *conn, struct buffered_data *in,
+ static int destroy_conn(void *_conn)
+ {
+ struct connection *conn = _conn;
++ struct buffered_data *req;
+
+ /* Flush outgoing if possible, but don't block. */
+ if (!conn->domain) {
+@@ -418,6 +456,11 @@ static int destroy_conn(void *_conn)
+ break;
+ close(conn->fd);
+ }
++
++ conn_free_buffered_data(conn);
++ list_for_each_entry(req, &conn->ref_list, list)
++ req->on_ref_list = false;
++
+ if (conn->target)
+ talloc_unlink(conn, conn->target);
+ list_del(&conn->list);
+@@ -893,6 +936,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
++ domain_outstanding_inc(conn);
+ }
+
+ /*
+@@ -900,7 +945,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ * As this is not directly related to the current command, errors can't be
+ * reported.
+ */
+-void send_event(struct connection *conn, const char *path, const char *token)
++void send_event(struct buffered_data *req, struct connection *conn,
++ const char *path, const char *token)
+ {
+ struct buffered_data *bdata;
+ unsigned int len;
+@@ -930,8 +976,13 @@ void send_event(struct connection *conn, const char *path, const char *token)
+ conn->timeout_msec = bdata->timeout_msec;
+ }
+
++ bdata->pend.req = req;
++ if (req)
++ req->pend.ref.event_cnt++;
++
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
+ }
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+@@ -1740,6 +1791,7 @@ static void handle_input(struct connection *conn)
+ return;
+ }
+ in = conn->in;
++ in->pend.ref.domid = conn->id;
+
+ /* Not finished header yet? */
+ if (in->inhdr) {
+@@ -1808,6 +1860,7 @@ struct connection *new_connection(const struct interface_funcs *funcs)
+ new->is_stalled = false;
+ new->transaction_started = 0;
+ INIT_LIST_HEAD(&new->out_list);
++ INIT_LIST_HEAD(&new->ref_list);
+ INIT_LIST_HEAD(&new->watches);
+ INIT_LIST_HEAD(&new->transaction_list);
+ INIT_LIST_HEAD(&new->delayed);
+@@ -2286,6 +2339,9 @@ static void usage(void)
+ " -t, --transaction <nb> limit the number of transaction allowed per domain,\n"
+ " -A, --perm-nb <nb> limit the number of permissions per node,\n"
+ " -M, --path-max <chars> limit the allowed Xenstore node path length,\n"
++" -Q, --quota <what>=<nb> set the quota <what> to the value <nb>, allowed\n"
++" quotas are:\n"
++" outstanding: number of outstanding requests\n"
+ " -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
+ " allowed timeout candidates are:\n"
+ " watch-event: time a watch-event is kept pending\n"
+@@ -2311,6 +2367,7 @@ static struct option options[] = {
+ { "transaction", 1, NULL, 't' },
+ { "perm-nb", 1, NULL, 'A' },
+ { "path-max", 1, NULL, 'M' },
++ { "quota", 1, NULL, 'Q' },
+ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+ { "internal-db", 0, NULL, 'I' },
+@@ -2358,6 +2415,20 @@ static void set_timeout(const char *arg)
+ barf("unknown timeout \"%s\"\n", arg);
+ }
+
++static void set_quota(const char *arg)
++{
++ const char *eq = strchr(arg, '=');
++ int val;
++
++ if (!eq)
++ barf("quotas must be specified via <what>=<nb>\n");
++ val = get_optval_int(eq + 1);
++ if (what_matches(arg, "outstanding"))
++ quota_req_outstanding = val;
++ else
++ barf("unknown quota \"%s\"\n", arg);
++}
++
+ int main(int argc, char *argv[])
+ {
+ int opt;
+@@ -2372,8 +2443,8 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", options,
+- NULL)) != -1) {
++ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:T:RVW:w:U",
++ options, NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+ no_domain_init = true;
+@@ -2422,6 +2493,9 @@ int main(int argc, char *argv[])
+ quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
+ quota_max_path_len);
+ break;
++ case 'Q':
++ set_quota(optarg);
++ break;
+ case 'w':
+ set_timeout(optarg);
+ break;
+@@ -2875,6 +2949,14 @@ static void add_buffered_data(struct buffered_data *bdata,
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
++ /*
++ * Watch events are never "outstanding", but the request causing them
++ * are instead kept "outstanding" until all watch events caused by that
++ * request have been delivered.
++ */
++ if (bdata->hdr.msg.type != XS_WATCH_EVENT)
++ domain_outstanding_inc(conn);
+ }
+
+ void read_state_buffered_data(const void *ctx, struct connection *conn,
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 8a81fc693f01..db09f463a657 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -56,6 +56,8 @@ struct xs_state_connection;
+ struct buffered_data
+ {
+ struct list_head list;
++ bool on_out_list;
++ bool on_ref_list;
+
+ /* Are we still doing the header? */
+ bool inhdr;
+@@ -63,6 +65,17 @@ struct buffered_data
+ /* How far are we? */
+ unsigned int used;
+
++ /* Outstanding request accounting. */
++ union {
++ /* ref is being used for requests. */
++ struct {
++ unsigned int event_cnt; /* # of outstanding events. */
++ unsigned int domid; /* domid of request. */
++ } ref;
++ /* req is being used for watch events. */
++ struct buffered_data *req; /* request causing event. */
++ } pend;
++
+ union {
+ struct xsd_sockmsg msg;
+ char raw[sizeof(struct xsd_sockmsg)];
+@@ -123,6 +136,9 @@ struct connection
+ struct list_head out_list;
+ uint64_t timeout_msec;
+
++ /* Referenced requests no longer pending. */
++ struct list_head ref_list;
++
+ /* Transaction context for current request (NULL if none). */
+ struct transaction *transaction;
+
+@@ -191,7 +207,8 @@ unsigned int get_string(const struct buffered_data *data, unsigned int offset);
+
+ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
+-void send_event(struct connection *conn, const char *path, const char *token);
++void send_event(struct buffered_data *req, struct connection *conn,
++ const char *path, const char *token);
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+ void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+@@ -247,6 +264,7 @@ extern int dom0_domid;
+ extern int dom0_event;
+ extern int priv_domid;
+ extern int quota_nb_entry_per_domain;
++extern int quota_req_outstanding;
+
+ extern unsigned int timeout_watch_event_msec;
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 93c4c1edcdd1..850085a92c76 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -78,6 +78,9 @@ struct domain
+ /* number of watch for this domain */
+ int nbwatch;
+
++ /* Number of outstanding requests. */
++ int nboutstanding;
++
+ /* write rate limit */
+ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
+ struct wrl_timestampt wrl_timestamp;
+@@ -183,8 +186,12 @@ static bool domain_can_read(struct connection *conn)
+ {
+ struct xenstore_domain_interface *intf = conn->domain->interface;
+
+- if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
+- return false;
++ if (domain_is_unprivileged(conn)) {
++ if (conn->domain->wrl_credit < 0)
++ return false;
++ if (conn->domain->nboutstanding >= quota_req_outstanding)
++ return false;
++ }
+
+ return (intf->req_cons != intf->req_prod);
+ }
+@@ -331,7 +338,7 @@ static struct domain *alloc_domain(const void *context, unsigned int domid)
+ {
+ struct domain *domain;
+
+- domain = talloc(context, struct domain);
++ domain = talloc_zero(context, struct domain);
+ if (!domain) {
+ errno = ENOMEM;
+ return NULL;
+@@ -392,9 +399,6 @@ static int new_domain(struct domain *domain, int port, bool restore)
+ domain->conn->domain = domain;
+ domain->conn->id = domain->domid;
+
+- domain->nbentry = 0;
+- domain->nbwatch = 0;
+-
+ return 0;
+ }
+
+@@ -938,6 +942,28 @@ int domain_watch(struct connection *conn)
+ : 0;
+ }
+
++void domain_outstanding_inc(struct connection *conn)
++{
++ if (!conn || !conn->domain)
++ return;
++ conn->domain->nboutstanding++;
++}
++
++void domain_outstanding_dec(struct connection *conn)
++{
++ if (!conn || !conn->domain)
++ return;
++ conn->domain->nboutstanding--;
++}
++
++void domain_outstanding_domid_dec(unsigned int domid)
++{
++ struct domain *d = find_domain_by_domid(domid);
++
++ if (d)
++ d->nboutstanding--;
++}
++
+ static wrl_creditt wrl_config_writecost = WRL_FACTOR;
+ static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR;
+ static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR;
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index 1e929b8f8c6f..4f51b005291a 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -64,6 +64,9 @@ int domain_entry(struct connection *conn);
+ void domain_watch_inc(struct connection *conn);
+ void domain_watch_dec(struct connection *conn);
+ int domain_watch(struct connection *conn);
++void domain_outstanding_inc(struct connection *conn);
++void domain_outstanding_dec(struct connection *conn);
++void domain_outstanding_domid_dec(unsigned int domid);
+
+ /* Special node permission handling. */
+ int set_perms_special(struct connection *conn, const char *name,
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index 205d9d8ea116..0755ffa375ba 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -142,6 +142,7 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ struct node *node, bool exact, struct node_perms *perms)
+ {
+ struct connection *i;
++ struct buffered_data *req;
+ struct watch *watch;
+
+ /* During transactions, don't fire watches, but queue them. */
+@@ -150,6 +151,8 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ return;
+ }
+
++ req = domain_is_unprivileged(conn) ? conn->in : NULL;
++
+ /* Create an event for each watch. */
+ list_for_each_entry(i, &connections, list) {
+ /* introduce/release domain watches */
+@@ -164,12 +167,12 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ list_for_each_entry(watch, &i->watches, list) {
+ if (exact) {
+ if (streq(name, watch->node))
+- send_event(i,
++ send_event(req, i,
+ get_watch_path(watch, name),
+ watch->token);
+ } else {
+ if (is_child(name, watch->node))
+- send_event(i,
++ send_event(req, i,
+ get_watch_path(watch, name),
+ watch->token);
+ }
+@@ -269,8 +272,12 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ trace_create(watch, "watch");
+ send_ack(conn, XS_WATCH);
+
+- /* We fire once up front: simplifies clients and restart. */
+- send_event(conn, get_watch_path(watch, watch->node), watch->token);
++ /*
++ * We fire once up front: simplifies clients and restart.
++ * This event will not be linked to the XS_WATCH request.
++ */
++ send_event(NULL, conn, get_watch_path(watch, watch->node),
++ watch->token);
+
+ return 0;
+ }
+--
+2.37.4
+