From 8181b8c1f3a69fe5abcc51cb732eb512ccd1566a Mon Sep 17 00:00:00 2001 From: Gabor Fekete Date: Wed, 8 Jun 2005 14:54:38 -0700 Subject: [PATCH 1/7] [IPV6]: Update parm.link in ip6ip6_tnl_change() Signed-off-by: Gabor Fekete Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3b1c9fa184ae..ba3b0c267f75 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -882,6 +882,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) t->parms.hop_limit = p->hop_limit; t->parms.encap_limit = p->encap_limit; t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; ip6ip6_tnl_link_config(t); return 0; } From 699a411451a32cc111410f44f172b265f6d679c8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 8 Jun 2005 14:55:42 -0700 Subject: [PATCH 2/7] [NET]: Allow controlling NAPI device weight with sysfs Simple interface to allow changing network device scheduling weight with sysfs. Please consider this for 2.6.12, since risk/impact is small. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/net-sysfs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 910eb4c05a47..e2137f3e489d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -185,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len); +NETDEVICE_SHOW(weight, fmt_dec); + +static int change_weight(struct net_device *net, unsigned long new_weight) +{ + net->weight = new_weight; + return 0; +} + +static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len) +{ + return netdev_store(dev, buf, len, change_weight); +} + +static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, + store_weight); + static struct class_device_attribute *net_class_attributes[] = { &class_device_attr_ifindex, @@ -194,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = { &class_device_attr_features, &class_device_attr_mtu, &class_device_attr_flags, + &class_device_attr_weight, &class_device_attr_type, &class_device_attr_address, &class_device_attr_broadcast, From e3876605450979fe52a1a03e7eb78a89bf59e76a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 8 Jun 2005 14:56:01 -0700 Subject: [PATCH 3/7] [NET]: Fix sysctl net.core.dev_weight Changing the sysctl net.core.dev_weight has no effect because the weight of the backlog devices is set during initialization and never changed. This patch propagates any changes to the global value affected by sysctl to the per-cpu devices. It is done every time the packet handler function is run. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index f15a3ffff635..ab935778ce81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1744,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; + backlog_dev->weight = weight_p; for (;;) { struct sk_buff *skb; struct net_device *dev; From b824979aeccbfd997e6e5dbe75c47d586b5a2923 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 8 Jun 2005 15:10:22 -0700 Subject: [PATCH 4/7] [PKT_SCHED]: Fix typo in NET_EMATCH_STACK help text Spotted by Geert Uytterhoeven. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b0941186f867..b22c9beb604d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -405,7 +405,7 @@ config NET_EMATCH_STACK ---help--- Size of the local stack variable used while evaluating the tree of ematches. Limits the depth of the tree, i.e. the number of - encapsulated precedences. Every level requires 4 bytes of addtional + encapsulated precedences. Every level requires 4 bytes of additional stack space. config NET_EMATCH_CMP From 4890062960cbc4d3cebdbd8261a68bc85efcf5d4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 8 Jun 2005 15:10:48 -0700 Subject: [PATCH 5/7] [PKT_SCHED]: Allow socket attributes to be matched on via meta ematch Adds meta collectors for all socket attributes that make sense to be filtered upon. Some of them are only useful for debugging but having them doesn't hurt. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/tc_ematch/tc_em_meta.h | 30 +++ net/sched/em_meta.c | 291 ++++++++++++++++++++++++--- 2 files changed, 297 insertions(+), 24 deletions(-) diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index aa6b48bb4dcd..a6b2cc530af5 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -56,6 +56,36 @@ enum TCF_META_ID_TCCLASSID, TCF_META_ID_RTCLASSID, TCF_META_ID_RTIIF, + TCF_META_ID_SK_FAMILY, + TCF_META_ID_SK_STATE, + TCF_META_ID_SK_REUSE, + TCF_META_ID_SK_BOUND_IF, + TCF_META_ID_SK_REFCNT, + TCF_META_ID_SK_SHUTDOWN, + TCF_META_ID_SK_PROTO, + TCF_META_ID_SK_TYPE, + TCF_META_ID_SK_RCVBUF, + TCF_META_ID_SK_RMEM_ALLOC, + TCF_META_ID_SK_WMEM_ALLOC, + TCF_META_ID_SK_OMEM_ALLOC, + TCF_META_ID_SK_WMEM_QUEUED, + TCF_META_ID_SK_RCV_QLEN, + TCF_META_ID_SK_SND_QLEN, + TCF_META_ID_SK_ERR_QLEN, + TCF_META_ID_SK_FORWARD_ALLOCS, + TCF_META_ID_SK_SNDBUF, + TCF_META_ID_SK_ALLOCS, + TCF_META_ID_SK_ROUTE_CAPS, + TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_LINGERTIME, + TCF_META_ID_SK_ACK_BACKLOG, + TCF_META_ID_SK_MAX_ACK_BACKLOG, + TCF_META_ID_SK_PRIO, + TCF_META_ID_SK_RCVLOWAT, + TCF_META_ID_SK_RCVTIMEO, + TCF_META_ID_SK_SNDTIMEO, + TCF_META_ID_SK_SENDMSG_OFF, + TCF_META_ID_SK_WRITE_PENDING, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index f1eeaf65cee5..ed2a46cbb67f 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -32,7 +32,7 @@ * +-----------+ +-----------+ * | | * ---> meta_ops[INT][INDEV](...) 
| - * | | + * | | * ----------- | * V V * +-----------+ +-----------+ @@ -70,6 +70,7 @@ #include #include #include +#include struct meta_obj { @@ -283,6 +284,214 @@ META_COLLECTOR(int_rtiif) dst->value = ((struct rtable*) skb->dst)->fl.iif; } +/************************************************************************** + * Socket Attributes + **************************************************************************/ + +#define SKIP_NONLOCAL(skb) \ + if (unlikely(skb->sk == NULL)) { \ + *err = -1; \ + return; \ + } + +META_COLLECTOR(int_sk_family) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_family; +} + +META_COLLECTOR(int_sk_state) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_state; +} + +META_COLLECTOR(int_sk_reuse) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_reuse; +} + +META_COLLECTOR(int_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + /* No error if bound_dev_if is 0, legal userspace check */ + dst->value = skb->sk->sk_bound_dev_if; +} + +META_COLLECTOR(var_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + + if (skb->sk->sk_bound_dev_if == 0) { + dst->value = (unsigned long) "any"; + dst->len = 3; + } else { + struct net_device *dev; + + dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + *err = var_dev(dev, dst); + if (dev) + dev_put(dev); + } +} + +META_COLLECTOR(int_sk_refcnt) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_refcnt); +} + +META_COLLECTOR(int_sk_rcvbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvbuf; +} + +META_COLLECTOR(int_sk_shutdown) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_shutdown; +} + +META_COLLECTOR(int_sk_proto) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_protocol; +} + +META_COLLECTOR(int_sk_type) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_type; +} + +META_COLLECTOR(int_sk_rmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_rmem_alloc); +} + +META_COLLECTOR(int_sk_wmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = 
atomic_read(&skb->sk->sk_wmem_alloc); +} + +META_COLLECTOR(int_sk_omem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_omem_alloc); +} + +META_COLLECTOR(int_sk_rcv_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_receive_queue.qlen; +} + +META_COLLECTOR(int_sk_snd_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_write_queue.qlen; +} + +META_COLLECTOR(int_sk_wmem_queued) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_wmem_queued; +} + +META_COLLECTOR(int_sk_fwd_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_forward_alloc; +} + +META_COLLECTOR(int_sk_sndbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndbuf; +} + +META_COLLECTOR(int_sk_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_allocation; +} + +META_COLLECTOR(int_sk_route_caps) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_route_caps; +} + +META_COLLECTOR(int_sk_hashent) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_hashent; +} + +META_COLLECTOR(int_sk_lingertime) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_lingertime / HZ; +} + +META_COLLECTOR(int_sk_err_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_error_queue.qlen; +} + +META_COLLECTOR(int_sk_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_ack_backlog; +} + +META_COLLECTOR(int_sk_max_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_max_ack_backlog; +} + +META_COLLECTOR(int_sk_prio) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_priority; +} + +META_COLLECTOR(int_sk_rcvlowat) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvlowat; +} + +META_COLLECTOR(int_sk_rcvtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvtimeo / HZ; +} + +META_COLLECTOR(int_sk_sndtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndtimeo / HZ; +} + +META_COLLECTOR(int_sk_sendmsg_off) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndmsg_off; +} + +META_COLLECTOR(int_sk_write_pend) +{ + SKIP_NONLOCAL(skb); + dst->value 
= skb->sk->sk_write_pending; +} + /************************************************************************** * Meta value collectors assignment table **************************************************************************/ @@ -293,41 +502,75 @@ struct meta_ops struct meta_value *, struct meta_obj *, int *); }; +#define META_ID(name) TCF_META_ID_##name +#define META_FUNC(name) { .get = meta_##name } + /* Meta value operations table listing all meta value collectors and * assigns them to a type and meta id. */ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [TCF_META_TYPE_VAR] = { - [TCF_META_ID_DEV] = { .get = meta_var_dev }, - [TCF_META_ID_INDEV] = { .get = meta_var_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } + [META_ID(DEV)] = META_FUNC(var_dev), + [META_ID(INDEV)] = META_FUNC(var_indev), + [META_ID(REALDEV)] = META_FUNC(var_realdev), + [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), }, [TCF_META_TYPE_INT] = { - [TCF_META_ID_RANDOM] = { .get = meta_int_random }, - [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, - [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, - [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, - [TCF_META_ID_DEV] = { .get = meta_int_dev }, - [TCF_META_ID_INDEV] = { .get = meta_int_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, - [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, - [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, - [TCF_META_ID_SECURITY] = { .get = meta_int_security }, - [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, - [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, - [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, - [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, + [META_ID(RANDOM)] = META_FUNC(int_random), + [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0), + [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1), + [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2), + [META_ID(DEV)] = META_FUNC(int_dev), + 
[META_ID(INDEV)] = META_FUNC(int_indev), + [META_ID(REALDEV)] = META_FUNC(int_realdev), + [META_ID(PRIORITY)] = META_FUNC(int_priority), + [META_ID(PROTOCOL)] = META_FUNC(int_protocol), + [META_ID(SECURITY)] = META_FUNC(int_security), + [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), + [META_ID(PKTLEN)] = META_FUNC(int_pktlen), + [META_ID(DATALEN)] = META_FUNC(int_datalen), + [META_ID(MACLEN)] = META_FUNC(int_maclen), #ifdef CONFIG_NETFILTER - [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, + [META_ID(NFMARK)] = META_FUNC(int_nfmark), #endif - [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, + [META_ID(TCINDEX)] = META_FUNC(int_tcindex), #ifdef CONFIG_NET_CLS_ACT - [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, - [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, + [META_ID(TCVERDICT)] = META_FUNC(int_tcverd), + [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid), #endif #ifdef CONFIG_NET_CLS_ROUTE - [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, + [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), #endif - [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } + [META_ID(RTIIF)] = META_FUNC(int_rtiif), + [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family), + [META_ID(SK_STATE)] = META_FUNC(int_sk_state), + [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse), + [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if), + [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt), + [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf), + [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf), + [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown), + [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto), + [META_ID(SK_TYPE)] = META_FUNC(int_sk_type), + [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc), + [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc), + [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc), + [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued), + [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen), + [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen), + 
[META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen), + [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), + [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), + [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), + [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), + [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), + [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), + [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio), + [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat), + [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo), + [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), + [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), + [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), } }; From e1e284a4bd827db2288af9536664b44590e419eb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 8 Jun 2005 15:11:02 -0700 Subject: [PATCH 6/7] [PKT_SCHED]: Dump classification result for basic classifier Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0d2d4415f334..dfb300bb6baa 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, rta = (struct rtattr *) b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (f->res.classid) + RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid); + if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto rtattr_failure; From 98e56405521b74b4826f855d45ef7859f34548ff Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 8 Jun 2005 15:11:19 -0700 Subject: [PATCH 7/7] [PKT_SCHED]: Fix numeric comparison in meta ematch This patch is brought to you by the department of applied stupidity. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/sched/em_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index ed2a46cbb67f..48bb23c2a35a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -639,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b) /* Let gcc optimize it, the unlikely is not really based on * some numbers but jump free code for mismatches seems * more logical. */ - if (unlikely(a == b)) + if (unlikely(a->value == b->value)) return 0; - else if (a < b) + else if (a->value < b->value) return -1; else return 1;