4a6ee25c7e
netprio kept track of the highest prioidx allocated and resized priomaps accordingly when necessary. This makes it necessary to keep track of prioidx allocation and may end up resizing on every new prioidx. Update extend_netdev_table() such that it takes @target_idx which the priomap should be able to accommodate. If the priomap is large enough, nothing happens; otherwise, the size is doubled until @target_idx can be accommodated. This makes max_prioidx and write_update_netdev_table() unnecessary. write_priomap() now calls extend_netdev_table() directly. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Neil Horman <nhorman@tuxdriver.com> Tested-and-Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Acked-by: David S. Miller <davem@davemloft.net>
333 lines
7.5 KiB
C
333 lines
7.5 KiB
C
/*
|
|
* net/core/netprio_cgroup.c Priority Control Group
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
* Authors: Neil Horman <nhorman@tuxdriver.com>
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/atomic.h>
|
|
#include <net/rtnetlink.h>
|
|
#include <net/pkt_cls.h>
|
|
#include <net/sock.h>
|
|
#include <net/netprio_cgroup.h>
|
|
|
|
#include <linux/fdtable.h>
|
|
|
|
#define PRIOMAP_MIN_SZ 128
|
|
#define PRIOIDX_SZ 128
|
|
|
|
static unsigned long prioidx_map[PRIOIDX_SZ];
|
|
static DEFINE_SPINLOCK(prioidx_map_lock);
|
|
|
|
static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
|
|
{
|
|
return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
|
|
struct cgroup_netprio_state, css);
|
|
}
|
|
|
|
/*
 * Allocate an unused priority index from prioidx_map.  On success,
 * stores the index in @prio and returns 0; returns -ENOSPC when no
 * free index is available.
 *
 * NOTE(review): the search length passed to find_first_zero_bit() is
 * sizeof(unsigned long) * PRIOIDX_SZ, which counts bytes rather than
 * bits, so only part of the bitmap is ever used.  Preserved as-is; it
 * matches the artificially low limit acknowledged in net_prio_subsys.
 */
static int get_prioidx(u32 *prio)
{
	unsigned long flags;
	u32 idx;
	int ret = -ENOSPC;

	spin_lock_irqsave(&prioidx_map_lock, flags);
	idx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
	if (idx < sizeof(unsigned long) * PRIOIDX_SZ) {
		set_bit(idx, prioidx_map);
		*prio = idx;
		ret = 0;
	}
	spin_unlock_irqrestore(&prioidx_map_lock, flags);

	return ret;
}
|
|
|
|
/* Release priority index @idx back to prioidx_map for reuse. */
static void put_prioidx(u32 idx)
{
	unsigned long flags;

	/* irqsave variant matches get_prioidx(); same lock guards the bitmap */
	spin_lock_irqsave(&prioidx_map_lock, flags);
	clear_bit(idx, prioidx_map);
	spin_unlock_irqrestore(&prioidx_map_lock, flags);
}
|
|
|
|
/*
|
|
* Extend @dev->priomap so that it's large enough to accomodate
|
|
* @target_idx. @dev->priomap.priomap_len > @target_idx after successful
|
|
* return. Must be called under rtnl lock.
|
|
*/
|
|
static int extend_netdev_table(struct net_device *dev, u32 target_idx)
|
|
{
|
|
struct netprio_map *old, *new;
|
|
size_t new_sz, new_len;
|
|
|
|
/* is the existing priomap large enough? */
|
|
old = rtnl_dereference(dev->priomap);
|
|
if (old && old->priomap_len > target_idx)
|
|
return 0;
|
|
|
|
/*
|
|
* Determine the new size. Let's keep it power-of-two. We start
|
|
* from PRIOMAP_MIN_SZ and double it until it's large enough to
|
|
* accommodate @target_idx.
|
|
*/
|
|
new_sz = PRIOMAP_MIN_SZ;
|
|
while (true) {
|
|
new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
|
|
sizeof(new->priomap[0]);
|
|
if (new_len > target_idx)
|
|
break;
|
|
new_sz *= 2;
|
|
/* overflowed? */
|
|
if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
|
|
return -ENOSPC;
|
|
}
|
|
|
|
/* allocate & copy */
|
|
new = kzalloc(new_sz, GFP_KERNEL);
|
|
if (!new) {
|
|
pr_warn("Unable to alloc new priomap!\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
if (old)
|
|
memcpy(new->priomap, old->priomap,
|
|
old->priomap_len * sizeof(old->priomap[0]));
|
|
|
|
new->priomap_len = new_len;
|
|
|
|
/* install the new priomap */
|
|
rcu_assign_pointer(dev->priomap, new);
|
|
if (old)
|
|
kfree_rcu(old, rcu);
|
|
return 0;
|
|
}
|
|
|
|
static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
|
|
{
|
|
struct cgroup_netprio_state *cs;
|
|
int ret = -EINVAL;
|
|
|
|
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
|
|
if (!cs)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
|
|
goto out;
|
|
|
|
ret = get_prioidx(&cs->prioidx);
|
|
if (ret < 0) {
|
|
pr_warn("No space in priority index array\n");
|
|
goto out;
|
|
}
|
|
|
|
return &cs->css;
|
|
out:
|
|
kfree(cs);
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
static void cgrp_css_free(struct cgroup *cgrp)
|
|
{
|
|
struct cgroup_netprio_state *cs;
|
|
struct net_device *dev;
|
|
struct netprio_map *map;
|
|
|
|
cs = cgrp_netprio_state(cgrp);
|
|
rtnl_lock();
|
|
for_each_netdev(&init_net, dev) {
|
|
map = rtnl_dereference(dev->priomap);
|
|
if (map && cs->prioidx < map->priomap_len)
|
|
map->priomap[cs->prioidx] = 0;
|
|
}
|
|
rtnl_unlock();
|
|
put_prioidx(cs->prioidx);
|
|
kfree(cs);
|
|
}
|
|
|
|
static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
|
|
{
|
|
return (u64)cgrp_netprio_state(cgrp)->prioidx;
|
|
}
|
|
|
|
static int read_priomap(struct cgroup *cont, struct cftype *cft,
|
|
struct cgroup_map_cb *cb)
|
|
{
|
|
struct net_device *dev;
|
|
u32 prioidx = cgrp_netprio_state(cont)->prioidx;
|
|
u32 priority;
|
|
struct netprio_map *map;
|
|
|
|
rcu_read_lock();
|
|
for_each_netdev_rcu(&init_net, dev) {
|
|
map = rcu_dereference(dev->priomap);
|
|
priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
|
|
cb->fill(cb, dev->name, priority);
|
|
}
|
|
rcu_read_unlock();
|
|
return 0;
|
|
}
|
|
|
|
static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
|
|
const char *buffer)
|
|
{
|
|
u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
|
|
char devname[IFNAMSIZ + 1];
|
|
struct net_device *dev;
|
|
struct netprio_map *map;
|
|
u32 prio;
|
|
int ret;
|
|
|
|
if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
|
|
return -EINVAL;
|
|
|
|
dev = dev_get_by_name(&init_net, devname);
|
|
if (!dev)
|
|
return -ENODEV;
|
|
|
|
rtnl_lock();
|
|
|
|
ret = extend_netdev_table(dev, prioidx);
|
|
if (ret)
|
|
goto out_unlock;
|
|
|
|
map = rtnl_dereference(dev->priomap);
|
|
if (map)
|
|
map->priomap[prioidx] = prio;
|
|
out_unlock:
|
|
rtnl_unlock();
|
|
dev_put(dev);
|
|
return ret;
|
|
}
|
|
|
|
static int update_netprio(const void *v, struct file *file, unsigned n)
|
|
{
|
|
int err;
|
|
struct socket *sock = sock_from_file(file, &err);
|
|
if (sock)
|
|
sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
|
|
return 0;
|
|
}
|
|
|
|
void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
|
|
{
|
|
struct task_struct *p;
|
|
void *v;
|
|
|
|
cgroup_taskset_for_each(p, cgrp, tset) {
|
|
task_lock(p);
|
|
v = (void *)(unsigned long)task_netprioidx(p);
|
|
iterate_fd(p->files, 0, update_netprio, v);
|
|
task_unlock(p);
|
|
}
|
|
}
|
|
|
|
/* Control files exposed in each net_prio cgroup directory. */
static struct cftype ss_files[] = {
	{
		/* read-only priority index of this cgroup */
		.name = "prioidx",
		.read_u64 = read_prioidx,
	},
	{
		/* per-device priority map; writable as "<ifname> <prio>" */
		.name = "ifpriomap",
		.read_map = read_priomap,
		.write_string = write_priomap,
	},
	{ }	/* terminate */
};
|
|
|
|
/* Subsystem descriptor registered with the cgroup core (loadable module). */
struct cgroup_subsys net_prio_subsys = {
	.name		= "net_prio",
	.css_alloc	= cgrp_css_alloc,
	.css_free	= cgrp_css_free,
	.attach		= net_prio_attach,
	.subsys_id	= net_prio_subsys_id,
	.base_cftypes	= ss_files,
	.module		= THIS_MODULE,

	/*
	 * net_prio has artificial limit on the number of cgroups and
	 * disallows nesting making it impossible to co-mount it with other
	 * hierarchical subsystems.  Remove the artificially low PRIOIDX_SZ
	 * limit and properly nest configuration such that children follow
	 * their parents' configurations by default and are allowed to
	 * override and remove the following.
	 */
	.broken_hierarchy = true,
};
|
|
|
|
static int netprio_device_event(struct notifier_block *unused,
|
|
unsigned long event, void *ptr)
|
|
{
|
|
struct net_device *dev = ptr;
|
|
struct netprio_map *old;
|
|
|
|
/*
|
|
* Note this is called with rtnl_lock held so we have update side
|
|
* protection on our rcu assignments
|
|
*/
|
|
|
|
switch (event) {
|
|
case NETDEV_UNREGISTER:
|
|
old = rtnl_dereference(dev->priomap);
|
|
RCU_INIT_POINTER(dev->priomap, NULL);
|
|
if (old)
|
|
kfree_rcu(old, rcu);
|
|
break;
|
|
}
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
/* Notifier block hooking netprio_device_event into netdev events. */
static struct notifier_block netprio_device_notifier = {
	.notifier_call = netprio_device_event
};
|
|
|
|
/*
 * Module init: register the cgroup subsystem, then the netdevice
 * notifier.  Fix: the original ignored register_netdevice_notifier()'s
 * return value, leaving the subsystem loaded but priomaps leaking on
 * device unregister if notifier registration failed; now we unwind the
 * subsystem load on that error.
 */
static int __init init_cgroup_netprio(void)
{
	int ret;

	ret = cgroup_load_subsys(&net_prio_subsys);
	if (ret)
		return ret;

	ret = register_netdevice_notifier(&netprio_device_notifier);
	if (ret)
		cgroup_unload_subsys(&net_prio_subsys);

	return ret;
}
|
|
|
|
/*
 * Module exit: unhook the notifier and subsystem, then reclaim every
 * device's priomap (freed after an RCU grace period).
 */
static void __exit exit_cgroup_netprio(void)
{
	struct net_device *dev;

	unregister_netdevice_notifier(&netprio_device_notifier);

	cgroup_unload_subsys(&net_prio_subsys);

	rtnl_lock();
	for_each_netdev(&init_net, dev) {
		struct netprio_map *old = rtnl_dereference(dev->priomap);

		RCU_INIT_POINTER(dev->priomap, NULL);
		if (old)
			kfree_rcu(old, rcu);
	}
	rtnl_unlock();
}
|
|
|
|
module_init(init_cgroup_netprio);
|
|
module_exit(exit_cgroup_netprio);
|
|
MODULE_LICENSE("GPL v2");
|