netfilter: move skb_gso_segment into nfnetlink_queue module
skb_gso_segment is expensive, so it would be nice if we could avoid it in the future. However, userspace needs to be prepared to receive larger-than-MTU packets (which will also have incorrect l3/l4 checksums), so we cannot simply remove it. The plan is to add a per-queue feature flag that userspace can set when binding the queue. The problem is that in nf_queue, we only have a queue number, not the queue context/configuration settings. This patch should have no impact other than the skb_gso_segment call now being in a function that has access to the queue config data. A new size attribute in nf_queue_entry is needed so nfnetlink_queue can duplicate the entry of the GSO skb when segmenting the skb while also copying the route key. The follow-up patch adds a switch to disable skb_gso_segment when the queue config says so. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
4bd60443cc
commit
a5fedd43d5
3 changed files with 152 additions and 104 deletions
|
@ -9,10 +9,13 @@ struct nf_queue_entry {
|
||||||
|
|
||||||
struct nf_hook_ops *elem;
|
struct nf_hook_ops *elem;
|
||||||
u_int8_t pf;
|
u_int8_t pf;
|
||||||
|
u16 size; /* sizeof(entry) + saved route keys */
|
||||||
unsigned int hook;
|
unsigned int hook;
|
||||||
struct net_device *indev;
|
struct net_device *indev;
|
||||||
struct net_device *outdev;
|
struct net_device *outdev;
|
||||||
int (*okfn)(struct sk_buff *);
|
int (*okfn)(struct sk_buff *);
|
||||||
|
|
||||||
|
/* extra space to store route keys */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
|
#define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
|
||||||
|
@ -27,4 +30,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh);
|
||||||
void nf_unregister_queue_handler(void);
|
void nf_unregister_queue_handler(void);
|
||||||
extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
|
extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
|
||||||
|
|
||||||
|
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
|
||||||
|
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
|
||||||
|
|
||||||
#endif /* _NF_QUEUE_H */
|
#endif /* _NF_QUEUE_H */
|
||||||
|
|
|
@ -45,7 +45,7 @@ void nf_unregister_queue_handler(void)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(nf_unregister_queue_handler);
|
EXPORT_SYMBOL(nf_unregister_queue_handler);
|
||||||
|
|
||||||
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
|
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
|
||||||
{
|
{
|
||||||
/* Release those devices we held, or Alexey will kill me. */
|
/* Release those devices we held, or Alexey will kill me. */
|
||||||
if (entry->indev)
|
if (entry->indev)
|
||||||
|
@ -65,9 +65,10 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
|
||||||
/* Drop reference to owner of hook which queued us. */
|
/* Drop reference to owner of hook which queued us. */
|
||||||
module_put(entry->elem->owner);
|
module_put(entry->elem->owner);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
|
||||||
|
|
||||||
/* Bump dev refs so they don't vanish while packet is out */
|
/* Bump dev refs so they don't vanish while packet is out */
|
||||||
static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
|
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
|
||||||
{
|
{
|
||||||
if (!try_module_get(entry->elem->owner))
|
if (!try_module_get(entry->elem->owner))
|
||||||
return false;
|
return false;
|
||||||
|
@ -92,12 +93,13 @@ static bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Any packet that leaves via this function must come back
|
* Any packet that leaves via this function must come back
|
||||||
* through nf_reinject().
|
* through nf_reinject().
|
||||||
*/
|
*/
|
||||||
static int __nf_queue(struct sk_buff *skb,
|
int nf_queue(struct sk_buff *skb,
|
||||||
struct nf_hook_ops *elem,
|
struct nf_hook_ops *elem,
|
||||||
u_int8_t pf, unsigned int hook,
|
u_int8_t pf, unsigned int hook,
|
||||||
struct net_device *indev,
|
struct net_device *indev,
|
||||||
|
@ -137,6 +139,7 @@ static int __nf_queue(struct sk_buff *skb,
|
||||||
.indev = indev,
|
.indev = indev,
|
||||||
.outdev = outdev,
|
.outdev = outdev,
|
||||||
.okfn = okfn,
|
.okfn = okfn,
|
||||||
|
.size = sizeof(*entry) + afinfo->route_key_size,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!nf_queue_entry_get_refs(entry)) {
|
if (!nf_queue_entry_get_refs(entry)) {
|
||||||
|
@ -163,87 +166,6 @@ static int __nf_queue(struct sk_buff *skb,
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
|
||||||
/* When called from bridge netfilter, skb->data must point to MAC header
|
|
||||||
* before calling skb_gso_segment(). Else, original MAC header is lost
|
|
||||||
* and segmented skbs will be sent to wrong destination.
|
|
||||||
*/
|
|
||||||
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
|
|
||||||
{
|
|
||||||
if (skb->nf_bridge)
|
|
||||||
__skb_push(skb, skb->network_header - skb->mac_header);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
|
|
||||||
{
|
|
||||||
if (skb->nf_bridge)
|
|
||||||
__skb_pull(skb, skb->network_header - skb->mac_header);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
#define nf_bridge_adjust_skb_data(s) do {} while (0)
|
|
||||||
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int nf_queue(struct sk_buff *skb,
|
|
||||||
struct nf_hook_ops *elem,
|
|
||||||
u_int8_t pf, unsigned int hook,
|
|
||||||
struct net_device *indev,
|
|
||||||
struct net_device *outdev,
|
|
||||||
int (*okfn)(struct sk_buff *),
|
|
||||||
unsigned int queuenum)
|
|
||||||
{
|
|
||||||
struct sk_buff *segs;
|
|
||||||
int err = -EINVAL;
|
|
||||||
unsigned int queued;
|
|
||||||
|
|
||||||
if (!skb_is_gso(skb))
|
|
||||||
return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
|
|
||||||
queuenum);
|
|
||||||
|
|
||||||
switch (pf) {
|
|
||||||
case NFPROTO_IPV4:
|
|
||||||
skb->protocol = htons(ETH_P_IP);
|
|
||||||
break;
|
|
||||||
case NFPROTO_IPV6:
|
|
||||||
skb->protocol = htons(ETH_P_IPV6);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
nf_bridge_adjust_skb_data(skb);
|
|
||||||
segs = skb_gso_segment(skb, 0);
|
|
||||||
/* Does not use PTR_ERR to limit the number of error codes that can be
|
|
||||||
* returned by nf_queue. For instance, callers rely on -ECANCELED to mean
|
|
||||||
* 'ignore this hook'.
|
|
||||||
*/
|
|
||||||
if (IS_ERR(segs))
|
|
||||||
goto out_err;
|
|
||||||
queued = 0;
|
|
||||||
err = 0;
|
|
||||||
do {
|
|
||||||
struct sk_buff *nskb = segs->next;
|
|
||||||
|
|
||||||
segs->next = NULL;
|
|
||||||
if (err == 0) {
|
|
||||||
nf_bridge_adjust_segmented_data(segs);
|
|
||||||
err = __nf_queue(segs, elem, pf, hook, indev,
|
|
||||||
outdev, okfn, queuenum);
|
|
||||||
}
|
|
||||||
if (err == 0)
|
|
||||||
queued++;
|
|
||||||
else
|
|
||||||
kfree_skb(segs);
|
|
||||||
segs = nskb;
|
|
||||||
} while (segs);
|
|
||||||
|
|
||||||
if (queued) {
|
|
||||||
kfree_skb(skb);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
out_err:
|
|
||||||
nf_bridge_adjust_segmented_data(skb);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
|
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
|
||||||
{
|
{
|
||||||
struct sk_buff *skb = entry->skb;
|
struct sk_buff *skb = entry->skb;
|
||||||
|
@ -283,7 +205,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
break;
|
break;
|
||||||
case NF_QUEUE:
|
case NF_QUEUE:
|
||||||
err = __nf_queue(skb, elem, entry->pf, entry->hook,
|
err = nf_queue(skb, elem, entry->pf, entry->hook,
|
||||||
entry->indev, entry->outdev, entry->okfn,
|
entry->indev, entry->outdev, entry->okfn,
|
||||||
verdict >> NF_VERDICT_QBITS);
|
verdict >> NF_VERDICT_QBITS);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
|
|
|
@ -477,28 +477,13 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
|
||||||
|
struct nf_queue_entry *entry)
|
||||||
{
|
{
|
||||||
struct sk_buff *nskb;
|
struct sk_buff *nskb;
|
||||||
struct nfqnl_instance *queue;
|
|
||||||
int err = -ENOBUFS;
|
int err = -ENOBUFS;
|
||||||
__be32 *packet_id_ptr;
|
__be32 *packet_id_ptr;
|
||||||
int failopen = 0;
|
int failopen = 0;
|
||||||
struct net *net = dev_net(entry->indev ?
|
|
||||||
entry->indev : entry->outdev);
|
|
||||||
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
|
||||||
|
|
||||||
/* rcu_read_lock()ed by nf_hook_slow() */
|
|
||||||
queue = instance_lookup(q, queuenum);
|
|
||||||
if (!queue) {
|
|
||||||
err = -ESRCH;
|
|
||||||
goto err_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (queue->copy_mode == NFQNL_COPY_NONE) {
|
|
||||||
err = -EINVAL;
|
|
||||||
goto err_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
|
nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
|
||||||
if (nskb == NULL) {
|
if (nskb == NULL) {
|
||||||
|
@ -547,6 +532,141 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct nf_queue_entry *
|
||||||
|
nf_queue_entry_dup(struct nf_queue_entry *e)
|
||||||
|
{
|
||||||
|
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
|
||||||
|
if (entry) {
|
||||||
|
if (nf_queue_entry_get_refs(entry))
|
||||||
|
return entry;
|
||||||
|
kfree(entry);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||||
|
/* When called from bridge netfilter, skb->data must point to MAC header
|
||||||
|
* before calling skb_gso_segment(). Else, original MAC header is lost
|
||||||
|
* and segmented skbs will be sent to wrong destination.
|
||||||
|
*/
|
||||||
|
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
if (skb->nf_bridge)
|
||||||
|
__skb_push(skb, skb->network_header - skb->mac_header);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
if (skb->nf_bridge)
|
||||||
|
__skb_pull(skb, skb->network_header - skb->mac_header);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define nf_bridge_adjust_skb_data(s) do {} while (0)
|
||||||
|
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void free_entry(struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
nf_queue_entry_release_refs(entry);
|
||||||
|
kfree(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
|
||||||
|
struct sk_buff *skb, struct nf_queue_entry *entry)
|
||||||
|
{
|
||||||
|
int ret = -ENOMEM;
|
||||||
|
struct nf_queue_entry *entry_seg;
|
||||||
|
|
||||||
|
nf_bridge_adjust_segmented_data(skb);
|
||||||
|
|
||||||
|
if (skb->next == NULL) { /* last packet, no need to copy entry */
|
||||||
|
struct sk_buff *gso_skb = entry->skb;
|
||||||
|
entry->skb = skb;
|
||||||
|
ret = __nfqnl_enqueue_packet(net, queue, entry);
|
||||||
|
if (ret)
|
||||||
|
entry->skb = gso_skb;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
skb->next = NULL;
|
||||||
|
|
||||||
|
entry_seg = nf_queue_entry_dup(entry);
|
||||||
|
if (entry_seg) {
|
||||||
|
entry_seg->skb = skb;
|
||||||
|
ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
|
||||||
|
if (ret)
|
||||||
|
free_entry(entry_seg);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
||||||
|
{
|
||||||
|
unsigned int queued;
|
||||||
|
struct nfqnl_instance *queue;
|
||||||
|
struct sk_buff *skb, *segs;
|
||||||
|
int err = -ENOBUFS;
|
||||||
|
struct net *net = dev_net(entry->indev ?
|
||||||
|
entry->indev : entry->outdev);
|
||||||
|
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
|
||||||
|
|
||||||
|
/* rcu_read_lock()ed by nf_hook_slow() */
|
||||||
|
queue = instance_lookup(q, queuenum);
|
||||||
|
if (!queue)
|
||||||
|
return -ESRCH;
|
||||||
|
|
||||||
|
if (queue->copy_mode == NFQNL_COPY_NONE)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!skb_is_gso(entry->skb))
|
||||||
|
return __nfqnl_enqueue_packet(net, queue, entry);
|
||||||
|
|
||||||
|
skb = entry->skb;
|
||||||
|
|
||||||
|
switch (entry->pf) {
|
||||||
|
case NFPROTO_IPV4:
|
||||||
|
skb->protocol = htons(ETH_P_IP);
|
||||||
|
break;
|
||||||
|
case NFPROTO_IPV6:
|
||||||
|
skb->protocol = htons(ETH_P_IPV6);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nf_bridge_adjust_skb_data(skb);
|
||||||
|
segs = skb_gso_segment(skb, 0);
|
||||||
|
/* Does not use PTR_ERR to limit the number of error codes that can be
|
||||||
|
* returned by nf_queue. For instance, callers rely on -ECANCELED to
|
||||||
|
* mean 'ignore this hook'.
|
||||||
|
*/
|
||||||
|
if (IS_ERR(segs))
|
||||||
|
goto out_err;
|
||||||
|
queued = 0;
|
||||||
|
err = 0;
|
||||||
|
do {
|
||||||
|
struct sk_buff *nskb = segs->next;
|
||||||
|
if (err == 0)
|
||||||
|
err = __nfqnl_enqueue_packet_gso(net, queue,
|
||||||
|
segs, entry);
|
||||||
|
if (err == 0)
|
||||||
|
queued++;
|
||||||
|
else
|
||||||
|
kfree_skb(segs);
|
||||||
|
segs = nskb;
|
||||||
|
} while (segs);
|
||||||
|
|
||||||
|
if (queued) {
|
||||||
|
if (err) /* some segments are already queued */
|
||||||
|
free_entry(entry);
|
||||||
|
kfree_skb(skb);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
out_err:
|
||||||
|
nf_bridge_adjust_segmented_data(skb);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
|
nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue