Merge branch 'bpf-event-output-helper-improvements'
Daniel Borkmann says:

====================
BPF event output helper improvements

This set adds improvements to the BPF event output helper to
support non-linear data sampling, here specifically, for skb
context. For details please see individual patches. The set
is based against net-next tree.

v1 -> v2:

- Integrated and adapted Peter's diff into patch 1, updated
  the remaining ones accordingly. Thanks Peter!
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit e980a0764a
10 changed files with 179 additions and 62 deletions
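For orientation (not part of the commit itself): after this series, a tc classifier program can ask the helper to append raw skb payload after its own meta data by encoding the payload length in the upper 32 bits of the flags argument (BPF_F_CTXLEN_MASK); the request fails with -EFAULT if the skb is shorter than asked. Below is a minimal sketch in the restricted-C style of samples/bpf at the time; the map definition, section names, and the names event_map/event_meta/cls_main are illustrative assumptions, not from the patches:

#include <linux/bpf.h>

#define SAMPLE_LEN	64	/* bytes of skb payload to append per event */

/* Matches the map layout the samples/bpf loaders expected at the time. */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

/* Helper declared by ID, as samples/bpf did before libbpf. */
static int (*bpf_perf_event_output)(void *ctx, void *map,
				    unsigned long long flags,
				    void *data, int size) =
	(void *) BPF_FUNC_perf_event_output;

struct bpf_map_def __attribute__((section("maps"), used)) event_map = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(unsigned int),
	.max_entries	= 64,	/* >= number of possible CPUs */
};

struct event_meta {
	unsigned int cookie;
	unsigned int pkt_len;
};

__attribute__((section("classifier"), used))
int cls_main(struct __sk_buff *skb)
{
	struct event_meta meta = {
		.cookie	 = 0xcafe,
		.pkt_len = skb->len,
	};
	/* Upper 32 bits of flags select how much skb payload the kernel
	 * appends after the meta data (covered by BPF_F_CTXLEN_MASK). */
	unsigned long long flags = BPF_F_CURRENT_CPU |
				   ((unsigned long long) SAMPLE_LEN << 32);

	bpf_perf_event_output(skb, &event_map, flags, &meta, sizeof(meta));
	return 0;
}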
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -979,12 +979,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	struct pt_regs regs;
 	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
-	struct perf_raw_record raw;
+	struct perf_raw_record raw = {
+		.frag = {
+			.size = sfr->size,
+			.data = sfr,
+		},
+	};
 
 	/* Setup perf sample */
 	perf_sample_data_init(&data, 0, event->hw.last_period);
-	raw.size = sfr->size;
-	raw.data = sfr;
 	data.raw = &raw;
 
 	/* Setup pt_regs to look like an CPU-measurement external interrupt
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -655,8 +655,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	}
 
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-		raw.size = sizeof(u32) + ibs_data.size;
-		raw.data = ibs_data.data;
+		raw = (struct perf_raw_record){
+			.frag = {
+				.size = sizeof(u32) + ibs_data.size,
+				.data = ibs_data.data,
+			},
+		};
 		data.raw = &raw;
 	}
 
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
-const struct bpf_func_proto *bpf_get_event_output_proto(void);
+
+typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
+					unsigned long len);
+
+u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx {
 	bool				contexts_maxed;
 };
 
-struct perf_raw_record {
-	u32				size;
+typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
+				     unsigned long len);
+
+struct perf_raw_frag {
+	union {
+		struct perf_raw_frag	*next;
+		unsigned long		pad;
+	};
+	perf_copy_f			copy;
 	void				*data;
+	u32				size;
+} __packed;
+
+struct perf_raw_record {
+	struct perf_raw_frag		frag;
+	u32				size;
 };
 
 /*
@@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void);
 static inline void perf_restore_debug_store(void)			{ }
 #endif
 
+static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
+{
+	return frag->pad < sizeof(u64);
+}
+
 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
 /*
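Why frag->pad < sizeof(u64) identifies the terminal fragment: the union overlays the next pointer with the pad count. An interior fragment stores a real pointer there, which is never a small integer (the first bytes of the address space are never mapped), while the last fragment holds either NULL or, once perf_prepare_sample() has run, a pad of at most 7 bytes. A standalone userspace sketch mirroring the structs above (illustrative only, not part of the commit):

#include <stdio.h>
#include <stdint.h>

typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
				     unsigned long len);

struct perf_raw_frag {
	union {
		struct perf_raw_frag	*next;
		unsigned long		pad;
	};
	perf_copy_f			copy;
	void				*data;
	uint32_t			size;
} __attribute__((packed));

/* Mirrors perf_raw_frag_last(): interior frags hold a pointer in the
 * union (always >= sizeof(u64)); the last frag holds NULL or 0..7. */
static int frag_last(const struct perf_raw_frag *frag)
{
	return frag->pad < sizeof(uint64_t);
}

int main(void)
{
	char meta[] = "meta", ctx[] = "payload";
	struct perf_raw_frag tail = { .data = ctx, .size = sizeof(ctx) };
	struct perf_raw_frag head = {
		.next = &tail, .data = meta, .size = sizeof(meta),
	};
	const struct perf_raw_frag *frag = &head;
	unsigned long sum = 0;

	do {	/* the same walk perf_prepare_sample() performs */
		sum += frag->size;
		if (frag_last(frag))
			break;
		frag = frag->next;
	} while (1);

	printf("total raw bytes: %lu\n", sum);	/* 5 + 8 = 13 */
	return 0;
}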
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -401,6 +401,8 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
-const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
+u64 __weak
+bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-	return NULL;
+	return -ENOTSUPP;
 }
 
 /* Always built-in helper functions. */
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5553,16 +5553,26 @@ void perf_output_sample(struct perf_output_handle *handle,
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
-		if (data->raw) {
-			u32 raw_size = data->raw->size;
-			u32 real_size = round_up(raw_size + sizeof(u32),
-						 sizeof(u64)) - sizeof(u32);
-			u64 zero = 0;
-
-			perf_output_put(handle, real_size);
-			__output_copy(handle, data->raw->data, raw_size);
-			if (real_size - raw_size)
-				__output_copy(handle, &zero, real_size - raw_size);
+		struct perf_raw_record *raw = data->raw;
+
+		if (raw) {
+			struct perf_raw_frag *frag = &raw->frag;
+
+			perf_output_put(handle, raw->size);
+			do {
+				if (frag->copy) {
+					__output_custom(handle, frag->copy,
+							frag->data, frag->size);
+				} else {
+					__output_copy(handle, frag->data,
+						      frag->size);
+				}
+				if (perf_raw_frag_last(frag))
+					break;
+				frag = frag->next;
+			} while (1);
+			if (frag->pad)
+				__output_skip(handle, NULL, frag->pad);
 		} else {
 			struct {
 				u32	size;
@@ -5687,14 +5697,28 @@ void perf_prepare_sample(struct perf_event_header *header,
 	}
 
 	if (sample_type & PERF_SAMPLE_RAW) {
-		int size = sizeof(u32);
+		struct perf_raw_record *raw = data->raw;
+		int size;
 
-		if (data->raw)
-			size += data->raw->size;
-		else
-			size += sizeof(u32);
+		if (raw) {
+			struct perf_raw_frag *frag = &raw->frag;
+			u32 sum = 0;
+
+			do {
+				sum += frag->size;
+				if (perf_raw_frag_last(frag))
+					break;
+				frag = frag->next;
+			} while (1);
+
+			size = round_up(sum + sizeof(u32), sizeof(u64));
+			raw->size = size - sizeof(u32);
+			frag->pad = raw->size - sum;
+		} else {
+			size = sizeof(u64);
+		}
 
-		header->size += round_up(size, sizeof(u64));
+		header->size += size;
 	}
 
 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
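Concretely, for the two-fragment record from the earlier sketch: the sum of all fragment sizes plus the leading u32 size word is rounded up to a u64 boundary, raw->size is what that size word will announce, and the terminal fragment's pad (0..7) is emitted as zero bytes by perf_output_sample(), preserving the alignment the old single-buffer round_up code guaranteed. A quick standalone recomputation (illustrative):

#include <stdio.h>
#include <stdint.h>

#define round_up(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	uint32_t sum = 13;	/* e.g. 5 bytes meta + 8 bytes ctx */
	uint32_t size, raw_size, pad;

	size	 = round_up(sum + sizeof(uint32_t), sizeof(uint64_t));
	raw_size = size - sizeof(uint32_t);	/* announced in the size word */
	pad	 = raw_size - sum;		/* trailing zeroes to emit */

	printf("size=%u raw->size=%u pad=%u\n", size, raw_size, pad);
	/* prints: size=24 raw->size=20 pad=7 */
	return 0;
}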
@@ -7331,7 +7355,7 @@ static struct pmu perf_swevent = {
 static int perf_tp_filter_match(struct perf_event *event,
 				struct perf_sample_data *data)
 {
-	void *record = data->raw->data;
+	void *record = data->raw->frag.data;
 
 	/* only top level events have filters set */
 	if (event->parent)
@@ -7387,8 +7411,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 	struct perf_event *event;
 
 	struct perf_raw_record raw = {
-		.size = entry_size,
-		.data = record,
+		.frag = {
+			.size = entry_size,
+			.data = record,
+		},
 	};
 
 	perf_sample_data_init(&data, 0, 0);
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb)
 	return rb->aux_nr_pages << PAGE_SHIFT;
 }
 
-#define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
-static inline unsigned long						\
-func_name(struct perf_output_handle *handle,				\
-	  const void *buf, unsigned long len)				\
+#define __DEFINE_OUTPUT_COPY_BODY(memcpy_func)				\
 {									\
 	unsigned long size, written;					\
 									\
@@ -152,6 +149,17 @@ func_name(struct perf_output_handle *handle,				\
 	return len;							\
 }
 
+#define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
+static inline unsigned long						\
+func_name(struct perf_output_handle *handle,				\
+	  const void *buf, unsigned long len)				\
+__DEFINE_OUTPUT_COPY_BODY(memcpy_func)
+
+static inline unsigned long
+__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
+		const void *buf, unsigned long len)
+__DEFINE_OUTPUT_COPY_BODY(copy_func)
+
 static inline unsigned long
 memcpy_common(void *dst, const void *src, unsigned long n)
 {
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -233,24 +233,17 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+static __always_inline u64
+__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
+			u64 flags, struct perf_raw_record *raw)
 {
-	struct pt_regs *regs = (struct pt_regs *) (long) r1;
-	struct bpf_map *map = (struct bpf_map *) (long) r2;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
-	void *data = (void *) (long) r4;
 	struct perf_sample_data sample_data;
 	struct bpf_event_entry *ee;
 	struct perf_event *event;
-	struct perf_raw_record raw = {
-		.size = size,
-		.data = data,
-	};
 
-	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
-		return -EINVAL;
 	if (index == BPF_F_CURRENT_CPU)
 		index = cpu;
 	if (unlikely(index >= array->map.max_entries))
@@ -269,11 +262,29 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 		return -EOPNOTSUPP;
 
 	perf_sample_data_init(&sample_data, 0, 0);
-	sample_data.raw = &raw;
+	sample_data.raw = raw;
 	perf_event_output(event, &sample_data, regs);
 	return 0;
 }
 
+static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+{
+	struct pt_regs *regs = (struct pt_regs *)(long) r1;
+	struct bpf_map *map = (struct bpf_map *)(long) r2;
+	void *data = (void *)(long) r4;
+	struct perf_raw_record raw = {
+		.frag = {
+			.size = size,
+			.data = data,
+		},
+	};
+
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+		return -EINVAL;
+
+	return __bpf_perf_event_output(regs, map, flags, &raw);
+}
+
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
 	.func		= bpf_perf_event_output,
 	.gpl_only	= true,
@@ -287,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
 
-static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
 	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+	struct perf_raw_frag frag = {
+		.copy		= ctx_copy,
+		.size		= ctx_size,
+		.data		= ctx,
+	};
+	struct perf_raw_record raw = {
+		.frag = {
+			.next	= ctx_size ? &frag : NULL,
+			.size	= meta_size,
+			.data	= meta,
+		},
+	};
 
 	perf_fetch_caller_regs(regs);
 
-	return bpf_perf_event_output((long)regs, r2, flags, r4, size);
-}
-
-static const struct bpf_func_proto bpf_event_output_proto = {
-	.func		= bpf_event_output,
-	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_ANYTHING,
-	.arg4_type	= ARG_PTR_TO_STACK,
-	.arg5_type	= ARG_CONST_STACK_SIZE,
-};
-
-const struct bpf_func_proto *bpf_get_event_output_proto(void)
-{
-	return &bpf_event_output_proto;
+	return __bpf_perf_event_output(regs, map, flags, &raw);
 }
 
 static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
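Note the contract of bpf_ctx_copy_t here: like the memcpy_func slot in __DEFINE_OUTPUT_COPY_BODY, the callback returns the number of bytes it could not copy, i.e. 0 on full success (bpf_skb_copy in the next file returns len when skb_header_pointer() fails, meaning nothing was copied). The ctx frag is only chained in when ctx_size is non-zero. A hypothetical callback for an already-linear object, just to show the shape (not from the commit):

#include <string.h>

/* Hypothetical bpf_ctx_copy_t: the source is linear, so a plain memcpy
 * suffices. Return bytes NOT copied; 0 means complete success. */
static unsigned long linear_ctx_copy(void *dst, const void *src,
				     unsigned long len)
{
	memcpy(dst, src, len);
	return 0;
}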
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
 	return false;
 }
 
+static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
+				  unsigned long len)
+{
+	void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
+
+	if (unlikely(!ptr))
+		return len;
+	if (ptr != dst_buff)
+		memcpy(dst_buff, ptr, len);
+
+	return 0;
+}
+
+static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
+				u64 meta_size)
+{
+	struct sk_buff *skb = (struct sk_buff *)(long) r1;
+	struct bpf_map *map = (struct bpf_map *)(long) r2;
+	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+	void *meta = (void *)(long) r4;
+
+	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+		return -EINVAL;
+	if (unlikely(skb_size > skb->len))
+		return -EFAULT;
+
+	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
+				bpf_skb_copy);
+}
+
+static const struct bpf_func_proto bpf_skb_event_output_proto = {
+	.func		= bpf_skb_event_output,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_STACK,
+	.arg5_type	= ARG_CONST_STACK_SIZE,
+};
+
 static unsigned short bpf_tunnel_key_af(u64 flags)
 {
 	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_hash_recalc:
 		return &bpf_get_hash_recalc_proto;
 	case BPF_FUNC_perf_event_output:
-		return bpf_get_event_output_proto();
+		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
 #ifdef CONFIG_SOCK_CGROUP_DATA