From 7442bec96f45f96da29b1ec705a308784805ec66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20I=C3=B1iguez=20Goia?= Date: Mon, 29 Apr 2024 01:06:39 +0200 Subject: [PATCH] ebpf: performance improvement for opensnitch-procs We were sending unnecessary exit events to userspace, consuming unnecessary CPU cycles. We only intercept execve and execveat, but sched_process_exit is invoked by more functions (sched_process_exit, clone, ...), so the daemon was receiving events that it did nothing with, apart from consuming CPU cycles. In some scenarios, like servers running saltstack (as salt-master), this caused the daemon to consume more CPU than needed. cherry picked from 15fcf6753516a1e22add87cb2b4f5de4a14540ec --- ebpf_prog/opensnitch-procs.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/ebpf_prog/opensnitch-procs.c b/ebpf_prog/opensnitch-procs.c index 2da48f7c..34972e16 100644 --- a/ebpf_prog/opensnitch-procs.c +++ b/ebpf_prog/opensnitch-procs.c @@ -11,10 +11,10 @@ struct bpf_map_def SEC("maps/proc-events") events = { }; struct bpf_map_def SEC("maps/execMap") execMap = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(struct data_t), - .max_entries = 256, + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(struct data_t), + .max_entries = 256, }; @@ -46,14 +46,12 @@ static __always_inline void __handle_exit_execve(struct trace_sys_exit_execve *c { u64 pid_tgid = bpf_get_current_pid_tgid(); struct data_t *proc = bpf_map_lookup_elem(&execMap, &pid_tgid); + // don't delete the pid from execMap here, delegate it to sched_process_exit if (proc == NULL) { return; } - if (ctx->ret != 0) { goto out; } + if (ctx->ret != 0) { return; } proc->ret_code = ctx->ret; bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc, sizeof(*proc)); - -out: - bpf_map_delete_elem(&execMap, &pid_tgid); } // https://0xax.gitbooks.io/linux-insides/content/SysCall/linux-syscall-4.html 
@@ -63,6 +61,14 @@ out: SEC("tracepoint/sched/sched_process_exit") int tracepoint__sched_sched_process_exit(struct pt_regs *ctx) { + u64 pid_tgid = bpf_get_current_pid_tgid(); + struct data_t *proc = bpf_map_lookup_elem(&execMap, &pid_tgid); + // if the pid is not in execMap cache (because it's not of a pid we've + // previously intercepted), do not send the event to userspace, because + // we won't do anything with it and it consumes CPU cycles (too much in some + // scenarios). + if (proc == NULL) { return 0; } + int zero = 0; struct data_t *data = bpf_map_lookup_elem(&heapstore, &zero); if (!data){ return 0; } @@ -71,7 +77,6 @@ int tracepoint__sched_sched_process_exit(struct pt_regs *ctx) data->type = EVENT_SCHED_EXIT; bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, data, sizeof(*data)); - u64 pid_tgid = bpf_get_current_pid_tgid(); bpf_map_delete_elem(&execMap, &pid_tgid); return 0; }; @@ -129,7 +134,7 @@ int tracepoint__syscalls_sys_enter_execve(struct trace_sys_enter_execve* ctx) #else // in case of failure adding the item to the map, send it directly u64 pid_tgid = bpf_get_current_pid_tgid(); - if (bpf_map_update_elem(&execMap, &pid_tgid, data, BPF_ANY) != 0) { + if (bpf_map_update_elem(&execMap, &pid_tgid, data, BPF_ANY) != 0) { // With some commands, this helper fails with error -28 (ENOSPC). Misleading error? cmd failed maybe? // BUG: after coming back from suspend state, this helper fails with error -95 (EOPNOTSUPP) @@ -180,7 +185,7 @@ int tracepoint__syscalls_sys_enter_execveat(struct trace_sys_enter_execveat* ctx #else // in case of failure adding the item to the map, send it directly u64 pid_tgid = bpf_get_current_pid_tgid(); - if (bpf_map_update_elem(&execMap, &pid_tgid, data, BPF_ANY) != 0) { + if (bpf_map_update_elem(&execMap, &pid_tgid, data, BPF_ANY) != 0) { // With some commands, this helper fails with error -28 (ENOSPC). Misleading error? cmd failed maybe? 
// BUG: after coming back from suspend state, this helper fails with error -95 (EOPNOTSUPP)