概述
eBPF 全称是扩展伯克利包过滤器(extended Berkeley Packet Filter),用于在不修改内核源码的前提下实现内核本身不支持的功能。通常情况下你只能通过 eBPF 读取系统的各种信息(主要是系统调用和网络包),少数情况下允许你修改信息(主要是网络包)。
eBPF 有多种开发工具,例如常见的:
- BCC:基于 BPF 的 Linux 工具集,提供编写、编译和加载 BPF 程序的完整工具链;
- libbpf-bootstrap:基于 libbpf 的 BPF 应用开发脚手架,同时支持一次编译,到处运行;
- Cilium eBPF(cilium/ebpf):基于 Go 的开发和加载 eBPF 程序的库;
关键名词
- kprobe:kernel probes
eBPF Maps
- eBPF Maps
- Some map types are defined as arrays, which always have a 4-byte index as the key type; other maps are hash tables that can use some arbitrary data type as the key.
- There are map types that are optimized for particular types of operations, such as first-in-first-out queues, first-in-last-out stacks, least-recently-used data storage, longest-prefix matching, and Bloom filters: https://github.com/iovisor/bcc/blob/master/docs/reference_guide.md#maps
helloworld
[root@liqiang.io]# cat test.py
#!/usr/bin/python
# Minimal BCC "hello world": attaches a tiny eBPF program to the execve syscall.
from bcc import BPF
# C source of the eBPF program; this string is the part that gets loaded into the kernel.
program = r"""
int hello(void *ctx) {
bpf_trace_printk("Hello World!");
return 0;
}
"""
# Hand the C source to BCC (everything from here on is the user-space part).
b = BPF(text=program)
# Ask BCC for the kernel function name that corresponds to the execve syscall.
syscall = b.get_syscall_fnname("execve")
# Attach hello() as a kprobe on that function.
b.attach_kprobe(event=syscall, fn_name="hello")
# Print the trace output (bpf_trace_printk() writes to /sys/kernel/debug/tracing/trace_pipe).
b.trace_print()
这一段程序中包含了两个部分,分别是:
- 加载进内核部分:就是 program 变量里面的内容
- 用户空间部分:其他部分
bpf 系统调用
BCC 提供了很简单的应用层接口,但实际上这些操作都是通过 bpf 系统调用完成的,而 bpf 系统调用的函数原型为:
int bpf(int cmd, union bpf_attr *attr, unsigned int size);
一些 bpf 系统调用的示例(可以通过类似 strace -e bpf python3 test.py 的命令看到;下面跟踪的是另一个脚本 00_ring_buffer_config.py):
[root@liqiang.io]# strace -e bpf python3 00_ring_buffer_config.py
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee2cf1330, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 116) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0$\5\0\0$\5\0\0\377\3\0\0\1\0\0\0\0\0\0\10"..., btf_log_buf=NULL, btf_size=2363, btf_log_size=0, btf_log_level=0}, 28) = 3
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee2cf0fa0, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="libbpf_nametest"}, 64) = 4
bpf(BPF_MAP_CREATE, {map_type=0x1b /* BPF_MAP_TYPE_??? */, key_size=0, value_size=0, max_entries=4096, map_flags=0, inner_map_fd=0, map_name="output", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 72) = 4
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=12, max_entries=10240, map_flags=0, inner_map_fd=0, map_name="config", map_ifindex=0, btf_fd=3, btf_key_type_id=1, btf_value_type_id=4, btf_vmlinux_value_type_id=0}, 72) = 5
bpf(BPF_MAP_UPDATE_ELEM, {map_fd=5, key=0x7fd55bc70f78, value=0x7fd55bc812b8, flags=BPF_ANY}, 32) = 0
bpf(BPF_MAP_UPDATE_ELEM, {map_fd=5, key=0x7fd55bc812b8, value=0x7fd55bc70f78, flags=BPF_ANY}, 32) = 0
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=41, insns=0x7fd55be38000, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(6, 1, 0), prog_flags=0, prog_name="hello", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x555ef73a9e80, func_info_cnt=1, line_info_rec_size=16, line_info=0x555ef726a9b0, line_info_cnt=21, attach_btf_id=0, attach_prog_fd=0}, 128) = 6
bpf(BPF_OBJ_GET_INFO_BY_FD, {info={bpf_fd=4, info_len=88, info=0x7ffee2cf19d0}}, 16) = 0
tips
- The bpf_trace_printk() helper function in the kernel always sends output to the same predefined pseudofile location: /sys/kernel/debug/tracing/trace_pipe
- uapi/linux/bpf.h:包含了很多 eBPF Map 类型
- 内核的 eBPF 文档