接前一篇文章:BCC源码内容概览(1)
本文参考官网中的Contents部分的介绍。
BCC源码根目录的文件,其中一些是同时包含C和Python的单个文件,另一些是.c和.py的成对文件,还有一些是目录。
examples目录下的文件:
使用USDT探测跟踪Node.js HTTP服务器请求。
bcc/examples/tracing/nodejs_http_server_example.txt文件内容如下:
- # ./nodejs_http_server.py 24728
- TIME(s) COMM PID ARGS
- 24653324.561322998 node 24728 path:/index.html
- 24653335.343401998 node 24728 path:/images/welcome.png
- 24653340.510164998 node 24728 path:/images/favicon.png
跟踪内核函数并打印所有内核堆栈跟踪。
bcc/examples/tracing/stacksnoop_example.txt文件内容如下:
- Demonstrations of stacksnoop, the Linux eBPF/bcc version.
-
-
- This program traces the given kernel function and prints the kernel stack trace
- for every call. This tool is useful for studying low frequency kernel functions,
- to see how they were invoked. For example, tracing the submit_bio() call:
-
- # ./stacksnoop submit_bio
- TIME(s) SYSCALL
- 3592.838736000 submit_bio
- submit_bio
- submit_bh
- jbd2_journal_commit_transaction
- kjournald2
- kthread
- ret_from_fork
-
- This shows that submit_bio() was called by submit_bh(), which was called
- by jbd2_journal_commit_transaction(), and so on.
-
- For high frequency functions, see stackcount, which summarizes in-kernel for
- efficiency. If you don't know if your function is low or high frequency, try
- funccount.
- The -v option includes more fields, including the on-CPU process (COMM and PID):
- # ./stacksnoop -v submit_bio
- TIME(s) COMM PID CPU SYSCALL
- 3734.855027000 jbd2/dm-0-8 313 0 submit_bio
- submit_bio
- submit_bh
- jbd2_journal_commit_transaction
- kjournald2
- kthread
- ret_from_fork
- This identifies the application issuing the sync syscall: the jbd2 process
- (COMM column).
- Here's another example, showing the path to second_overflow() and on-CPU
- process:
-
- # ./stacksnoop -v second_overflow
- TIME(s) COMM PID CPU SYSCALL
- 3837.526433000 <idle> 0 1 second_overflow
- second_overflow
- tick_do_update_jiffies64
- tick_irq_enter
- irq_enter
- smp_apic_timer_interrupt
- apic_timer_interrupt
- default_idle
- arch_cpu_idle
- default_idle_call
- cpu_startup_entry
- start_secondary
-
- 3838.526953000 <idle> 0 1 second_overflow
- second_overflow
- tick_do_update_jiffies64
- tick_irq_enter
- irq_enter
- smp_apic_timer_interrupt
- apic_timer_interrupt
- default_idle
- arch_cpu_idle
- default_idle_call
- cpu_startup_entry
- start_secondary
-
- This fires every second (see TIME(s)), and is from tick_do_update_jiffies64().
-
-
- USAGE message:
-
- # ./stacksnoop -h
- usage: stacksnoop [-h] [-p PID] [-s] [-v] function
-
- Trace and print kernel stack traces for a kernel function
-
- positional arguments:
- function kernel function name
-
- optional arguments:
- -h, --help show this help message and exit
- -p PID, --pid PID trace this PID only
- -s, --offset show address offsets
- -v, --verbose print more fields
-
- examples:
- ./stacksnoop ext4_sync_fs # print kernel stack traces for ext4_sync_fs
- ./stacksnoop -s ext4_sync_fs # ... also show symbol offsets
- ./stacksnoop -v ext4_sync_fs # ... show extra columns
- ./stacksnoop -p 185 ext4_sync_fs # ... only when PID 185 is on-CPU
跟踪stat()系统调用。
bcc/tools/statsnoop.py文件内容如下:
- #!/usr/bin/env python
- # @lint-avoid-python-3-compatibility-imports
- #
- # statsnoop Trace stat() syscalls.
- # For Linux, uses BCC, eBPF. Embedded C.
- #
- # USAGE: statsnoop [-h] [-t] [-x] [-p PID]
- #
- # Copyright 2016 Netflix, Inc.
- # Licensed under the Apache License, Version 2.0 (the "License")
- #
- # 08-Feb-2016 Brendan Gregg Created this.
- # 17-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
- # 29-Nov-2022 Rocky Xing Added stat() variants.
-
- from __future__ import print_function
- from bcc import BPF
- import argparse
-
- # arguments
- examples = """examples:
- ./statsnoop # trace all stat() syscalls
- ./statsnoop -t # include timestamps
- ./statsnoop -x # only show failed stats
- ./statsnoop -p 181 # only trace PID 181
- """
- parser = argparse.ArgumentParser(
- description="Trace stat() syscalls",
- formatter_class=argparse.RawDescriptionHelpFormatter,
- epilog=examples)
- parser.add_argument("-t", "--timestamp", action="store_true",
- help="include timestamp on output")
- parser.add_argument("-x", "--failed", action="store_true",
- help="only show failed stats")
- parser.add_argument("-p", "--pid",
- help="trace this PID only")
- parser.add_argument("--ebpf", action="store_true",
- help=argparse.SUPPRESS)
- args = parser.parse_args()
- debug = 0
-
- # define BPF program
- bpf_text = """
- #include <uapi/linux/ptrace.h>
- #include <uapi/linux/limits.h>
- #include <linux/sched.h>
- struct val_t {
- const char *fname;
- };
- struct data_t {
- u32 pid;
- u64 ts_ns;
- int ret;
- char comm[TASK_COMM_LEN];
- char fname[NAME_MAX];
- };
- BPF_HASH(infotmp, u32, struct val_t);
- BPF_PERF_OUTPUT(events);
- static int trace_entry(struct pt_regs *ctx, const char __user *filename)
- {
- struct val_t val = {};
- u64 pid_tgid = bpf_get_current_pid_tgid();
- u32 pid = pid_tgid >> 32;
- u32 tid = (u32)pid_tgid;
- FILTER
- val.fname = filename;
- infotmp.update(&tid, &val);
- return 0;
- };
- int syscall__stat_entry(struct pt_regs *ctx, const char __user *filename)
- {
- return trace_entry(ctx, filename);
- }
- int syscall__statx_entry(struct pt_regs *ctx, int dfd, const char __user *filename)
- {
- return trace_entry(ctx, filename);
- }
- int trace_return(struct pt_regs *ctx)
- {
- u64 pid_tgid = bpf_get_current_pid_tgid();
- u32 tid = (u32)pid_tgid;
- struct val_t *valp;
- valp = infotmp.lookup(&tid);
- if (valp == 0) {
- // missed entry
- return 0;
- }
- struct data_t data = {.pid = pid_tgid >> 32};
- bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)valp->fname);
- bpf_get_current_comm(&data.comm, sizeof(data.comm));
- data.ts_ns = bpf_ktime_get_ns();
- data.ret = PT_REGS_RC(ctx);
- events.perf_submit(ctx, &data, sizeof(data));
- infotmp.delete(&tid);
- return 0;
- }
- """
- if args.pid:
- bpf_text = bpf_text.replace('FILTER',
- 'if (pid != %s) { return 0; }' % args.pid)
- else:
- bpf_text = bpf_text.replace('FILTER', '')
- if debug or args.ebpf:
- print(bpf_text)
- if args.ebpf:
- exit()
-
- # initialize BPF
- b = BPF(text=bpf_text)
-
- # for POSIX compliance, all architectures implement these
- # system calls but the name of the actual entry point may
- # be different for which we must check if the entry points
- # actually exist before attaching the probes
- def try_attach_syscall_probes(syscall):
- syscall_fnname = b.get_syscall_fnname(syscall)
- if BPF.ksymname(syscall_fnname) != -1:
- if syscall in ["statx", "fstatat64", "newfstatat"]:
- b.attach_kprobe(event=syscall_fnname, fn_name="syscall__statx_entry")
- else:
- b.attach_kprobe(event=syscall_fnname, fn_name="syscall__stat_entry")
- b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")
-
- try_attach_syscall_probes("stat")
- try_attach_syscall_probes("statx")
- try_attach_syscall_probes("statfs")
- try_attach_syscall_probes("newstat")
- try_attach_syscall_probes("newlstat")
- try_attach_syscall_probes("fstatat64")
- try_attach_syscall_probes("newfstatat")
-
- start_ts = 0
- prev_ts = 0
- delta = 0
-
- # header
- if args.timestamp:
- print("%-14s" % ("TIME(s)"), end="")
- print("%-7s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))
-
- # process event
- def print_event(cpu, data, size):
- event = b["events"].event(data)
- global start_ts
- global prev_ts
- global delta
- global cont
-
- # split return value into FD and errno columns
- if event.ret >= 0:
- if args.failed:
- return
- fd_s = event.ret
- err = 0
- else:
- fd_s = -1
- err = - event.ret
-
- if start_ts == 0:
- start_ts = event.ts_ns
-
- if args.timestamp:
- print("%-14.9f" % (float(event.ts_ns - start_ts) / 1000000000), end="")
-
- print("%-7d %-16s %4d %3d %s" % (event.pid,
- event.comm.decode('utf-8', 'replace'), fd_s, err,
- event.fname.decode('utf-8', 'replace')))
-
- # loop with callback to print_event
- b["events"].open_perf_buffer(print_event, page_cnt=64)
- while 1:
- try:
- b.perf_buffer_poll()
- except KeyboardInterrupt:
- exit()