[20260525]跟踪library cache lock library cache pin利用systemtap(21c)1.txt

[复制链接]
发表于 昨天 13:56 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。

您需要 登录 才可以下载或查看,没有账号?立即注册

×
[20260525]跟踪library cache lock library cache pin利用systemtap(21c)1.txt

--//去年6月份做的测试,当时测试失败,当时的记录如下。

$ cat lkpn21c.stp
# Trace calls to kgllkal and kglpnal in the Oracle 21c executable and print
# one line per call.
# lk / pn are running hit counters, incremented each time the matching probe fires.
global lk=0, pn=0
#globak off_set=0

#probe begin {
#    printf("Begin.\n")
#
#}

# Fires on entry to kgllkal. Prints the pid, the probed function name, the hit
# counter, argument 2 (labelled "handle address"), argument 3 (labelled "mode")
# and the string read from user memory at argument 2 + 0x1c8 (labelled "kglnaobj").
# 0x1c8 is the offset used for 21c; the 11g version of this script used 0x1a8.
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
{
    printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
}

# Fires on entry to kglpnal. Same output layout as the kgllkal probe, but
# counts with pn and labels the counter "pn_count".
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
{
    printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
#   printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}

--//注:11g 偏移是0x1a8,21c 偏移是0x1c8。而且11g下测试是成功的,而21c遇到如下问题:

# stap -vv /home/oracle/study/202506/lkpn21c.stp -x 7045  --tmpdir=/tmp
Systemtap translator/driver (version 3.0/0.166, rpm 3.0-7.el7)
Copyright (C) 2005-2015 Red Hat, Inc. and others
This is free software; see the source for copying conditions.
enabled features: AVAHI BOOST_SHARED_PTR BOOST_STRING_REF DYNINST JAVA LIBRPM LIBSQLITE3 LIBVIRT LIBXML2 NLS NSS TR1_UNORDERED_MAP READLINE
Created temporary directory "/tmp"
Session arch: x86_64 release: 3.10.0-514.el7.x86_64
Searched for library macro files: "/usr/share/systemtap/tapset/linux/*.stpm", found: 5, processed: 5
Searched for library macro files: "/usr/share/systemtap/tapset/*.stpm", found: 8, processed: 8
Searched: "/usr/share/systemtap/tapset/linux/x86_64/*.stp", found: 3, processed: 3
Searched: "/usr/share/systemtap/tapset/linux/*.stp", found: 71, processed: 71
Searched: "/usr/share/systemtap/tapset/x86_64/*.stp", found: 13, processed: 13
Searched: "/usr/share/systemtap/tapset/*.stp", found: 38, processed: 38
Pass 1: parsed user script and 138 library scripts using 238692virt/49144res/3280shr/46668data kb, in 440usr/10sys/454real ms.
focused on module '/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle' = [0x400000-0x1a4dde80, bias 0 file /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle ELF machine |x86_64 (code 62)
probe kgllkal@:-1 process=/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle reloc=.absolute pc=0x15367e90
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
focused on module '/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle' = [0x400000-0x1a4dde80, bias 0 file /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle ELF machine |x86_64 (code 62)
probe kglpnal@:-1 process=/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle reloc=.absolute pc=0x1536c020
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
focused on module '/root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko' = [0x10000-0x102f8, bias 0 file /root/.systemtap/cache/bf/typequery_bfe12df2dcd5d4b9d91be5042cad3575_756.ko ELF machine |x86_64 (code 62)
Pass 2: analyzed script: 3 probes, 6 functions, 4 embeds, 4 globals using 342356virt/153984res/4420shr/150332data kb, in 840usr/110sys/950real ms.
Pass 3: using cached /root/.systemtap/cache/af/stap_af5bb0421c8020dbe450918af7003b8d_11636.c
Pass 4: using cached /root/.systemtap/cache/af/stap_af5bb0421c8020dbe450918af7003b8d_11636.ko
Pass 5: starting run.
Running /usr/bin/staprun -v -t 7045 -R /tmp/stap_af5bb0421c8020dbe450918af7003b8d_11636.ko
staprun:insert_module:183 Module stap_af5bb0421c8020dbe450918af7003b8d__7265 inserted from file /tmp/stap_af5bb0421c8020dbe450918af7003b8d_11636.ko
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal") inode-offset 0000000014f6c020 registration error (rc 0)
WARNING: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal") inode-offset 0000000014f67e90 registration error (rc 0)

--//最后的2个WARNING当时无法解决,前几天上kimi查询解决方法,给出几个建议:

1、根本原因:registration error (rc 0) 是已知 Bug

这是 SystemTap 老版本(尤其是 3.x 及更早)在探测某些用户态函数入口时的经典问题。当 uprobe 尝试在特定 offset 注册断点时,尽
管 offset 计算正确,但可能由于以下原因注册失败:
- 该地址处的指令边界不被 uprobe 识别;
- Oracle 21c 编译生成的函数入口指令(如 endbr64、CET 相关指令或特定对齐方式)与旧版 SystemTap 不兼容;
- SystemTap 3.0 对现代 ELF 或 PIE 可执行文件的处理存在缺陷。

2、解决方案(按推荐顺序)

方案 1:使用 Mahmoud Hatem 的 uprobe_register Patch(最直接有效)

这是社区针对该问题的经典 workaround:通过 SystemTap 自身 hook 内核的 uprobe_register / uprobe_unregister,在注册前动态修
正 offset。对于你的场景,可以让 SystemTap 在遇到 Oracle 的 inode 和特定 offset 时,自动把探针往后挪几个字节(通常是挪到下
一个合法指令边界)。

步骤:
1. 先获取 Oracle 可执行文件的 inode 号:

ls -i /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle
# 假设输出:1234567 /u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle
   
2. 创建 fix_offset.stp:

#!/usr/bin/stap -g
// Invoke stap directly: on Linux a "#!/usr/bin/env stap -g" line hands
// "stap -g" to env as a single argument, so the script cannot be executed.
//
// Guru-mode helper that rewrites the offset passed to the kernel's
// uprobe_register / uprobe_unregister for one specific inode and offset.
//
// Arguments:
//   $1 = inode number of the oracle executable
//   $2 = file offset of the target function
//   $3 = adjustment added to that offset (usually 2 or 4)
probe kernel.function("uprobe_register"), kernel.function("uprobe_unregister")
{
    // Only touch the registration that matches both the inode and the offset.
    if ($inode->i_ino == $1 && $offset == $2) {
        printf("Patching Oracle probe offset %x -> %x\n", $offset, $offset + $3);
        // Assigning to a target variable is only permitted in guru mode (-g).
        $offset = $offset + $3;
    }
}
   
3. 在另一个终端先运行 patch 脚本(需要 -g 特权模式):

stap -g -v fix_offset.stp 1234567 0x14f67e90 2
   
4. 再运行你原来的 lkpn21c.stp 探针脚本。

如果 kglpnal 也报同样的错,可以同样为其文件偏移 0x14f6c020 增加一个条件分支,或者尝试把 $3 设为 0x400000 以内的某个指令
对齐值。不过通常 +2 或 +4 就能解决指令边界问题。

--//大概就是指令边界不对齐问题。

方案 2:改用 statement 直接指定虚拟地址

如果 .function() 解析出的入口地址注册失败,可以尝试绕过符号解析,直接用运行时虚拟地址探测。SystemTap 的
process().statement() 支持绝对地址:

stap
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(0x15367e90)
{
    // ...
}

但注意:SystemTap 内部仍旧会把虚拟地址转回文件偏移再注册给 uprobe,所以此方法不一定能绕过根本问题,只是排除了符号解析层
的潜在错误。
--//注:该方案问题依旧,编译直接报错。
$ cat lkpn21cy.stp
global lk=0, pn=0  # hit counters; comments are end-of-line only so the script line numbers cited by the recorded stap errors (10 and 17) still match
#globak off_set=0

#probe begin {
#    printf("Begin.\n")
#
#}

#probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kgllkal")
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(0x15367e90)  # pc that stap -vv reported for kgllkal
{  # on x86_64, long_arg(2)/long_arg(3) are the rsi/rdx registers, so read those instead of rdx/rcx
#   printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
    printf("spid=%d :%s lk_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++lk, register("rsi"),register("rdx"),user_string(register("rsi")+0x1c8));
}

#probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").function("kglpnal")
probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(0x1536c020)  # pc that stap -vv reported for kglpnal
{  # same register mapping as the kgllkal probe: argument 2 = rsi, argument 3 = rdx
#   printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, long_arg(2),long_arg(3),user_string(long_arg(2)+0x1c8));
    printf("spid=%d :%s pn_count %d -- handle address: %x , mode = %d , kglnaobj : %s\n", pid(), ppfunc(),++pn, register("rsi"),register("rdx"),user_string(register("rsi")+0x1c8));
#   printf("%d\t%s: %s %x %x %d\n", pid(), execname(), ppfunc(), int_arg(1),long_arg(2),long_arg(3));
}

# stap   -vv /home/oracle/sqllaji/stap/lkpn21cy.stp -x 3603  --tmpdir=/tmp
Systemtap translator/driver (version 4.0/0.176, rpm 4.0-13.el7)
Copyright (C) 2005-2018 Red Hat, Inc. and others
This is free software; see the source for copying conditions.
tested kernel versions: 2.6.18 ... 4.19-rc7
enabled features: AVAHI BOOST_STRING_REF DYNINST BPF JAVA PYTHON2 LIBRPM LIBSQLITE3 LIBVIRT LIBXML2 NLS NSS READLINE
Created temporary directory "/tmp"
Session arch: x86_64 release: 3.10.0-514.el7.x86_64
Searched for library macro files: "/usr/share/systemtap/tapset/linux", found: 6, processed: 6
Searched for library macro files: "/usr/share/systemtap/tapset", found: 10, processed: 10
Searched: "/usr/share/systemtap/tapset/linux/x86_64", found: 20, processed: 20
Searched: "/usr/share/systemtap/tapset/linux", found: 404, processed: 404
Searched: "/usr/share/systemtap/tapset/x86_64", found: 13, processed: 13
Searched: "/usr/share/systemtap/tapset", found: 44, processed: 44
Pass 1: parsed user script and 497 library scripts using 274180virt/84220res/3372shr/81720data kb, in 810usr/40sys/852real ms.
derive-probes (location #0): process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(355892880) of keyword at /home/oracle/sqllaji/stap/lkpn21cy.stp:10:1
semantic error: resolution failed in DWARF builder
   thrown from: elaborate.cxx:1077
semantic error: while resolving probe point: identifier 'process' at /home/oracle/sqllaji/stap/lkpn21cy.stp:10:7
   thrown from: elaborate.cxx:1065
        source: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(0x15367e90)
                      ^
semantic error: no match
   thrown from: elaborate.cxx:1028
derive-probes (location #0): process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(355909664) of keyword at /home/oracle/sqllaji/stap/lkpn21cy.stp:17:1
semantic error: resolution failed in DWARF builder
   thrown from: elaborate.cxx:1077
semantic error: while resolving probe point: identifier 'process' at :17:7
   thrown from: elaborate.cxx:1065
        source: probe process("/u01/app/oracle/product/21.0.0/dbhome_1/bin/oracle").statement(0x1536c020)
                      ^

semantic error: no match
   thrown from: elaborate.cxx:1028
Pass 2: analyzed script: 0 probes, 0 functions, 0 embeds, 2 globals using 377928virt/188700res/3924shr/185468data kb, in 690usr/30sys/727real ms.
Pass 2: analysis failed.  [man error::pass2]

方案 3:升级 SystemTap 版本

你当前使用的是 SystemTap 3.0(RHEL 7 / OEL 7 默认版本)。该版本对现代二进制文件和较新内核的 uprobe 支持存在不少已知缺陷。
如果环境允许,建议:

- 升级到 SystemTap 4.x 或 5.x(需要对应版本的内核 debuginfo 和编译工具链);
- 或者把探针脚本迁移到 **bpftrace**,它对 uprobe 的支持更现代,且不需要编译内核模块。

--//避免写得太长,先列出解决方法。尝试过程如下:先尝试方案1,输出乱码,当时也没有仔细分析;方案2失败;再尝试方案3,先升级SystemTap到4.0版本。


免责声明:如果侵犯了您的权益,请联系站长及时删除侵权内容,谢谢合作!qidao123.com:ToB企服之家,中国第一个企服评测及软件市场,开放入驻,技术点评得现金.
回复

使用道具 举报

登录后关闭弹窗

登录参与点评抽奖  加入IT实名职场社区
去登录
快速回复 返回顶部 返回列表