diff mbox series

[RFC,bpf-next,3/5] libbpf: usdt lib wiring of xmm reads

Message ID 20220512074321.2090073-4-davemarchevsky@fb.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series bpf: add get_reg_val helper | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail merge-conflict
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next

Commit Message

Dave Marchevsky May 12, 2022, 7:43 a.m. UTC
Handle xmm0,...,xmm15 registers when parsing USDT arguments. Currently
only the first 64 bits of the fetched value are returned as I haven't
seen the rest of the register used in practice.

This patch also handles floats in USDT arg spec by ignoring the fact
that they're floats and considering them scalar. Currently we can't do
float math in BPF programs anyways, so might as well support passing to
userspace and converting there.

We can use existing ARG_REG sscanf + logic, adding XMM-specific logic
when calc_pt_regs_off fails. If the reg is xmm, arg_spec's reg_off is
repurposed to hold reg_no, which is passed to bpf_get_reg_val. Since the
helper does the digging around in fxregs_state it's not necessary to
calculate offset in bpf code for these regs.

NOTE: Changes here cause verification failure for existing USDT tests.
Specifically, BPF_USDT prog 'usdt12' fails to verify due to too many
insns despite not having its insn count significantly changed.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 tools/lib/bpf/usdt.bpf.h | 36 ++++++++++++++++++++--------
 tools/lib/bpf/usdt.c     | 51 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 73 insertions(+), 14 deletions(-)

Comments

Alexei Starovoitov May 14, 2022, 12:43 a.m. UTC | #1
On Thu, May 12, 2022 at 12:43:19AM -0700, Dave Marchevsky wrote:
> +		err = bpf_get_reg_val(&val, sizeof(val),
> +				     ((u64)arg_spec->reg_off + BPF_GETREG_X86_XMM0) << 32,
> +				     btf_regs, btf_task);

That illustrated the point from patch 2.
The above is probably the typical usage.
Just BPF_GETREG_X86_XMM in uapi enum would have been enough.
Andrii Nakryiko May 16, 2022, 11:26 p.m. UTC | #2
On Thu, May 12, 2022 at 12:43 AM Dave Marchevsky <davemarchevsky@fb.com> wrote:
>
> Handle xmm0,...,xmm15 registers when parsing USDT arguments. Currently
> only the first 64 bits of the fetched value are returned as I haven't
> seen the rest of the register used in practice.
>
> This patch also handles floats in USDT arg spec by ignoring the fact
> that they're floats and considering them scalar. Currently we can't do
> float math in BPF programs anyways, so might as well support passing to
> userspace and converting there.
>
> We can use existing ARG_REG sscanf + logic, adding XMM-specific logic
> when calc_pt_regs_off fails. If the reg is xmm, arg_spec's reg_off is
> repurposed to hold reg_no, which is passed to bpf_get_reg_val. Since the
> helper does the digging around in fxregs_state it's not necessary to
> calculate offset in bpf code for these regs.
>
> NOTE: Changes here cause verification failure for existing USDT tests.
> Specifically, BPF_USDT prog 'usdt12' fails to verify due to too many
> insns despite not having its insn count significantly changed.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> ---
>  tools/lib/bpf/usdt.bpf.h | 36 ++++++++++++++++++++--------
>  tools/lib/bpf/usdt.c     | 51 ++++++++++++++++++++++++++++++++++++----
>  2 files changed, 73 insertions(+), 14 deletions(-)
>
> diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
> index 4181fddb3687..7b5ed4cbaa2f 100644
> --- a/tools/lib/bpf/usdt.bpf.h
> +++ b/tools/lib/bpf/usdt.bpf.h
> @@ -43,6 +43,7 @@ enum __bpf_usdt_arg_type {
>         BPF_USDT_ARG_CONST,
>         BPF_USDT_ARG_REG,
>         BPF_USDT_ARG_REG_DEREF,
> +       BPF_USDT_ARG_XMM_REG,
>  };
>
>  struct __bpf_usdt_arg_spec {
> @@ -129,7 +130,9 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>  {
>         struct __bpf_usdt_spec *spec;
>         struct __bpf_usdt_arg_spec *arg_spec;
> -       unsigned long val;
> +       struct pt_regs *btf_regs;
> +       struct task_struct *btf_task;
> +       struct { __u64 a; __u64 unused; } val = {};
>         int err, spec_id;
>
>         *res = 0;
> @@ -151,7 +154,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>                 /* Arg is just a constant ("-4@$-9" in USDT arg spec).
>                  * value is recorded in arg_spec->val_off directly.
>                  */
> -               val = arg_spec->val_off;
> +               val.a = arg_spec->val_off;
>                 break;
>         case BPF_USDT_ARG_REG:
>                 /* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
> @@ -159,7 +162,20 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>                  * struct pt_regs. To keep things simple user-space parts
>                  * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
>                  */
> -               err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
> +               err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
> +               if (err)
> +                       return err;
> +               break;
> +       case BPF_USDT_ARG_XMM_REG:

nit: a bit too XMM-specific name here, we probably want to keep it a bit

> +               /* Same as above, but arg is an xmm reg, so can't look
> +                * in pt_regs, need to use special helper.
> +                * reg_off is the regno ("xmm0" -> regno 0, etc)
> +                */
> +               btf_task = bpf_get_current_task_btf();
> +               btf_regs = (struct pt_regs *)bpf_task_pt_regs(btf_task);

I'd like to avoid taking dependency on bpf_get_current_task_btf() for
rare case of XMM register, which makes it impossible to do USDT on
older kernels. It seems like supporting reading registers from current
(and maybe current pt_regs context) should cover a lot of practical
uses.

> +               err = bpf_get_reg_val(&val, sizeof(val),

But regardless of the above, we'll need to use CO-RE to detect support
for this new BPF helper (probably using bpf_core_enum_value_exists()?)
to allow using USDTs on older kernels.


> +                                    ((u64)arg_spec->reg_off + BPF_GETREG_X86_XMM0) << 32,
> +                                    btf_regs, btf_task);
>                 if (err)
>                         return err;
>                 break;
> @@ -171,14 +187,14 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>                  * from pt_regs, then do another user-space probe read to
>                  * fetch argument value itself.
>                  */
> -               err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
> +               err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
>                 if (err)
>                         return err;
> -               err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
> +               err = bpf_probe_read_user(&val.a, sizeof(val.a), (void *)val.a + arg_spec->val_off);

is the useful value in xmm register normally in lower 64-bits of it?
is it possible to just request reading just the first 64 bits from
bpf_get_reg_val() and avoid this ugly union?

>                 if (err)
>                         return err;
>  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> -               val >>= arg_spec->arg_bitshift;
> +               val.a >>= arg_spec->arg_bitshift;
>  #endif
>                 break;
>         default:

[...]

> +static int calc_xmm_regno(const char *reg_name)
> +{
> +       static struct {
> +               const char *name;
> +               __u16 regno;
> +       } xmm_reg_map[] = {
> +               { "xmm0",  0 },
> +               { "xmm1",  1 },
> +               { "xmm2",  2 },
> +               { "xmm3",  3 },
> +               { "xmm4",  4 },
> +               { "xmm5",  5 },
> +               { "xmm6",  6 },
> +               { "xmm7",  7 },
> +#ifdef __x86_64__
> +               { "xmm8",  8 },
> +               { "xmm9",  9 },
> +               { "xmm10",  10 },
> +               { "xmm11",  11 },
> +               { "xmm12",  12 },
> +               { "xmm13",  13 },
> +               { "xmm14",  14 },
> +               { "xmm15",  15 },

no-x86 arches parse this generically with sscanf(), seems like we can
do this simple approach here as well?


> +#endif
> +       };
> +       int i;
> +
> +       for (i = 0; i < ARRAY_SIZE(xmm_reg_map); i++) {
> +               if (strcmp(reg_name, xmm_reg_map[i].name) == 0)
> +                       return xmm_reg_map[i].regno;
> +       }
> +
>         return -ENOENT;
>  }
>

[...]
Dave Marchevsky May 18, 2022, 8:20 a.m. UTC | #3
On 5/16/22 7:26 PM, Andrii Nakryiko wrote:   
> On Thu, May 12, 2022 at 12:43 AM Dave Marchevsky <davemarchevsky@fb.com> wrote:
>>
>> Handle xmm0,...,xmm15 registers when parsing USDT arguments. Currently
>> only the first 64 bits of the fetched value are returned as I haven't
>> seen the rest of the register used in practice.
>>
>> This patch also handles floats in USDT arg spec by ignoring the fact
>> that they're floats and considering them scalar. Currently we can't do
>> float math in BPF programs anyways, so might as well support passing to
>> userspace and converting there.
>>
>> We can use existing ARG_REG sscanf + logic, adding XMM-specific logic
>> when calc_pt_regs_off fails. If the reg is xmm, arg_spec's reg_off is
>> repurposed to hold reg_no, which is passed to bpf_get_reg_val. Since the
>> helper does the digging around in fxregs_state it's not necessary to
>> calculate offset in bpf code for these regs.
>>
>> NOTE: Changes here cause verification failure for existing USDT tests.
>> Specifically, BPF_USDT prog 'usdt12' fails to verify due to too many
>> insns despite not having its insn count significantly changed.
>>
>> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
>> ---
>>  tools/lib/bpf/usdt.bpf.h | 36 ++++++++++++++++++++--------
>>  tools/lib/bpf/usdt.c     | 51 ++++++++++++++++++++++++++++++++++++----
>>  2 files changed, 73 insertions(+), 14 deletions(-)
>>
>> diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
>> index 4181fddb3687..7b5ed4cbaa2f 100644
>> --- a/tools/lib/bpf/usdt.bpf.h
>> +++ b/tools/lib/bpf/usdt.bpf.h
>> @@ -43,6 +43,7 @@ enum __bpf_usdt_arg_type {
>>         BPF_USDT_ARG_CONST,
>>         BPF_USDT_ARG_REG,
>>         BPF_USDT_ARG_REG_DEREF,
>> +       BPF_USDT_ARG_XMM_REG,
>>  };
>>
>>  struct __bpf_usdt_arg_spec {
>> @@ -129,7 +130,9 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>>  {
>>         struct __bpf_usdt_spec *spec;
>>         struct __bpf_usdt_arg_spec *arg_spec;
>> -       unsigned long val;
>> +       struct pt_regs *btf_regs;
>> +       struct task_struct *btf_task;
>> +       struct { __u64 a; __u64 unused; } val = {};
>>         int err, spec_id;
>>
>>         *res = 0;
>> @@ -151,7 +154,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>>                 /* Arg is just a constant ("-4@$-9" in USDT arg spec).
>>                  * value is recorded in arg_spec->val_off directly.
>>                  */
>> -               val = arg_spec->val_off;
>> +               val.a = arg_spec->val_off;
>>                 break;
>>         case BPF_USDT_ARG_REG:
>>                 /* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
>> @@ -159,7 +162,20 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>>                  * struct pt_regs. To keep things simple user-space parts
>>                  * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
>>                  */
>> -               err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
>> +               err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
>> +               if (err)
>> +                       return err;
>> +               break;
>> +       case BPF_USDT_ARG_XMM_REG:
> 
> nit: a bit too XMM-specific name here, we probably want to keep it a bit

Agreed.

> 
>> +               /* Same as above, but arg is an xmm reg, so can't look
>> +                * in pt_regs, need to use special helper.
>> +                * reg_off is the regno ("xmm0" -> regno 0, etc)
>> +                */
>> +               btf_task = bpf_get_current_task_btf();
>> +               btf_regs = (struct pt_regs *)bpf_task_pt_regs(btf_task);
> 
> I'd like to avoid taking dependency on bpf_get_current_task_btf() for
> rare case of XMM register, which makes it impossible to do USDT on
> older kernels. It seems like supporting reading registers from current
> (and maybe current pt_regs context) should cover a lot of practical
> uses.
> 

Yep. We talked about this today. Will remove.

>> +               err = bpf_get_reg_val(&val, sizeof(val),
> 
> But regardless of the above, we'll need to use CO-RE to detect support
> for this new BPF helper (probably using bpf_core_enum_value_exists()?)
> to allow using USDTs on older kernels.
> 

Will add.

> 
>> +                                    ((u64)arg_spec->reg_off + BPF_GETREG_X86_XMM0) << 32,
>> +                                    btf_regs, btf_task);
>>                 if (err)
>>                         return err;
>>                 break;
>> @@ -171,14 +187,14 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
>>                  * from pt_regs, then do another user-space probe read to
>>                  * fetch argument value itself.
>>                  */
>> -               err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
>> +               err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
>>                 if (err)
>>                         return err;
>> -               err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
>> +               err = bpf_probe_read_user(&val.a, sizeof(val.a), (void *)val.a + arg_spec->val_off);
> 
> is the useful value in xmm register normally in lower 64-bits of it?
> is it possible to just request reading just the first 64 bits from
> bpf_get_reg_val() and avoid this ugly union?

For USDT usecase, I've only seen lower 64 bits used. Should be possible to just
grab those, will see if there's a clean way to integrate such an option.

> 
>>                 if (err)
>>                         return err;
>>  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
>> -               val >>= arg_spec->arg_bitshift;
>> +               val.a >>= arg_spec->arg_bitshift;
>>  #endif
>>                 break;
>>         default:
> 
> [...]
> 
>> +static int calc_xmm_regno(const char *reg_name)
>> +{
>> +       static struct {
>> +               const char *name;
>> +               __u16 regno;
>> +       } xmm_reg_map[] = {
>> +               { "xmm0",  0 },
>> +               { "xmm1",  1 },
>> +               { "xmm2",  2 },
>> +               { "xmm3",  3 },
>> +               { "xmm4",  4 },
>> +               { "xmm5",  5 },
>> +               { "xmm6",  6 },
>> +               { "xmm7",  7 },
>> +#ifdef __x86_64__
>> +               { "xmm8",  8 },
>> +               { "xmm9",  9 },
>> +               { "xmm10",  10 },
>> +               { "xmm11",  11 },
>> +               { "xmm12",  12 },
>> +               { "xmm13",  13 },
>> +               { "xmm14",  14 },
>> +               { "xmm15",  15 },
> 
> no-x86 arches parse this generically with sscanf(), seems like we can
> do this simple approach here as well?
> 

Agreed.

> 
>> +#endif
>> +       };
>> +       int i;
>> +
>> +       for (i = 0; i < ARRAY_SIZE(xmm_reg_map); i++) {
>> +               if (strcmp(reg_name, xmm_reg_map[i].name) == 0)
>> +                       return xmm_reg_map[i].regno;
>> +       }
>> +
>>         return -ENOENT;
>>  }
>>
> 
> [...]
diff mbox series

Patch

diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 4181fddb3687..7b5ed4cbaa2f 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -43,6 +43,7 @@  enum __bpf_usdt_arg_type {
 	BPF_USDT_ARG_CONST,
 	BPF_USDT_ARG_REG,
 	BPF_USDT_ARG_REG_DEREF,
+	BPF_USDT_ARG_XMM_REG,
 };
 
 struct __bpf_usdt_arg_spec {
@@ -129,7 +130,9 @@  int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 {
 	struct __bpf_usdt_spec *spec;
 	struct __bpf_usdt_arg_spec *arg_spec;
-	unsigned long val;
+	struct pt_regs *btf_regs;
+	struct task_struct *btf_task;
+	struct { __u64 a; __u64 unused; } val = {};
 	int err, spec_id;
 
 	*res = 0;
@@ -151,7 +154,7 @@  int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 		/* Arg is just a constant ("-4@$-9" in USDT arg spec).
 		 * value is recorded in arg_spec->val_off directly.
 		 */
-		val = arg_spec->val_off;
+		val.a = arg_spec->val_off;
 		break;
 	case BPF_USDT_ARG_REG:
 		/* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
@@ -159,7 +162,20 @@  int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 		 * struct pt_regs. To keep things simple user-space parts
 		 * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
 		 */
-		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		break;
+	case BPF_USDT_ARG_XMM_REG:
+		/* Same as above, but arg is an xmm reg, so can't look
+		 * in pt_regs, need to use special helper.
+		 * reg_off is the regno ("xmm0" -> regno 0, etc)
+		 */
+		btf_task = bpf_get_current_task_btf();
+		btf_regs = (struct pt_regs *)bpf_task_pt_regs(btf_task);
+		err = bpf_get_reg_val(&val, sizeof(val),
+				     ((u64)arg_spec->reg_off + BPF_GETREG_X86_XMM0) << 32,
+				     btf_regs, btf_task);
 		if (err)
 			return err;
 		break;
@@ -171,14 +187,14 @@  int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 		 * from pt_regs, then do another user-space probe read to
 		 * fetch argument value itself.
 		 */
-		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		err = bpf_probe_read_kernel(&val.a, sizeof(val.a), (void *)ctx + arg_spec->reg_off);
 		if (err)
 			return err;
-		err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+		err = bpf_probe_read_user(&val.a, sizeof(val.a), (void *)val.a + arg_spec->val_off);
 		if (err)
 			return err;
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-		val >>= arg_spec->arg_bitshift;
+		val.a >>= arg_spec->arg_bitshift;
 #endif
 		break;
 	default:
@@ -189,12 +205,12 @@  int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 	 * necessary upper arg_bitshift bits, with sign extension if argument
 	 * is signed
 	 */
-	val <<= arg_spec->arg_bitshift;
+	val.a <<= arg_spec->arg_bitshift;
 	if (arg_spec->arg_signed)
-		val = ((long)val) >> arg_spec->arg_bitshift;
+		val.a = ((long)val.a) >> arg_spec->arg_bitshift;
 	else
-		val = val >> arg_spec->arg_bitshift;
-	*res = val;
+		val.a = val.a >> arg_spec->arg_bitshift;
+	*res = val.a;
 	return 0;
 }
 
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index f1c9339cfbbc..3cac48959ff9 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -199,6 +199,7 @@  enum usdt_arg_type {
 	USDT_ARG_CONST,
 	USDT_ARG_REG,
 	USDT_ARG_REG_DEREF,
+	USDT_ARG_XMM_REG,
 };
 
 /* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
@@ -1218,7 +1219,41 @@  static int calc_pt_regs_off(const char *reg_name)
 		}
 	}
 
-	pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+	return -ENOENT;
+}
+
+static int calc_xmm_regno(const char *reg_name)
+{
+	static struct {
+		const char *name;
+		__u16 regno;
+	} xmm_reg_map[] = {
+		{ "xmm0",  0 },
+		{ "xmm1",  1 },
+		{ "xmm2",  2 },
+		{ "xmm3",  3 },
+		{ "xmm4",  4 },
+		{ "xmm5",  5 },
+		{ "xmm6",  6 },
+		{ "xmm7",  7 },
+#ifdef __x86_64__
+		{ "xmm8",  8 },
+		{ "xmm9",  9 },
+		{ "xmm10",  10 },
+		{ "xmm11",  11 },
+		{ "xmm12",  12 },
+		{ "xmm13",  13 },
+		{ "xmm14",  14 },
+		{ "xmm15",  15 },
+#endif
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(xmm_reg_map); i++) {
+		if (strcmp(reg_name, xmm_reg_map[i].name) == 0)
+			return xmm_reg_map[i].regno;
+	}
+
 	return -ENOENT;
 }
 
@@ -1234,18 +1269,26 @@  static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
 		arg->val_off = off;
 		reg_off = calc_pt_regs_off(reg_name);
 		free(reg_name);
-		if (reg_off < 0)
+		if (reg_off < 0) {
+			pr_warn("usdt: unrecognized register '%s'\n", reg_name);
 			return reg_off;
+		}
 		arg->reg_off = reg_off;
-	} else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
+	} else if (sscanf(arg_str, " %d %*[f@] %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
 		/* Register read case, e.g., -4@%eax */
 		arg->arg_type = USDT_ARG_REG;
 		arg->val_off = 0;
 
 		reg_off = calc_pt_regs_off(reg_name);
+		if (reg_off < 0) {
+			reg_off = calc_xmm_regno(reg_name);
+			arg->arg_type = USDT_ARG_XMM_REG;
+		}
 		free(reg_name);
-		if (reg_off < 0)
+		if (reg_off < 0) {
+			pr_warn("usdt: unrecognized register '%s'\n", reg_name);
 			return reg_off;
+		}
 		arg->reg_off = reg_off;
 	} else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) {
 		/* Constant value case, e.g., 4@$71 */