diff mbox series

ceph/001: add extra check for remote object copies

Message ID 20220323160925.7142-1-lhenriques@suse.de (mailing list archive)
State New, archived
Headers show
Series ceph/001: add extra check for remote object copies | expand

Commit Message

Luis Henriques March 23, 2022, 4:09 p.m. UTC
Ceph kernel client now has a facility to check stats for certain operations.
One of these operations is the 'copyfrom', the operation that is used to offload
to the OSDs the copy of objects from, for example, the copy_file_range()
syscall.

This patch changes ceph/001 to add an extra check to verify that the copies
performed by the test are _really_ remote copies and not simple read+write
operations.

Signed-off-by: Luís Henriques <lhenriques@suse.de>
---
 common/ceph    | 10 ++++++++
 tests/ceph/001 | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

Comments

Eryu Guan April 10, 2022, 3:15 p.m. UTC | #1
Hi Jeff,

On Wed, Mar 23, 2022 at 04:09:25PM +0000, Luís Henriques wrote:
> Ceph kernel client now has a facility to check stats for certain operations.
> One of these operations is the 'copyfrom', the operation that is used to offload
> to the OSDs the copy of objects from, for example, the copy_file_range()
> syscall.
> 
> This patch changes ceph/001 to add an extra check to verify that the copies
> performed by the test are _really_ remote copies and not simple read+write
> operations.
> 
> Signed-off-by: Luís Henriques <lhenriques@suse.de>

Would you please help review this cephfs test? Thanks!

Eryu

> ---
>  common/ceph    | 10 ++++++++
>  tests/ceph/001 | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 76 insertions(+)
> 
> diff --git a/common/ceph b/common/ceph
> index ca756dda8dd3..d6f24df177e7 100644
> --- a/common/ceph
> +++ b/common/ceph
> @@ -28,3 +28,13 @@ _require_ceph_vxattr_caps()
>  	$GETFATTR_PROG -n "ceph.caps" $TEST_DIR >/dev/null 2>&1 \
>  	  || _notrun "ceph.caps vxattr not supported"
>  }
> +
> +_ceph_get_cluster_fsid()
> +{
> +	$GETFATTR_PROG --only-values -n "ceph.cluster_fsid" $TEST_DIR 2>/dev/null
> +}
> +
> +_ceph_get_client_id()
> +{
> +	$GETFATTR_PROG --only-values -n "ceph.client_id" $TEST_DIR 2>/dev/null
> +}
> diff --git a/tests/ceph/001 b/tests/ceph/001
> index 5a828567d500..7970ce352bab 100755
> --- a/tests/ceph/001
> +++ b/tests/ceph/001
> @@ -30,6 +30,10 @@ workdir=$TEST_DIR/test-$seq
>  rm -rf $workdir
>  mkdir $workdir
>  
> +cluster_fsid=$(_ceph_get_cluster_fsid)
> +client_id=$(_ceph_get_client_id)
> +metrics_dir="$DEBUGFS_MNT/ceph/$cluster_fsid.$client_id/metrics"
> +
>  check_range()
>  {
>  	local file=$1
> @@ -40,8 +44,68 @@ check_range()
>  	[ $? -eq 0 ] && echo "file $file is not '$val' in [ $off0 $off1 ]"
>  }
>  
> +#
> +# The metrics file has the following fields:
> +#   1. item
> +#   2. total
> +#   3. avg_sz(bytes)
> +#   4. min_sz(bytes)
> +#   5. max_sz(bytes)
> +#   6. total_sz(bytes)
> +get_copyfrom_total_copies()
> +{
> +	local total=0
> +
> +	if [ -d $metrics_dir ]; then
> +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 2)
> +	fi
> +	echo $total
> +}
> +get_copyfrom_total_size()
> +{
> +	local total=0
> +
> +	if [ -d $metrics_dir ]; then
> +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 6)
> +	fi
> +	echo $total
> +}
> +
> +# This function checks that the metrics file has the expected values for number
> +# of remote object copies and the total size of the copies.  For this, it
> +# expects a input:
> +#   $1 - initial number copies in metrics file (field 'total')
> +#   $2 - initial total size in bytes in metrics file (field 'total_sz')
> +#   $3 - object size used for copies
> +#   $4 - number of remote objects copied
> +check_copyfrom_metrics()
> +{
> +	local c0=$1
> +	local s0=$2
> +	local objsz=$3
> +	local copies=$4
> +	local c1=$(get_copyfrom_total_copies)
> +	local s1=$(get_copyfrom_total_size)
> +	local sum
> +
> +	if [ ! -d $metrics_dir ]; then
> +		return # skip metrics check if debugfs isn't mounted
> +	fi
> +
> +	sum=$(($c0+$copies))
> +	if [ $sum -ne $c1 ]; then
> +		echo "Wrong number of remote copies.  Expected $sum, got $c1"
> +	fi
> +	sum=$(($s0+$copies*$objsz))
> +	if [ $sum -ne $s1 ]; then
> +		echo "Wrong size of remote copies.  Expected $sum, got $s1"
> +	fi
> +}
> +
>  run_copy_range_tests()
>  {
> +	total_copies=$(get_copyfrom_total_copies)
> +	total_size=$(get_copyfrom_total_size)
>  	objsz=$1
>  	halfobj=$(($objsz / 2))
>  	file="$workdir/file-$objsz"
> @@ -203,6 +267,8 @@ run_copy_range_tests()
>  	check_range $dest $(($objsz * 2 + $halfobj)) $objsz 63
>  	check_range $dest $(($objsz * 3 + $halfobj)) $halfobj 64
>  
> +	# Confirm that we've done a total of 24 object copies
> +	check_copyfrom_metrics $total_copies $total_size $objsz 24
>  }
>  
>  echo "Object size: 65536" # CEPH_MIN_STRIPE_UNIT
Jeff Layton April 15, 2022, 11:53 a.m. UTC | #2
On Wed, 2022-03-23 at 16:09 +0000, Luís Henriques wrote:
> Ceph kernel client now has a facility to check stats for certain operations.
> One of these operations is the 'copyfrom', the operation that is used to offload
> to the OSDs the copy of objects from, for example, the copy_file_range()
> syscall.
> 
> This patch changes ceph/001 to add an extra check to verify that the copies
> performed by the test are _really_ remote copies and not simple read+write
> operations.
> 
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
>  common/ceph    | 10 ++++++++
>  tests/ceph/001 | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 76 insertions(+)
> 
> diff --git a/common/ceph b/common/ceph
> index ca756dda8dd3..d6f24df177e7 100644
> --- a/common/ceph
> +++ b/common/ceph
> @@ -28,3 +28,13 @@ _require_ceph_vxattr_caps()
>  	$GETFATTR_PROG -n "ceph.caps" $TEST_DIR >/dev/null 2>&1 \
>  	  || _notrun "ceph.caps vxattr not supported"
>  }
> +
> +_ceph_get_cluster_fsid()
> +{
> +	$GETFATTR_PROG --only-values -n "ceph.cluster_fsid" $TEST_DIR 2>/dev/null
> +}
> +
> +_ceph_get_client_id()
> +{
> +	$GETFATTR_PROG --only-values -n "ceph.client_id" $TEST_DIR 2>/dev/null
> +}
> diff --git a/tests/ceph/001 b/tests/ceph/001
> index 5a828567d500..7970ce352bab 100755
> --- a/tests/ceph/001
> +++ b/tests/ceph/001
> @@ -30,6 +30,10 @@ workdir=$TEST_DIR/test-$seq
>  rm -rf $workdir
>  mkdir $workdir
>  
> +cluster_fsid=$(_ceph_get_cluster_fsid)
> +client_id=$(_ceph_get_client_id)
> +metrics_dir="$DEBUGFS_MNT/ceph/$cluster_fsid.$client_id/metrics"
> +
>  check_range()
>  {
>  	local file=$1
> @@ -40,8 +44,68 @@ check_range()
>  	[ $? -eq 0 ] && echo "file $file is not '$val' in [ $off0 $off1 ]"
>  }
>  
> +#
> +# The metrics file has the following fields:
> +#   1. item
> +#   2. total
> +#   3. avg_sz(bytes)
> +#   4. min_sz(bytes)
> +#   5. max_sz(bytes)
> +#   6. total_sz(bytes)
> +get_copyfrom_total_copies()
> +{
> +	local total=0
> +
> +	if [ -d $metrics_dir ]; then
> +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 2)

Some older kernels didn't keep copyfrom stats, but any kernel that has a
$metrics_dir should have stats for copyfrom. This should be fine.

> +	fi
> +	echo $total
> +}
> +get_copyfrom_total_size()
> +{
> +	local total=0
> +
> +	if [ -d $metrics_dir ]; then
> +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 6)
> +	fi
> +	echo $total
> +}
> +
> +# This function checks that the metrics file has the expected values for number
> +# of remote object copies and the total size of the copies.  For this, it
> +# expects a input:
> +#   $1 - initial number copies in metrics file (field 'total')
> +#   $2 - initial total size in bytes in metrics file (field 'total_sz')
> +#   $3 - object size used for copies
> +#   $4 - number of remote objects copied
> +check_copyfrom_metrics()
> +{
> +	local c0=$1
> +	local s0=$2
> +	local objsz=$3
> +	local copies=$4
> +	local c1=$(get_copyfrom_total_copies)
> +	local s1=$(get_copyfrom_total_size)
> +	local sum
> +
> +	if [ ! -d $metrics_dir ]; then
> +		return # skip metrics check if debugfs isn't mounted
> +	fi
> +
> +	sum=$(($c0+$copies))
> +	if [ $sum -ne $c1 ]; then
> +		echo "Wrong number of remote copies.  Expected $sum, got $c1"
> +	fi
> +	sum=$(($s0+$copies*$objsz))
> +	if [ $sum -ne $s1 ]; then
> +		echo "Wrong size of remote copies.  Expected $sum, got $s1"
> +	fi
> +}
> +
>  run_copy_range_tests()
>  {
> +	total_copies=$(get_copyfrom_total_copies)
> +	total_size=$(get_copyfrom_total_size)
>  	objsz=$1
>  	halfobj=$(($objsz / 2))
>  	file="$workdir/file-$objsz"
> @@ -203,6 +267,8 @@ run_copy_range_tests()
>  	check_range $dest $(($objsz * 2 + $halfobj)) $objsz 63
>  	check_range $dest $(($objsz * 3 + $halfobj)) $halfobj 64
>  
> +	# Confirm that we've done a total of 24 object copies
> +	check_copyfrom_metrics $total_copies $total_size $objsz 24
>  }
>  
>  echo "Object size: 65536" # CEPH_MIN_STRIPE_UNIT

LGTM

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Luis Henriques April 18, 2022, 9:31 a.m. UTC | #3
On Fri, Apr 15, 2022 at 07:53:19AM -0400, Jeff Layton wrote:
> On Wed, 2022-03-23 at 16:09 +0000, Luís Henriques wrote:
> > Ceph kernel client now has a facility to check stats for certain operations.
> > One of these operations is the 'copyfrom', the operation that is used to offload
> > to the OSDs the copy of objects from, for example, the copy_file_range()
> > syscall.
> > 
> > This patch changes ceph/001 to add an extra check to verify that the copies
> > performed by the test are _really_ remote copies and not simple read+write
> > operations.
> > 
> > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > ---
> >  common/ceph    | 10 ++++++++
> >  tests/ceph/001 | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 76 insertions(+)
> > 
> > diff --git a/common/ceph b/common/ceph
> > index ca756dda8dd3..d6f24df177e7 100644
> > --- a/common/ceph
> > +++ b/common/ceph
> > @@ -28,3 +28,13 @@ _require_ceph_vxattr_caps()
> >  	$GETFATTR_PROG -n "ceph.caps" $TEST_DIR >/dev/null 2>&1 \
> >  	  || _notrun "ceph.caps vxattr not supported"
> >  }
> > +
> > +_ceph_get_cluster_fsid()
> > +{
> > +	$GETFATTR_PROG --only-values -n "ceph.cluster_fsid" $TEST_DIR 2>/dev/null
> > +}
> > +
> > +_ceph_get_client_id()
> > +{
> > +	$GETFATTR_PROG --only-values -n "ceph.client_id" $TEST_DIR 2>/dev/null
> > +}
> > diff --git a/tests/ceph/001 b/tests/ceph/001
> > index 5a828567d500..7970ce352bab 100755
> > --- a/tests/ceph/001
> > +++ b/tests/ceph/001
> > @@ -30,6 +30,10 @@ workdir=$TEST_DIR/test-$seq
> >  rm -rf $workdir
> >  mkdir $workdir
> >  
> > +cluster_fsid=$(_ceph_get_cluster_fsid)
> > +client_id=$(_ceph_get_client_id)
> > +metrics_dir="$DEBUGFS_MNT/ceph/$cluster_fsid.$client_id/metrics"
> > +
> >  check_range()
> >  {
> >  	local file=$1
> > @@ -40,8 +44,68 @@ check_range()
> >  	[ $? -eq 0 ] && echo "file $file is not '$val' in [ $off0 $off1 ]"
> >  }
> >  
> > +#
> > +# The metrics file has the following fields:
> > +#   1. item
> > +#   2. total
> > +#   3. avg_sz(bytes)
> > +#   4. min_sz(bytes)
> > +#   5. max_sz(bytes)
> > +#   6. total_sz(bytes)
> > +get_copyfrom_total_copies()
> > +{
> > +	local total=0
> > +
> > +	if [ -d $metrics_dir ]; then
> > +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 2)
> 
> Some older kernels didn't keep copyfrom stats, but any kernel that has a
> $metrics_dir should have stats for copyfrom. This should be fine.
> 
> > +	fi
> > +	echo $total
> > +}
> > +get_copyfrom_total_size()
> > +{
> > +	local total=0
> > +
> > +	if [ -d $metrics_dir ]; then
> > +		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 6)
> > +	fi
> > +	echo $total
> > +}
> > +
> > +# This function checks that the metrics file has the expected values for number
> > +# of remote object copies and the total size of the copies.  For this, it
> > +# expects a input:
> > +#   $1 - initial number copies in metrics file (field 'total')
> > +#   $2 - initial total size in bytes in metrics file (field 'total_sz')
> > +#   $3 - object size used for copies
> > +#   $4 - number of remote objects copied
> > +check_copyfrom_metrics()
> > +{
> > +	local c0=$1
> > +	local s0=$2
> > +	local objsz=$3
> > +	local copies=$4
> > +	local c1=$(get_copyfrom_total_copies)
> > +	local s1=$(get_copyfrom_total_size)
> > +	local sum
> > +
> > +	if [ ! -d $metrics_dir ]; then
> > +		return # skip metrics check if debugfs isn't mounted
> > +	fi
> > +
> > +	sum=$(($c0+$copies))
> > +	if [ $sum -ne $c1 ]; then
> > +		echo "Wrong number of remote copies.  Expected $sum, got $c1"
> > +	fi
> > +	sum=$(($s0+$copies*$objsz))
> > +	if [ $sum -ne $s1 ]; then
> > +		echo "Wrong size of remote copies.  Expected $sum, got $s1"
> > +	fi
> > +}
> > +
> >  run_copy_range_tests()
> >  {
> > +	total_copies=$(get_copyfrom_total_copies)
> > +	total_size=$(get_copyfrom_total_size)
> >  	objsz=$1
> >  	halfobj=$(($objsz / 2))
> >  	file="$workdir/file-$objsz"
> > @@ -203,6 +267,8 @@ run_copy_range_tests()
> >  	check_range $dest $(($objsz * 2 + $halfobj)) $objsz 63
> >  	check_range $dest $(($objsz * 3 + $halfobj)) $halfobj 64
> >  
> > +	# Confirm that we've done a total of 24 object copies
> > +	check_copyfrom_metrics $total_copies $total_size $objsz 24
> >  }
> >  
> >  echo "Object size: 65536" # CEPH_MIN_STRIPE_UNIT
> 
> LGTM
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Thanks, Jeff!

Cheers,
--
Luís
diff mbox series

Patch

diff --git a/common/ceph b/common/ceph
index ca756dda8dd3..d6f24df177e7 100644
--- a/common/ceph
+++ b/common/ceph
@@ -28,3 +28,13 @@  _require_ceph_vxattr_caps()
 	$GETFATTR_PROG -n "ceph.caps" $TEST_DIR >/dev/null 2>&1 \
 	  || _notrun "ceph.caps vxattr not supported"
 }
+
+_ceph_get_cluster_fsid()
+{
+	$GETFATTR_PROG --only-values -n "ceph.cluster_fsid" $TEST_DIR 2>/dev/null
+}
+
+_ceph_get_client_id()
+{
+	$GETFATTR_PROG --only-values -n "ceph.client_id" $TEST_DIR 2>/dev/null
+}
diff --git a/tests/ceph/001 b/tests/ceph/001
index 5a828567d500..7970ce352bab 100755
--- a/tests/ceph/001
+++ b/tests/ceph/001
@@ -30,6 +30,10 @@  workdir=$TEST_DIR/test-$seq
 rm -rf $workdir
 mkdir $workdir
 
+cluster_fsid=$(_ceph_get_cluster_fsid)
+client_id=$(_ceph_get_client_id)
+metrics_dir="$DEBUGFS_MNT/ceph/$cluster_fsid.$client_id/metrics"
+
 check_range()
 {
 	local file=$1
@@ -40,8 +44,68 @@  check_range()
 	[ $? -eq 0 ] && echo "file $file is not '$val' in [ $off0 $off1 ]"
 }
 
+#
+# The metrics file has the following fields:
+#   1. item
+#   2. total
+#   3. avg_sz(bytes)
+#   4. min_sz(bytes)
+#   5. max_sz(bytes)
+#   6. total_sz(bytes)
+get_copyfrom_total_copies()
+{
+	local total=0
+
+	if [ -d $metrics_dir ]; then
+		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 2)
+	fi
+	echo $total
+}
+get_copyfrom_total_size()
+{
+	local total=0
+
+	if [ -d $metrics_dir ]; then
+		total=$(grep copyfrom $metrics_dir/size | tr -s '[:space:]' | cut -d ' ' -f 6)
+	fi
+	echo $total
+}
+
+# This function checks that the metrics file has the expected values for number
+# of remote object copies and the total size of the copies.  For this, it
+# expects a input:
+#   $1 - initial number copies in metrics file (field 'total')
+#   $2 - initial total size in bytes in metrics file (field 'total_sz')
+#   $3 - object size used for copies
+#   $4 - number of remote objects copied
+check_copyfrom_metrics()
+{
+	local c0=$1
+	local s0=$2
+	local objsz=$3
+	local copies=$4
+	local c1=$(get_copyfrom_total_copies)
+	local s1=$(get_copyfrom_total_size)
+	local sum
+
+	if [ ! -d $metrics_dir ]; then
+		return # skip metrics check if debugfs isn't mounted
+	fi
+
+	sum=$(($c0+$copies))
+	if [ $sum -ne $c1 ]; then
+		echo "Wrong number of remote copies.  Expected $sum, got $c1"
+	fi
+	sum=$(($s0+$copies*$objsz))
+	if [ $sum -ne $s1 ]; then
+		echo "Wrong size of remote copies.  Expected $sum, got $s1"
+	fi
+}
+
 run_copy_range_tests()
 {
+	total_copies=$(get_copyfrom_total_copies)
+	total_size=$(get_copyfrom_total_size)
 	objsz=$1
 	halfobj=$(($objsz / 2))
 	file="$workdir/file-$objsz"
@@ -203,6 +267,8 @@  run_copy_range_tests()
 	check_range $dest $(($objsz * 2 + $halfobj)) $objsz 63
 	check_range $dest $(($objsz * 3 + $halfobj)) $halfobj 64
 
+	# Confirm that we've done a total of 24 object copies
+	check_copyfrom_metrics $total_copies $total_size $objsz 24
 }
 
 echo "Object size: 65536" # CEPH_MIN_STRIPE_UNIT