diff mbox series

[1/2] blk-cgroup: fix list corruption from resetting io stat

Message ID 20240515013157.443672-2-ming.lei@redhat.com (mailing list archive)
State New
Headers show
Series blk-cgroup: two fixes on list corruption | expand

Commit Message

Ming Lei May 15, 2024, 1:31 a.m. UTC
Since commit 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()"),
each iostat instance is added to blkcg percpu list, so blkcg_reset_stats()
can't reset the stat instance by memset(), otherwise the llist may be
corrupted.

Fix the issue by only resetting the counter part.

Cc: Tejun Heo <tj@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Jay Shin <jaeshin@redhat.com>
Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-cgroup.c | 58 ++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 23 deletions(-)

Comments

Waiman Long May 15, 2024, 1:59 p.m. UTC | #1
On 5/14/24 21:31, Ming Lei wrote:
> Since commit 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()"),
> each iostat instance is added to blkcg percpu list, so blkcg_reset_stats()
> can't reset the stat instance by memset(), otherwise the llist may be
> corrupted.
>
> Fix the issue by only resetting the counter part.
>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Jay Shin <jaeshin@redhat.com>
> Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/blk-cgroup.c | 58 ++++++++++++++++++++++++++++------------------
>   1 file changed, 35 insertions(+), 23 deletions(-)
>
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 059467086b13..86752b1652b5 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -619,12 +619,45 @@ static void blkg_destroy_all(struct gendisk *disk)
>   	spin_unlock_irq(&q->queue_lock);
>   }
>   
> +static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
> +{
> +	int i;
> +
> +	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
> +		dst->bytes[i] = src->bytes[i];
> +		dst->ios[i] = src->ios[i];
> +	}
> +}
> +
> +static void __blkg_clear_stat(struct blkg_iostat_set *bis)
> +{
> +	struct blkg_iostat cur = {0};
> +	unsigned long flags;
> +
> +	flags = u64_stats_update_begin_irqsave(&bis->sync);
> +	blkg_iostat_set(&bis->cur, &cur);
> +	blkg_iostat_set(&bis->last, &cur);
> +	u64_stats_update_end_irqrestore(&bis->sync, flags);
> +}
> +
> +static void blkg_clear_stat(struct blkcg_gq *blkg)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu);
> +
> +		__blkg_clear_stat(s);
> +	}
> +	__blkg_clear_stat(&blkg->iostat);
> +}
> +
>   static int blkcg_reset_stats(struct cgroup_subsys_state *css,
>   			     struct cftype *cftype, u64 val)
>   {
>   	struct blkcg *blkcg = css_to_blkcg(css);
>   	struct blkcg_gq *blkg;
> -	int i, cpu;
> +	int i;
>   
>   	mutex_lock(&blkcg_pol_mutex);
>   	spin_lock_irq(&blkcg->lock);
> @@ -635,18 +668,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
>   	 * anyway.  If you get hit by a race, retry.
>   	 */
>   	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
> -		for_each_possible_cpu(cpu) {
> -			struct blkg_iostat_set *bis =
> -				per_cpu_ptr(blkg->iostat_cpu, cpu);
> -			memset(bis, 0, sizeof(*bis));
> -
> -			/* Re-initialize the cleared blkg_iostat_set */
> -			u64_stats_init(&bis->sync);
> -			bis->blkg = blkg;
> -		}
> -		memset(&blkg->iostat, 0, sizeof(blkg->iostat));
> -		u64_stats_init(&blkg->iostat.sync);
> -
> +		blkg_clear_stat(blkg);
>   		for (i = 0; i < BLKCG_MAX_POLS; i++) {
>   			struct blkcg_policy *pol = blkcg_policy[i];
>   
> @@ -949,16 +971,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
>   }
>   EXPORT_SYMBOL_GPL(blkg_conf_exit);
>   
> -static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
> -{
> -	int i;
> -
> -	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
> -		dst->bytes[i] = src->bytes[i];
> -		dst->ios[i] = src->ios[i];
> -	}
> -}
> -
>   static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
>   {
>   	int i;
Reviewed-by: Waiman Long <longman@redhat.com>
Tejun Heo May 15, 2024, 4:49 p.m. UTC | #2
On Wed, May 15, 2024 at 09:31:56AM +0800, Ming Lei wrote:
> Since commit 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()"),
> each iostat instance is added to blkcg percpu list, so blkcg_reset_stats()
> can't reset the stat instance by memset(), otherwise the llist may be
> corrupted.
> 
> Fix the issue by only resetting the counter part.
> 
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Jay Shin <jaeshin@redhat.com>
> Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>

Acked-by: Tejun Heo <tj@kernel.org>

Thanks.
diff mbox series

Patch

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 059467086b13..86752b1652b5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -619,12 +619,45 @@  static void blkg_destroy_all(struct gendisk *disk)
 	spin_unlock_irq(&q->queue_lock);
 }
 
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+	int i;
+
+	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+		dst->bytes[i] = src->bytes[i];
+		dst->ios[i] = src->ios[i];
+	}
+}
+
+static void __blkg_clear_stat(struct blkg_iostat_set *bis)
+{
+	struct blkg_iostat cur = {0};
+	unsigned long flags;
+
+	flags = u64_stats_update_begin_irqsave(&bis->sync);
+	blkg_iostat_set(&bis->cur, &cur);
+	blkg_iostat_set(&bis->last, &cur);
+	u64_stats_update_end_irqrestore(&bis->sync, flags);
+}
+
+static void blkg_clear_stat(struct blkcg_gq *blkg)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu);
+
+		__blkg_clear_stat(s);
+	}
+	__blkg_clear_stat(&blkg->iostat);
+}
+
 static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 			     struct cftype *cftype, u64 val)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
 	struct blkcg_gq *blkg;
-	int i, cpu;
+	int i;
 
 	mutex_lock(&blkcg_pol_mutex);
 	spin_lock_irq(&blkcg->lock);
@@ -635,18 +668,7 @@  static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 	 * anyway.  If you get hit by a race, retry.
 	 */
 	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		for_each_possible_cpu(cpu) {
-			struct blkg_iostat_set *bis =
-				per_cpu_ptr(blkg->iostat_cpu, cpu);
-			memset(bis, 0, sizeof(*bis));
-
-			/* Re-initialize the cleared blkg_iostat_set */
-			u64_stats_init(&bis->sync);
-			bis->blkg = blkg;
-		}
-		memset(&blkg->iostat, 0, sizeof(blkg->iostat));
-		u64_stats_init(&blkg->iostat.sync);
-
+		blkg_clear_stat(blkg);
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
 
@@ -949,16 +971,6 @@  void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(blkg_conf_exit);
 
-static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
-	int i;
-
-	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
-		dst->bytes[i] = src->bytes[i];
-		dst->ios[i] = src->ios[i];
-	}
-}
-
 static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
 {
 	int i;