diff mbox series

[v3,5/8] block/write-threshold: don't use aio context lock

Message ID 20210506090621.11848-6-vsementsov@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series block: refactor write threshold | expand

Commit Message

Vladimir Sementsov-Ogievskiy May 6, 2021, 9:06 a.m. UTC
Instead of relying on aio context lock, let's make use of atomic
operations.

The tricky place is bdrv_write_threshold_check_write(): we want to
atomically unset bs->write_threshold_offset iff
  offset + bytes > bs->write_threshold_offset
We don't have such an atomic operation, so let's go in a loop:

1. fetch wtr atomically
2. if condition satisfied, try cmpxchg (if not satisfied, we are done,
   don't send event)
3. if cmpxchg succeeded, we are done (send event), else go to [1]

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 include/block/block_int.h       |  6 +++++-
 include/block/write-threshold.h |  6 ++++++
 block/write-threshold.c         | 34 +++++++++++++++++----------------
 3 files changed, 29 insertions(+), 17 deletions(-)

Comments

Paolo Bonzini May 7, 2021, 1:45 p.m. UTC | #1
On 06/05/21 11:06, Vladimir Sementsov-Ogievskiy wrote:
>   void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
>                                         int64_t bytes)
>   {
>       int64_t end = offset + bytes;
> -    uint64_t wtr = bs->write_threshold_offset;
> +    uint64_t wtr;
>   
> -    if (wtr > 0 && end > wtr) {
> -        qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
> +retry:
> +    wtr = bdrv_write_threshold_get(bs);
> +    if (wtr == 0 || wtr >= end) {
> +        return;
> +    }
>   
> -        /* autodisable to avoid flooding the monitor */
> -        bdrv_write_threshold_set(bs, 0);
> +    /* autodisable to avoid flooding the monitor */
> +    if (qatomic_cmpxchg(&bs->write_threshold_offset, wtr, 0) != wtr) {
> +        /* bs->write_threshold_offset changed in parallel */
> +        goto retry;
>       }
> +
> +    /* We have cleared bs->write_threshold_offset, so let's send event */
> +    qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
>   }
> 

This has the problem that 64-bit atomics are not always possible on 
32-bit builds.  We can use a spinlock (and probably just drop this patch 
for now).

Paolo
Vladimir Sementsov-Ogievskiy May 10, 2021, 9:30 a.m. UTC | #2
07.05.2021 16:45, Paolo Bonzini wrote:
> On 06/05/21 11:06, Vladimir Sementsov-Ogievskiy wrote:
>>   void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
>>                                         int64_t bytes)
>>   {
>>       int64_t end = offset + bytes;
>> -    uint64_t wtr = bs->write_threshold_offset;
>> +    uint64_t wtr;
>> -    if (wtr > 0 && end > wtr) {
>> -        qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
>> +retry:
>> +    wtr = bdrv_write_threshold_get(bs);
>> +    if (wtr == 0 || wtr >= end) {
>> +        return;
>> +    }
>> -        /* autodisable to avoid flooding the monitor */
>> -        bdrv_write_threshold_set(bs, 0);
>> +    /* autodisable to avoid flooding the monitor */
>> +    if (qatomic_cmpxchg(&bs->write_threshold_offset, wtr, 0) != wtr) {
>> +        /* bs->write_threshold_offset changed in parallel */
>> +        goto retry;
>>       }
>> +
>> +    /* We have cleared bs->write_threshold_offset, so let's send event */
>> +    qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
>>   }
>>
> 
> This has the problem that 64-bit atomics are not always possible on 32-bit builds.  We can use a spinlock (and probably just drop this patch for now).
> 
> Paolo
> 

OK, let's just drop it for now; the series was originally not intended to make anything thread-safe, but only to clear the way for that.

(And honestly I doubt that write-threshold is worth the complexity of this atomic cmpxchg retry loop; a mutex would be simpler anyway)
diff mbox series

Patch

diff --git a/include/block/block_int.h b/include/block/block_int.h
index eab352f363..e3f3d79f5b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -954,7 +954,11 @@  struct BlockDriverState {
      */
     int64_t total_sectors;
 
-    /* threshold limit for writes, in bytes. "High water mark". */
+    /*
+     * Threshold limit for writes, in bytes. "High water mark".
+     * Don't access directly, use bdrv_write_threshold* interface.
+     * Protected by atomic access, no lock is needed.
+     */
     uint64_t write_threshold_offset;
 
     /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h
index c60b9954cd..28c35a1c05 100644
--- a/include/block/write-threshold.h
+++ b/include/block/write-threshold.h
@@ -24,6 +24,8 @@ 
  * To be used with thin-provisioned block devices.
  *
  * Use threshold_bytes == 0 to disable.
+ *
+ * Function is thread-safe, no lock is needed.
  */
 void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
 
@@ -32,6 +34,8 @@  void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
  *
  * Get the configured write threshold, in bytes.
  * Zero means no threshold configured.
+ *
+ * Function is thread-safe, no lock is needed.
  */
 uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
 
@@ -40,6 +44,8 @@  uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
  *
  * Check whether the specified request exceeds the write threshold.
  * If it is, send corresponding event and disable write threshold checking.
+ *
+ * Function is thread-safe, no lock is needed.
  */
 void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes);
diff --git a/block/write-threshold.c b/block/write-threshold.c
index 65a6acd142..8b46bb9a75 100644
--- a/block/write-threshold.c
+++ b/block/write-threshold.c
@@ -2,6 +2,7 @@ 
  * QEMU System Emulator block write threshold notification
  *
  * Copyright Red Hat, Inc. 2014
+ * Copyright (c) 2021 Virtuozzo International GmbH.
  *
  * Authors:
  *  Francesco Romani <fromani@redhat.com>
@@ -14,6 +15,7 @@ 
 #include "block/block_int.h"
 #include "qemu/coroutine.h"
 #include "block/write-threshold.h"
+#include "qemu/atomic.h"
 #include "qemu/notify.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-core.h"
@@ -21,45 +23,45 @@ 
 
 uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
 {
-    return bs->write_threshold_offset;
+    return qatomic_read(&bs->write_threshold_offset);
 }
 
 void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
 {
-    bs->write_threshold_offset = threshold_bytes;
+    qatomic_set(&bs->write_threshold_offset, threshold_bytes);
 }
 
 void qmp_block_set_write_threshold(const char *node_name,
                                    uint64_t threshold_bytes,
                                    Error **errp)
 {
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    bs = bdrv_find_node(node_name);
+    BlockDriverState *bs = bdrv_find_node(node_name);
     if (!bs) {
         error_setg(errp, "Device '%s' not found", node_name);
         return;
     }
 
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
     bdrv_write_threshold_set(bs, threshold_bytes);
-
-    aio_context_release(aio_context);
 }
 
 void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes)
 {
     int64_t end = offset + bytes;
-    uint64_t wtr = bs->write_threshold_offset;
+    uint64_t wtr;
 
-    if (wtr > 0 && end > wtr) {
-        qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
+retry:
+    wtr = bdrv_write_threshold_get(bs);
+    if (wtr == 0 || wtr >= end) {
+        return;
+    }
 
-        /* autodisable to avoid flooding the monitor */
-        bdrv_write_threshold_set(bs, 0);
+    /* autodisable to avoid flooding the monitor */
+    if (qatomic_cmpxchg(&bs->write_threshold_offset, wtr, 0) != wtr) {
+        /* bs->write_threshold_offset changed in parallel */
+        goto retry;
     }
+
+    /* We have cleared bs->write_threshold_offset, so let's send event */
+    qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
 }