diff mbox series

[v1,29/29] virtio-mem: Big Block Mode (BBM) - safe memory hotunplug

Message ID 20201012125323.17509-30-david@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtio-mem: Big Block Mode (BBM) | expand

Commit Message

David Hildenbrand Oct. 12, 2020, 12:53 p.m. UTC
Let's add a safe mechanism to unplug memory, avoiding long/endless loops
when trying to offline memory - similar to in SBM.

Fake-offline all memory (via alloc_contig_range()) before trying to
offline+remove it. Use this mode as default, but allow to enable the other
mode explicitly (which could give better memory hotunplug guarantees in
some environments).

The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0
on the cmdline.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 drivers/virtio/virtio_mem.c | 97 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 2 deletions(-)

Comments

Wei Yang Oct. 19, 2020, 7:54 a.m. UTC | #1
On Mon, Oct 12, 2020 at 02:53:23PM +0200, David Hildenbrand wrote:
>Let's add a safe mechanism to unplug memory, avoiding long/endless loops
>when trying to offline memory - similar to in SBM.
>
>Fake-offline all memory (via alloc_contig_range()) before trying to
>offline+remove it. Use this mode as default, but allow to enable the other
>mode explicitly (which could give better memory hotunplug guarantees in

I don't get the point how unsafe mode would have a better guarantees?

>some environments).
>
>The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0
>on the cmdline.
>
>Cc: "Michael S. Tsirkin" <mst@redhat.com>
>Cc: Jason Wang <jasowang@redhat.com>
>Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
>Cc: Michal Hocko <mhocko@kernel.org>
>Cc: Oscar Salvador <osalvador@suse.de>
>Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>Cc: Andrew Morton <akpm@linux-foundation.org>
>Signed-off-by: David Hildenbrand <david@redhat.com>
>---
> drivers/virtio/virtio_mem.c | 97 ++++++++++++++++++++++++++++++++++++-
> 1 file changed, 95 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
>index 6bcd0acbff32..09f11489be6f 100644
>--- a/drivers/virtio/virtio_mem.c
>+++ b/drivers/virtio/virtio_mem.c
>@@ -37,6 +37,11 @@ module_param(bbm_block_size, ulong, 0444);
> MODULE_PARM_DESC(bbm_block_size,
> 		 "Big Block size in bytes. Default is 0 (auto-detection).");
> 
>+static bool bbm_safe_unplug = true;
>+module_param(bbm_safe_unplug, bool, 0444);
>+MODULE_PARM_DESC(bbm_safe_unplug,
>+	     "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
>+
> /*
>  * virtio-mem currently supports the following modes of operation:
>  *
>@@ -87,6 +92,8 @@ enum virtio_mem_bbm_bb_state {
> 	VIRTIO_MEM_BBM_BB_PLUGGED,
> 	/* Plugged and added to Linux. */
> 	VIRTIO_MEM_BBM_BB_ADDED,
>+	/* All online parts are fake-offline, ready to remove. */
>+	VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
> 	VIRTIO_MEM_BBM_BB_COUNT
> };
> 
>@@ -889,6 +896,32 @@ static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
> 	}
> }
> 
>+static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
>+						unsigned long bb_id,
>+						unsigned long pfn,
>+						unsigned long nr_pages)
>+{
>+	/*
>+	 * When marked as "fake-offline", all online memory of this device block
>+	 * is allocated by us. Otherwise, we don't have any memory allocated.
>+	 */
>+	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
>+	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
>+		return;
>+	virtio_mem_fake_offline_going_offline(pfn, nr_pages);
>+}
>+
>+static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
>+						 unsigned long bb_id,
>+						 unsigned long pfn,
>+						 unsigned long nr_pages)
>+{
>+	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
>+	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
>+		return;
>+	virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
>+}
>+
> /*
>  * This callback will either be called synchronously from add_memory() or
>  * asynchronously (e.g., triggered via user space). We have to be careful
>@@ -949,6 +982,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
> 		vm->hotplug_active = true;
> 		if (vm->in_sbm)
> 			virtio_mem_sbm_notify_going_offline(vm, id);
>+		else
>+			virtio_mem_bbm_notify_going_offline(vm, id,
>+							    mhp->start_pfn,
>+							    mhp->nr_pages);
> 		break;
> 	case MEM_GOING_ONLINE:
> 		mutex_lock(&vm->hotplug_mutex);
>@@ -999,6 +1036,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
> 			break;
> 		if (vm->in_sbm)
> 			virtio_mem_sbm_notify_cancel_offline(vm, id);
>+		else
>+			virtio_mem_bbm_notify_cancel_offline(vm, id,
>+							     mhp->start_pfn,
>+							     mhp->nr_pages);
> 		vm->hotplug_active = false;
> 		mutex_unlock(&vm->hotplug_mutex);
> 		break;
>@@ -1189,7 +1230,13 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
> 			do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
> 								   sb_id, 1);
> 		} else {
>-			do_online = true;
>+			/*
>+			 * If the whole block is marked fake offline, keep
>+			 * everything that way.
>+			 */
>+			id = virtio_mem_phys_to_bb_id(vm, addr);
>+			do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
>+				    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
> 		}
> 		if (do_online)
> 			generic_online_page(page, order);
>@@ -1969,15 +2016,50 @@ static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
> static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
> 						       unsigned long bb_id)
> {
>+	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
>+	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
>+	unsigned long end_pfn = start_pfn + nr_pages;
>+	unsigned long pfn;
>+	struct page *page;
> 	int rc;
> 
> 	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
> 			 VIRTIO_MEM_BBM_BB_ADDED))
> 		return -EINVAL;
> 
>+	if (bbm_safe_unplug) {
>+		/*
>+		 * Start by fake-offlining all memory. Once we marked the device
>+		 * block as fake-offline, all newly onlined memory will
>+		 * automatically be kept fake-offline. Protect from concurrent
>+		 * onlining/offlining until we have a consistent state.
>+		 */
>+		mutex_lock(&vm->hotplug_mutex);
>+		virtio_mem_bbm_set_bb_state(vm, bb_id,
>+					    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
>+

State is set here.

>+		for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>+			page = pfn_to_online_page(pfn);
>+			if (!page)
>+				continue;
>+
>+			rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
>+			if (rc) {
>+				end_pfn = pfn;
>+				goto rollback_safe_unplug;
>+			}
>+		}
>+		mutex_unlock(&vm->hotplug_mutex);
>+	}
>+
> 	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
>-	if (rc)
>+	if (rc) {
>+		if (bbm_safe_unplug) {
>+			mutex_lock(&vm->hotplug_mutex);
>+			goto rollback_safe_unplug;
>+		}
> 		return rc;
>+	}
> 
> 	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
> 	if (rc)

And changed to PLUGGED or UNUSED based on rc.

>@@ -1987,6 +2069,17 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
> 		virtio_mem_bbm_set_bb_state(vm, bb_id,
> 					    VIRTIO_MEM_BBM_BB_UNUSED);
> 	return rc;
>+
>+rollback_safe_unplug:
>+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>+		page = pfn_to_online_page(pfn);
>+		if (!page)
>+			continue;
>+		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
>+	}
>+	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);

And changed to ADDED if failed.

>+	mutex_unlock(&vm->hotplug_mutex);
>+	return rc;
> }

So in which case, the bbm state is FAKE_OFFLINE during
virtio_mem_bbm_notify_going_offline() and
virtio_mem_bbm_notify_cancel_offline() ?

> 
> /*
>-- 
>2.26.2
David Hildenbrand Oct. 19, 2020, 8:50 a.m. UTC | #2
On 19.10.20 09:54, Wei Yang wrote:
> On Mon, Oct 12, 2020 at 02:53:23PM +0200, David Hildenbrand wrote:
>> Let's add a safe mechanism to unplug memory, avoiding long/endless loops
>> when trying to offline memory - similar to in SBM.
>>
>> Fake-offline all memory (via alloc_contig_range()) before trying to
>> offline+remove it. Use this mode as default, but allow to enable the other
>> mode explicitly (which could give better memory hotunplug guarantees in
> 
> I don't get the point how unsafe mode would have a better guarantees?

It's primarily only relevant when there is a lot of concurrent action
going on while unplugging memory. Using alloc_contig_range() on
ZONE_MOVABLE can fail more easily than memory offlining.

alloc_contig_range() doesn't try as hard as memory offlining code to
isolate memory. There are known issues with temporary page pinning
(e.g., when a process dies) and the PCP. (mostly discovered via CMA
allocations)

See the TODO I add in patch #14.

[...]
>>
>> +	if (bbm_safe_unplug) {
>> +		/*
>> +		 * Start by fake-offlining all memory. Once we marked the device
>> +		 * block as fake-offline, all newly onlined memory will
>> +		 * automatically be kept fake-offline. Protect from concurrent
>> +		 * onlining/offlining until we have a consistent state.
>> +		 */
>> +		mutex_lock(&vm->hotplug_mutex);
>> +		virtio_mem_bbm_set_bb_state(vm, bb_id,
>> +					    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
>> +
> 
> State is set here.
> 
>> +		for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>> +			page = pfn_to_online_page(pfn);
>> +			if (!page)
>> +				continue;
>> +
>> +			rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
>> +			if (rc) {
>> +				end_pfn = pfn;
>> +				goto rollback_safe_unplug;
>> +			}
>> +		}
>> +		mutex_unlock(&vm->hotplug_mutex);
>> +	}
>> +
>> 	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
>> -	if (rc)
>> +	if (rc) {
>> +		if (bbm_safe_unplug) {
>> +			mutex_lock(&vm->hotplug_mutex);
>> +			goto rollback_safe_unplug;
>> +		}
>> 		return rc;
>> +	}
>>
>> 	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
>> 	if (rc)
> 
> And changed to PLUGGED or UNUSED based on rc.

Right, after offlining+remove succeeded. So no longer added to Linux.

The final state depends on the success of the unplug request towards the
hypervisor.

> 
>> @@ -1987,6 +2069,17 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
>> 		virtio_mem_bbm_set_bb_state(vm, bb_id,
>> 					    VIRTIO_MEM_BBM_BB_UNUSED);
>> 	return rc;
>> +
>> +rollback_safe_unplug:
>> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>> +		page = pfn_to_online_page(pfn);
>> +		if (!page)
>> +			continue;
>> +		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
>> +	}
>> +	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
> 
> And changed to ADDED if failed.

Right, back to the initial state when entering this function.

> 
>> +	mutex_unlock(&vm->hotplug_mutex);
>> +	return rc;
>> }
> 
> So in which case, the bbm state is FAKE_OFFLINE during
> virtio_mem_bbm_notify_going_offline() and
> virtio_mem_bbm_notify_cancel_offline() ?

Exactly, so we can do our magic with fake-offline pages and our
virtio_mem_bbm_offline_and_remove_bb() can actually succeed.
Wei Yang Oct. 20, 2020, 12:23 a.m. UTC | #3
On Mon, Oct 19, 2020 at 10:50:39AM +0200, David Hildenbrand wrote:
>On 19.10.20 09:54, Wei Yang wrote:
>> On Mon, Oct 12, 2020 at 02:53:23PM +0200, David Hildenbrand wrote:
>>> Let's add a safe mechanism to unplug memory, avoiding long/endless loops
>>> when trying to offline memory - similar to in SBM.
>>>
>>> Fake-offline all memory (via alloc_contig_range()) before trying to
>>> offline+remove it. Use this mode as default, but allow to enable the other
>>> mode explicitly (which could give better memory hotunplug guarantees in
>> 
>> I don't get the point how unsafe mode would have a better guarantees?
>
>It's primarily only relevant when there is a lot of concurrent action
>going on while unplugging memory. Using alloc_contig_range() on
>ZONE_MOVABLE can fail more easily than memory offlining.
>
>alloc_contig_range() doesn't try as hard as memory offlining code to
>isolate memory. There are known issues with temporary page pinning
>(e.g., when a process dies) and the PCP. (mostly discovered via CMA
>allocations)
>
>See the TODO I add in patch #14.
>
>[...]
>>>
>>> +	if (bbm_safe_unplug) {
>>> +		/*
>>> +		 * Start by fake-offlining all memory. Once we marked the device
>>> +		 * block as fake-offline, all newly onlined memory will
>>> +		 * automatically be kept fake-offline. Protect from concurrent
>>> +		 * onlining/offlining until we have a consistent state.
>>> +		 */
>>> +		mutex_lock(&vm->hotplug_mutex);
>>> +		virtio_mem_bbm_set_bb_state(vm, bb_id,
>>> +					    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
>>> +
>> 
>> State is set here.
>> 
>>> +		for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>>> +			page = pfn_to_online_page(pfn);
>>> +			if (!page)
>>> +				continue;
>>> +
>>> +			rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
>>> +			if (rc) {
>>> +				end_pfn = pfn;
>>> +				goto rollback_safe_unplug;
>>> +			}
>>> +		}
>>> +		mutex_unlock(&vm->hotplug_mutex);
>>> +	}
>>> +
>>> 	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
>>> -	if (rc)
>>> +	if (rc) {
>>> +		if (bbm_safe_unplug) {
>>> +			mutex_lock(&vm->hotplug_mutex);
>>> +			goto rollback_safe_unplug;
>>> +		}
>>> 		return rc;
>>> +	}
>>>
>>> 	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
>>> 	if (rc)
>> 
>> And changed to PLUGGED or UNUSED based on rc.
>
>Right, after offlining+remove succeeded. So no longer added to Linux.
>
>The final state depends on the success of the unplug request towards the
>hypervisor.
>
>> 
>>> @@ -1987,6 +2069,17 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
>>> 		virtio_mem_bbm_set_bb_state(vm, bb_id,
>>> 					    VIRTIO_MEM_BBM_BB_UNUSED);
>>> 	return rc;
>>> +
>>> +rollback_safe_unplug:
>>> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>>> +		page = pfn_to_online_page(pfn);
>>> +		if (!page)
>>> +			continue;
>>> +		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
>>> +	}
>>> +	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
>> 
>> And changed to ADDED if failed.
>
>Right, back to the initial state when entering this function.
>
>> 
>>> +	mutex_unlock(&vm->hotplug_mutex);
>>> +	return rc;
>>> }
>> 
>> So in which case, the bbm state is FAKE_OFFLINE during
>> virtio_mem_bbm_notify_going_offline() and
>> virtio_mem_bbm_notify_cancel_offline() ?
>
>Exactly, so we can do our magic with fake-offline pages and our
>virtio_mem_bbm_offline_and_remove_bb() can actually succeed.

Ah, my fault. The exact code flow is this:

    virtio_mem_bbm_offline_remove_and_unplug_bb()
        virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
	virtio_mem_fake_offline(pfn, PAGES_PER_SECTION)
	virtio_mem_bbm_offline_and_remove_bb(vm, bb_id)
	    offline and trigger memory notification  --- 1)

The notification is necessary at 1) to release the refcount, which is grabbed
during fake offline.

>
>
>-- 
>Thanks,
>
>David / dhildenb
Wei Yang Oct. 20, 2020, 12:24 a.m. UTC | #4
On Mon, Oct 12, 2020 at 02:53:23PM +0200, David Hildenbrand wrote:
>Let's add a safe mechanism to unplug memory, avoiding long/endless loops
>when trying to offline memory - similar to in SBM.
>
>Fake-offline all memory (via alloc_contig_range()) before trying to
>offline+remove it. Use this mode as default, but allow to enable the other
>mode explicitly (which could give better memory hotunplug guarantees in
>some environments).
>
>The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0
>on the cmdline.
>
>Cc: "Michael S. Tsirkin" <mst@redhat.com>
>Cc: Jason Wang <jasowang@redhat.com>
>Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
>Cc: Michal Hocko <mhocko@kernel.org>
>Cc: Oscar Salvador <osalvador@suse.de>
>Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>Cc: Andrew Morton <akpm@linux-foundation.org>
>Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Wei Yang <richard.weiyang@linux.alibaba.com>
diff mbox series

Patch

diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 6bcd0acbff32..09f11489be6f 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -37,6 +37,11 @@  module_param(bbm_block_size, ulong, 0444);
 MODULE_PARM_DESC(bbm_block_size,
 		 "Big Block size in bytes. Default is 0 (auto-detection).");
 
+static bool bbm_safe_unplug = true;
+module_param(bbm_safe_unplug, bool, 0444);
+MODULE_PARM_DESC(bbm_safe_unplug,
+	     "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
+
 /*
  * virtio-mem currently supports the following modes of operation:
  *
@@ -87,6 +92,8 @@  enum virtio_mem_bbm_bb_state {
 	VIRTIO_MEM_BBM_BB_PLUGGED,
 	/* Plugged and added to Linux. */
 	VIRTIO_MEM_BBM_BB_ADDED,
+	/* All online parts are fake-offline, ready to remove. */
+	VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
 	VIRTIO_MEM_BBM_BB_COUNT
 };
 
@@ -889,6 +896,32 @@  static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
 	}
 }
 
+static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
+						unsigned long bb_id,
+						unsigned long pfn,
+						unsigned long nr_pages)
+{
+	/*
+	 * When marked as "fake-offline", all online memory of this device block
+	 * is allocated by us. Otherwise, we don't have any memory allocated.
+	 */
+	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+		return;
+	virtio_mem_fake_offline_going_offline(pfn, nr_pages);
+}
+
+static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
+						 unsigned long bb_id,
+						 unsigned long pfn,
+						 unsigned long nr_pages)
+{
+	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+		return;
+	virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
+}
+
 /*
  * This callback will either be called synchronously from add_memory() or
  * asynchronously (e.g., triggered via user space). We have to be careful
@@ -949,6 +982,10 @@  static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 		vm->hotplug_active = true;
 		if (vm->in_sbm)
 			virtio_mem_sbm_notify_going_offline(vm, id);
+		else
+			virtio_mem_bbm_notify_going_offline(vm, id,
+							    mhp->start_pfn,
+							    mhp->nr_pages);
 		break;
 	case MEM_GOING_ONLINE:
 		mutex_lock(&vm->hotplug_mutex);
@@ -999,6 +1036,10 @@  static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
 			break;
 		if (vm->in_sbm)
 			virtio_mem_sbm_notify_cancel_offline(vm, id);
+		else
+			virtio_mem_bbm_notify_cancel_offline(vm, id,
+							     mhp->start_pfn,
+							     mhp->nr_pages);
 		vm->hotplug_active = false;
 		mutex_unlock(&vm->hotplug_mutex);
 		break;
@@ -1189,7 +1230,13 @@  static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
 			do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
 								   sb_id, 1);
 		} else {
-			do_online = true;
+			/*
+			 * If the whole block is marked fake offline, keep
+			 * everything that way.
+			 */
+			id = virtio_mem_phys_to_bb_id(vm, addr);
+			do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
+				    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
 		}
 		if (do_online)
 			generic_online_page(page, order);
@@ -1969,15 +2016,50 @@  static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
 static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
 						       unsigned long bb_id)
 {
+	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+	unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long pfn;
+	struct page *page;
 	int rc;
 
 	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
 			 VIRTIO_MEM_BBM_BB_ADDED))
 		return -EINVAL;
 
+	if (bbm_safe_unplug) {
+		/*
+		 * Start by fake-offlining all memory. Once we marked the device
+		 * block as fake-offline, all newly onlined memory will
+		 * automatically be kept fake-offline. Protect from concurrent
+		 * onlining/offlining until we have a consistent state.
+		 */
+		mutex_lock(&vm->hotplug_mutex);
+		virtio_mem_bbm_set_bb_state(vm, bb_id,
+					    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+
+		for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+			page = pfn_to_online_page(pfn);
+			if (!page)
+				continue;
+
+			rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
+			if (rc) {
+				end_pfn = pfn;
+				goto rollback_safe_unplug;
+			}
+		}
+		mutex_unlock(&vm->hotplug_mutex);
+	}
+
 	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
-	if (rc)
+	if (rc) {
+		if (bbm_safe_unplug) {
+			mutex_lock(&vm->hotplug_mutex);
+			goto rollback_safe_unplug;
+		}
 		return rc;
+	}
 
 	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
 	if (rc)
@@ -1987,6 +2069,17 @@  static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
 		virtio_mem_bbm_set_bb_state(vm, bb_id,
 					    VIRTIO_MEM_BBM_BB_UNUSED);
 	return rc;
+
+rollback_safe_unplug:
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		page = pfn_to_online_page(pfn);
+		if (!page)
+			continue;
+		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
+	}
+	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
+	mutex_unlock(&vm->hotplug_mutex);
+	return rc;
 }
 
 /*