@@ -61,22 +61,8 @@ static inline unsigned int folio_batch_space(struct folio_batch *fbatch)
return PAGEVEC_SIZE - fbatch->nr;
}
-/**
- * folio_batch_add() - Add a folio to a batch.
- * @fbatch: The folio batch.
- * @folio: The folio to add.
- *
- * The folio is added to the end of the batch.
- * The batch must have previously been initialised using folio_batch_init().
- *
- * Return: The number of slots still available.
- */
-static inline unsigned folio_batch_add(struct folio_batch *fbatch,
- struct folio *folio)
-{
- fbatch->folios[fbatch->nr++] = folio;
- return folio_batch_space(fbatch);
-}
+unsigned int folio_batch_add(struct folio_batch *fbatch,
+ struct folio *folio);
/**
* folio_batch_next - Return the next folio to process.
@@ -401,6 +401,7 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
+extern void lru_add_and_bh_lrus_drain(void);
void folio_deactivate(struct folio *folio);
void folio_mark_lazyfree(struct folio *folio);
extern void swap_setup(void);
@@ -8,6 +8,8 @@
*
*/
+#include <linux/swap.h>
+
enum hk_flags {
HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
@@ -253,6 +255,7 @@ __setup("isolcpus=", housekeeping_isolcpus_setup);
#if defined(CONFIG_NO_HZ_FULL)
static void isolated_task_work(struct callback_head *head)
{
+ lru_add_and_bh_lrus_drain(); /* @head is not used; the callback only triggers the drain */
}
int __isolated_task_work_queue(void)
@@ -37,6 +37,7 @@
#include <linux/page_idle.h>
#include <linux/local_lock.h>
#include <linux/buffer_head.h>
+#include <linux/sched/isolation.h>
#include "internal.h"
@@ -155,6 +156,29 @@ static void lru_add(struct lruvec *lruvec, struct folio *folio)
trace_mm_lru_insertion(folio);
}
+/**
+ * folio_batch_add() - Add a folio to a batch.
+ * @fbatch: The folio batch.
+ * @folio: The folio to add.
+ *
+ * The folio is added to the end of the batch, which must have previously been
+ * initialised using folio_batch_init(). Calls isolated_task_work_queue().
+ *
+ * Return: The number of slots still available after the folio was added.
+ */
+unsigned int folio_batch_add(struct folio_batch *fbatch,
+ struct folio *folio)
+{
+ unsigned int ret;
+
+ fbatch->folios[fbatch->nr++] = folio; /* no bounds check: caller must leave a free slot */
+ ret = folio_batch_space(fbatch);
+ isolated_task_work_queue(); /* NOTE(review): presumably defers an LRU drain to return-to-user; confirm */
+
+ return ret;
+}
+EXPORT_SYMBOL(folio_batch_add);
+
static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
{
int i;
@@ -738,7 +762,7 @@ void lru_add_drain(void)
* the same cpu. It shouldn't be a problem in !SMP case since
* the core is only one and the locks will disable preemption.
*/
-static void lru_add_and_bh_lrus_drain(void)
+void lru_add_and_bh_lrus_drain(void)
{
local_lock(&cpu_fbatches.lock);
lru_add_drain_cpu(smp_processor_id());
@@ -864,6 +888,10 @@ static inline void __lru_add_drain_all(bool force_all_cpus)
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+ /* Isolated CPUs handle their cache upon return to userspace */
+ if (!housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE))
+ continue;
+
if (cpu_needs_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
LRU batching can be a source of disturbances for isolated workloads running in userspace, because it requires a kernel worker to handle it, and that worker would preempt the said task. The primary source of such disruption would be __lru_add_drain_all, which could be triggered from non-isolated CPUs. Why would an isolated CPU have anything on the pcp cache? Many syscalls allocate pages that might end up there. A typical and unavoidable one would be fork/exec, leaving pages behind on the cache just waiting for somebody to drain them. Address the problem by noting that a batch has been added to the cache and scheduling draining upon return to userspace, so that the work is done while the syscall is still executing and there are no surprises while the task runs in userspace, where it doesn't want to be preempted. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> --- include/linux/pagevec.h | 18 ++---------------- include/linux/swap.h | 1 + kernel/sched/isolation.c | 3 +++ mm/swap.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 35 insertions(+), 17 deletions(-)