[34/42] lustre: ldlm: pool fixes

Message ID	1601942781-24950-35-git-send-email-jsimmons@infradead.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=bqMh=DN=lists.lustre.org=lustre-devel-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8E891206F4 From: James Simmons <jsimmons@infradead.org> To: Andreas Dilger <adilger@whamcloud.com>, Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.com> Date: Mon, 5 Oct 2020 20:06:13 -0400 Message-Id: <1601942781-24950-35-git-send-email-jsimmons@infradead.org> In-Reply-To: <1601942781-24950-1-git-send-email-jsimmons@infradead.org> References: <1601942781-24950-1-git-send-email-jsimmons@infradead.org> Subject: [lustre-devel] [PATCH 34/42] lustre: ldlm: pool fixes Precedence: list Cc: Vitaly Fertman <c17818@cray.com>, Lustre Development List <lustre-devel@lists.lustre.org> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: lustre-devel-bounces@lists.lustre.org Sender: "lustre-devel" <lustre-devel-bounces@lists.lustre.org>
Series	lustre: OpenSFS backport for Oct 4 2020 | expand [00/42] lustre: OpenSFS backport for Oct 4 2020 [01/42] lustre: ptlrpc: don't require CONFIG_CRYPTO_CRC32 [02/42] lustre: dom: lock cancel to drop pages [03/42] lustre: sec: use memchr_inv() to check if page is zero. [04/42] lustre: mdc: fix lovea for replay [05/42] lustre: llite: add test to check client deadlock selinux [06/42] lnet: use init_wait(), not init_waitqueue_entry() [07/42] lustre: lov: make various lov_object.c function static. [08/42] lustre: llite: return -ENODATA if no default layout [09/42] lnet: libcfs: don't save journal_info in dumplog thread. [10/42] lustre: ldlm: lru code cleanup [11/42] lustre: ldlm: cancel LRU improvement [12/42] lnet: Do not set preferred NI for MR peer [13/42] lustre: ptlrpc: prefer crc32_le() over CryptoAPI [14/42] lnet: call event handlers without res_lock [15/42] lnet: Conditionally attach rspt in LNetPut & LNetGet [16/42] lustre: llite: reuse same cl_dio_aio for one IO [17/42] lustre: llite: move iov iter forward by ourself [18/42] lustre: llite: report client stats sumsq [19/42] lnet: Support checking for MD leaks. [20/42] lnet: don't read debugfs lnet stats when shutting down [21/42] lnet: Loosen restrictions on LNet Health params [22/42] lnet: Fix reference leak in lnet_select_pathway [23/42] lustre: llite: prune invalid dentries [24/42] lnet: Do not overwrite destination when routing [25/42] lustre: lov: don't use inline for operations functions. [26/42] lustre: osc: don't allow negative grants [27/42] lustre: mgc: Use IR for client->MDS/OST connections [28/42] lustre: ldlm: don't use a locks without l_ast_data [29/42] lustre: lov: discard unused lov_dump_lmm* functions [30/42] lustre: lov: guard against class_exp2obd() returning NULL. 
[31/42] lustre: clio: don't call aio_complete() in lustre upon errors [32/42] lustre: llite: it_lock_bits should be bit-wise tested [33/42] lustre: ldlm: control lru_size for extent lock [34/42] lustre: ldlm: pool fixes [35/42] lustre: ldlm: pool recalc forceful call [36/42] lustre: don't take spinlock to read a 'long'. [37/42] lustre: osc: Do ELC on locks with no OSC object [38/42] lnet: deadlock on LNet shutdown [39/42] lustre: update version to 2.13.56 [40/42] lustre: llite: increase readahead default values [41/42] lustre: obdclass: don't initialize obj for zero FID [42/42] lustre: obdclass: fixes and improvements for jobid.

diff --git a/fs/lustre/include/lustre_dlm.h b/fs/lustre/include/lustre_dlm.h index 682035a..bc6785f 100644 --- a/fs/lustre/include/lustre_dlm.h +++ b/fs/lustre/include/lustre_dlm.h @@ -250,8 +250,8 @@ struct ldlm_pool { u64 pl_server_lock_volume; /** Current biggest client lock volume. Protected by pl_lock. */ u64 pl_client_lock_volume; - /** Lock volume factor. SLV on client is calculated as following: - * server_slv * lock_volume_factor. + /** Lock volume factor, shown in percents in procfs, but internally + * Client SLV calculated as: server_slv * lock_volume_factor >> 8. */ atomic_t pl_lock_volume_factor; /** Time when last SLV from server was obtained. */ diff --git a/fs/lustre/ldlm/ldlm_pool.c b/fs/lustre/ldlm/ldlm_pool.c index 9e2a006..c37948a 100644 --- a/fs/lustre/ldlm/ldlm_pool.c +++ b/fs/lustre/ldlm/ldlm_pool.c @@ -209,13 +209,13 @@ static inline int ldlm_pool_t2gsp(unsigned int t) * * \pre ->pl_lock is locked. */ -static void ldlm_pool_recalc_stats(struct ldlm_pool *pl) +static void ldlm_pool_recalc_stats(struct ldlm_pool *pl, timeout_t period) { int grant_plan = pl->pl_grant_plan; u64 slv = pl->pl_server_lock_volume; int granted = atomic_read(&pl->pl_granted); - int grant_rate = atomic_read(&pl->pl_grant_rate); - int cancel_rate = atomic_read(&pl->pl_cancel_rate); + int grant_rate = atomic_read(&pl->pl_grant_rate) / period; + int cancel_rate = atomic_read(&pl->pl_cancel_rate) / period; lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT, slv); @@ -254,10 +254,10 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl) */ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) { - time64_t recalc_interval_sec; + timeout_t recalc_interval_sec; int ret; - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; if (recalc_interval_sec < pl->pl_recalc_period) return 0; @@ -265,7 +265,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) /* * Check if we need to recalc lists 
now. */ - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; if (recalc_interval_sec < pl->pl_recalc_period) { spin_unlock(&pl->pl_lock); return 0; @@ -292,7 +292,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) * Time of LRU resizing might be longer than period, * so update after LRU resizing rather than before it. */ - pl->pl_recalc_time = ktime_get_real_seconds(); + pl->pl_recalc_time = ktime_get_seconds(); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, recalc_interval_sec); spin_unlock(&pl->pl_lock); @@ -321,7 +321,9 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, /* * Make sure that pool knows last SLV and Limit from obd. */ + spin_lock(&pl->pl_lock); ldlm_cli_pool_pop_slv(pl); + spin_unlock(&pl->pl_lock); spin_lock(&ns->ns_lock); unused = ns->ns_nr_unused; @@ -341,23 +343,25 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, /** * Pool recalc wrapper. Will call either client or server pool recalc callback * depending what pool @pl is used. + * + * Returns time in seconds for the next recalc of this pool */ -static int ldlm_pool_recalc(struct ldlm_pool *pl) +static timeout_t ldlm_pool_recalc(struct ldlm_pool *pl) { - u32 recalc_interval_sec; + timeout_t recalc_interval_sec; int count; - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { spin_lock(&pl->pl_lock); - recalc_interval_sec = ktime_get_real_seconds() - + recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* - * Update pool statistics every 1s. + * Update pool statistics every recalc interval. */ - ldlm_pool_recalc_stats(pl); + ldlm_pool_recalc_stats(pl, recalc_interval_sec); /* * Zero out all rates and speed for the last period. 
@@ -374,20 +378,7 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) count); } - recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() + - pl->pl_recalc_period; - if (recalc_interval_sec <= 0) { - /* DEBUG: should be re-removed after LU-4536 is fixed */ - CDEBUG(D_DLMTRACE, - "%s: Negative interval(%ld), too short period(%ld)\n", - pl->pl_name, (long)recalc_interval_sec, - (long)pl->pl_recalc_period); - - /* Prevent too frequent recalculation. */ - recalc_interval_sec = 1; - } - - return recalc_interval_sec; + return pl->pl_recalc_time + pl->pl_recalc_period; } /* @@ -421,6 +412,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused) int granted, grant_rate, cancel_rate; int grant_speed, lvf; struct ldlm_pool *pl = m->private; + timeout_t period; u64 slv, clv; u32 limit; @@ -429,8 +421,11 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused) clv = pl->pl_client_lock_volume; limit = atomic_read(&pl->pl_limit); granted = atomic_read(&pl->pl_granted); - grant_rate = atomic_read(&pl->pl_grant_rate); - cancel_rate = atomic_read(&pl->pl_cancel_rate); + period = ktime_get_seconds() - pl->pl_recalc_time; + if (period <= 0) + period = 1; + grant_rate = atomic_read(&pl->pl_grant_rate) / period; + cancel_rate = atomic_read(&pl->pl_cancel_rate) / period; grant_speed = grant_rate - cancel_rate; lvf = atomic_read(&pl->pl_lock_volume_factor); spin_unlock(&pl->pl_lock); @@ -439,7 +434,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused) " SLV: %llu\n" " CLV: %llu\n" " LVF: %d\n", - pl->pl_name, slv, clv, lvf); + pl->pl_name, slv, clv, (lvf * 100) >> 8); seq_printf(m, " GR: %d\n CR: %d\n GS: %d\n" " G: %d\n L: %d\n", @@ -457,11 +452,15 @@ static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr, struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj); int grant_speed; + timeout_t period; spin_lock(&pl->pl_lock); /* serialize with ldlm_pool_recalc */ - grant_speed = 
atomic_read(&pl->pl_grant_rate) - - atomic_read(&pl->pl_cancel_rate); + period = ktime_get_seconds() - pl->pl_recalc_time; + if (period <= 0) + period = 1; + grant_speed = (atomic_read(&pl->pl_grant_rate) - + atomic_read(&pl->pl_cancel_rate)) / period; spin_unlock(&pl->pl_lock); return sprintf(buf, "%d\n", grant_speed); } @@ -477,6 +476,9 @@ static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr, LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64); LUSTRE_RO_ATTR(server_lock_volume); +LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(client_lock_volume, u64); +LUSTRE_RO_ATTR(client_lock_volume); + LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic); LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic); LUSTRE_RW_ATTR(limit); @@ -490,16 +492,56 @@ static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr, LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic); LUSTRE_RO_ATTR(grant_rate); -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic); -LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic); +static ssize_t lock_volume_factor_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj); + unsigned long tmp; + + tmp = (atomic_read(&pl->pl_lock_volume_factor) * 100) >> 8; + return sprintf(buf, "%lu\n", tmp); +} + +static ssize_t lock_volume_factor_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t count) +{ + struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj); + unsigned long tmp; + int rc; + + rc = kstrtoul(buffer, 10, &tmp); + if (rc < 0) + return rc; + + tmp = (tmp << 8) / 100; + atomic_set(&pl->pl_lock_volume_factor, tmp); + + return count; +} LUSTRE_RW_ATTR(lock_volume_factor); +static ssize_t recalc_time_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj); + + return scnprintf(buf, PAGE_SIZE, 
"%llu\n", + ktime_get_seconds() - pl->pl_recalc_time); +} +LUSTRE_RO_ATTR(recalc_time); + /* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */ static struct attribute *ldlm_pl_attrs[] = { &lustre_attr_grant_speed.attr, &lustre_attr_grant_plan.attr, &lustre_attr_recalc_period.attr, &lustre_attr_server_lock_volume.attr, + &lustre_attr_client_lock_volume.attr, + &lustre_attr_recalc_time.attr, &lustre_attr_limit.attr, &lustre_attr_granted.attr, &lustre_attr_cancel_rate.attr, @@ -625,8 +667,8 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_real_seconds(); - atomic_set(&pl->pl_lock_volume_factor, 1); + pl->pl_recalc_time = ktime_get_seconds(); + atomic_set(&pl->pl_lock_volume_factor, 1 << 8); atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); @@ -867,7 +909,7 @@ static void ldlm_pools_recalc(struct work_struct *ws) struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; /* seconds of sleep if no active namespaces */ - time64_t time = LDLM_POOL_CLI_DEF_RECALC_PERIOD; + timeout_t delay = LDLM_POOL_CLI_DEF_RECALC_PERIOD; int nr; /* @@ -933,11 +975,8 @@ static void ldlm_pools_recalc(struct work_struct *ws) * After setup is done - recalc the pool. */ if (!skip) { - time64_t ttime = ldlm_pool_recalc(&ns->ns_pool); - - if (ttime < time) - time = ttime; - + delay = min(delay, + ldlm_pool_recalc(&ns->ns_pool)); ldlm_namespace_put(ns); } } @@ -945,12 +984,14 @@ static void ldlm_pools_recalc(struct work_struct *ws) /* Wake up the blocking threads from time to time. 
*/ ldlm_bl_thread_wakeup(); - schedule_delayed_work(&ldlm_recalc_pools, time * HZ); + schedule_delayed_work(&ldlm_recalc_pools, delay * HZ); } static int ldlm_pools_thread_start(void) { - schedule_delayed_work(&ldlm_recalc_pools, 0); + time64_t delay = LDLM_POOL_CLI_DEF_RECALC_PERIOD; + + schedule_delayed_work(&ldlm_recalc_pools, delay); return 0; } diff --git a/fs/lustre/ldlm/ldlm_request.c b/fs/lustre/ldlm/ldlm_request.c index c235915..a8d6df1 100644 --- a/fs/lustre/ldlm/ldlm_request.c +++ b/fs/lustre/ldlm/ldlm_request.c @@ -1388,7 +1388,7 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, lvf = ldlm_pool_get_lvf(pl); la = div_u64(ktime_to_ns(ktime_sub(cur, lock->l_last_used)), NSEC_PER_SEC); - lv = lvf * la * ns->ns_nr_unused; + lv = lvf * la * ns->ns_nr_unused >> 8; /* Inform pool about current CLV to see it via debugfs. */ ldlm_pool_set_clv(pl, lv);

[34/42] lustre: ldlm: pool fixes

Commit Message

Patch