From 0cc5f291e6632fc5838a472060ff9569e8ebe113 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Thu, 3 Sep 2015 13:58:29 +0200 Subject: [PATCH 218/257] ZFS ARC: Move the ARC cache reaping out of the hot path ... and use a time-based rate limiting approach that can be controlled with the vfs.zfs.arc_reap_delay_min sysctl. Previously memory pressure could result in several thousand ARC reaping attempts per second. This wasn't merely inefficient but also caused lock contention and poor latency for ZFS operations that relied on arc_get_data_buf(). For details see: https://www.fabiankeil.de/gehacktes/electrobsd/zfs-arc-tuning/ Obtained from: ElectroBSD --- .../contrib/opensolaris/uts/common/fs/zfs/arc.c | 88 ++++++++++++++++------ 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 613a4ebdf1d1..8886ddcfeb89 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -4023,8 +4023,21 @@ extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; extern kmem_cache_t *range_seg_cache; -static void __used -reap_arc_caches() +/* + * Used by arc_kmem_reap_now() and consider_reaping_arc_caches() + * to limit the time spent reaping. + * + * The arc_reaping_in_progress flag is a (somewhat racy) leftover from a + * previous version of this code which could trigger multiple ARC cache + * reapings in parallel which should be avoided to reduce lock + * contention. It hasn't been removed yet to encourage further + * experimenting. 
+ */ +static unsigned int arc_reaping_in_progress = 0; +static sbintime_t last_reaping = 0; + +static void __noinline +reap_arc_caches(void) { size_t i; kmem_cache_t *prev_cache = NULL; @@ -4050,6 +4063,8 @@ static __noinline void arc_kmem_reap_now(void) { DTRACE_PROBE(arc__kmem_reap_start); + arc_reaping_in_progress++; + #ifdef _KERNEL if (arc_meta_used >= arc_meta_limit) { /* @@ -4077,9 +4092,49 @@ arc_kmem_reap_now(void) vmem_qcache_reap(zio_arena); } #endif +#ifdef _KERNEL + last_reaping = getsbinuptime(); +#endif + arc_reaping_in_progress = 0; DTRACE_PROBE(arc__kmem_reap_end); } + +/* + * Declared writable to allow resetting it. + * XXX: Should probably be a uint64 and integrated with kstat. + */ +static unsigned int arc_cache_reapings_skipped = 0; +SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_cache_reapings_skipped, CTLFLAG_RW, + &arc_cache_reapings_skipped, 0, "Number of times the ARC caches have not been reaped due to the reap delay"); + +static unsigned int min_arc_reap_delay = 200; +SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_reap_delay_min, CTLFLAG_RW, + &min_arc_reap_delay, 200, "Minimum delay between ARC cache reapings (milliseconds)"); + +static void __noinline +consider_reaping_arc_caches(void) +{ +#ifdef _KERNEL + sbintime_t now; + + if (arc_reaping_in_progress) { + /* Already reaping in another thread. */ + arc_cache_reapings_skipped++; + return; + } + + now = getsbinuptime(); + if ((now - last_reaping) / SBT_1MS < min_arc_reap_delay) + { + /* Too soon to reap again. */ + arc_cache_reapings_skipped++; + return; + } +#endif + arc_kmem_reap_now(); +} + /* * Threads can block in arc_get_data_buf() waiting for this thread to evict * enough data and signal them to proceed. 
When this happens, the threads in @@ -4101,28 +4156,11 @@ arc_reclaim_thread(void *dummy __unused) { hrtime_t growtime = 0; callb_cpr_t cpr; - int autoreap = 0; CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG); mutex_enter(&arc_reclaim_lock); while (!arc_reclaim_thread_exit) { -#ifdef _KERNEL -/* KD 2015-02-10 - * Protect against UMA free memory bloat. We already do this on a low-memory - * basis in the allocator; it has to happen there rather than here due to - * response time considerations. Make the call here once every 10 passes as - * well; this reclaims unused UMA buffers every 10 seconds on an idle system - * and more frequently if the reclaim thread gets woken up by low RAM - * conditions. - */ - if ((zio_use_uma) && (autoreap++ == 10)) { - autoreap = 0; - DTRACE_PROBE(arc__reclaim_timed_reap); - reap_arc_caches(); - } -#endif /* _KERNEL */ - int64_t free_memory = arc_available_memory(); uint64_t evicted = 0; @@ -4153,8 +4191,6 @@ arc_reclaim_thread(void *dummy __unused) */ growtime = gethrtime() + SEC2NSEC(arc_grow_retry); - arc_kmem_reap_now(); - /* * If we are still low on memory, shrink the ARC * so that we have arc_shrink_min free space. @@ -4177,6 +4213,16 @@ arc_reclaim_thread(void *dummy __unused) evicted = arc_adjust(); + /* + * XXX: 2016-10-10: This was moved from arc_user_evicts_thread() + * which got deleted by the commit to keep the + * ARC data compressed. + * + * Consider reaping the ARC caches at least once per + * second, but more often when signalled under pressure. + */ + consider_reaping_arc_caches(); + mutex_enter(&arc_reclaim_lock); /* -- 2.11.0