From 0764538017d3904ab7decd0ccf07059d9f41b6ed Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Fri, 8 Jan 2016 13:30:21 +0100 Subject: [PATCH 223/257] ZFS ARC: Sync with FreeBSD bug #187594 (ZFS ARC behavior problem and fix) Main patch author: Karl Denninger PR: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=187594 PATCH URL: https://bz-attachments.freebsd.org/attachment.cgi?id=164051&action=diff&format=raw&headers=1 --- .../contrib/opensolaris/uts/common/fs/zfs/arc.c | 60 +++++++++++++++++++++- .../contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c | 9 +++- .../contrib/opensolaris/uts/common/fs/zfs/zio.c | 3 ++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 55a1ca4c1ff3..1a55d5ceca01 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -366,6 +366,14 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ u_int zfs_arc_free_target = 0; +u_int zfs_arc_wakeup_pager = 0; +u_int zfs_arc_wakeup_delay = 500; + +#define WAKE_PAGER +#ifdef WAKE_PAGER +#define WAKE_PAGER_CONSTANT 10 / 9 /* Pager wakeup threshold */ +static int arc_init_done = 0; /* We know arc_warm is valid */ +#endif /* WAKE_PAGER */ /* Absolute min for arc min / max is 16MB. */ static uint64_t arc_abs_min = 16 << 20; @@ -383,6 +391,9 @@ arc_free_target_init(void *unused __unused) { zfs_arc_free_target = vm_pageout_wakeup_thresh + ((vm_cnt.v_free_target - vm_pageout_wakeup_thresh) / 2); +#ifdef WAKE_PAGER + zfs_arc_wakeup_pager = zfs_arc_free_target * WAKE_PAGER_CONSTANT; +#endif /* WAKE_PAGER */ } SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, arc_free_target_init, NULL); @@ -406,6 +417,12 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RDTUN, SYSCTL_INT(_vfs_zfs, OID_AUTO, dynamic_write_buffer, CTLFLAG_RWTUN, &zfs_dynamic_write_buffer, 0, "Dynamically restrict dirty data when memory is low"); +#ifdef WAKE_PAGER +SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_wakeup_pager, CTLFLAG_RWTUN, + &zfs_arc_wakeup_pager, 0, "Wake VM below this number of pages"); +SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_wakeup_delay, CTLFLAG_RWTUN, + &zfs_arc_wakeup_delay, 0, "May wake up VM once this number of MS"); +#endif /* WAKE_PAGER */ /* * We don't have a tunable for arc_free_target due to the dependency on @@ -433,6 +450,9 @@ sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) return (EINVAL); zfs_arc_free_target = val; +#ifdef WAKE_PAGER + zfs_arc_wakeup_pager = zfs_arc_free_target * WAKE_PAGER_CONSTANT; +#endif /* WAKE_PAGER */ return (0); } @@ -3866,6 +3886,11 @@ int64_t arc_pages_pp_reserve = 64; int64_t arc_swapfs_reserve = 64; /* + * Declare file-local static for event processor bypass + */ +static unsigned int arc_no_wake_event = 0; + +/* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates * the amount of memory that needs to be freed up. @@ -3878,6 +3903,10 @@ arc_available_memory(void) free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL +#ifdef WAKE_PAGER + sbintime_t now; + static sbintime_t last_pagedaemon_wake = 0; +#endif /* WAKE_PAGER */ if (needfree > 0) { n = PAGESIZE * (-needfree); if (n < lowest) { @@ -3896,6 +3925,26 @@ arc_available_memory(void) r = FMR_LOTSFREE; } +#ifdef WAKE_PAGER +/* + * If memory is less than the ARC wakeup threshold and time has expired since + * the last time we woke the pager... Do not execute until the ARC warms up. + */ + if ((arc_init_done) && + (((int64_t) freemem - zfs_arc_wakeup_pager) < 0) && + (arc_warm == B_TRUE) + ) { + now = getsbinuptime(); + if ((now - last_pagedaemon_wake) / SBT_1MS > zfs_arc_wakeup_delay) { + last_pagedaemon_wake = now; + arc_no_wake_event++; /* Set bypass flag for ARC */ + DTRACE_PROBE(arc__wake_pagedaemon); + pagedaemon_wakeup(); /* Wake the pager */ + } + } + +#endif /* WAKE_PAGER */ + #ifdef illumos /* * check that we're out of range of the pageout scanner. It starts to @@ -4004,6 +4053,7 @@ arc_available_memory(void) last_free_memory = lowest; last_free_reason = r; DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r); + return (lowest); } @@ -4034,9 +4084,10 @@ extern kmem_cache_t *range_seg_cache; * experimenting. */ static unsigned int arc_reaping_in_progress = 0; +static unsigned int arc_pagedaemon_ignore = 0; static sbintime_t last_reaping = 0; -static void __noinline +static __noinline void reap_arc_caches(void) { size_t i; @@ -5985,6 +6036,10 @@ static void arc_lowmem(void *arg __unused, int howto __unused) { + if (arc_no_wake_event) { /* Don't do it if we woke the pager */ + arc_no_wake_event = 0; /* Just clear the flag */ + return; + } mutex_enter(&arc_reclaim_lock); /* XXX: Memory deficit should be passed as argument. */ needfree = btoc(arc_c >> arc_shrink_shift); @@ -6299,6 +6354,9 @@ arc_init(void) printf(" in /boot/loader.conf.\n"); } #endif +#ifdef WAKE_PAGER + arc_init_done++; /* For anyone who wants to know */ +#endif /* WAKE_PAGER */ } void diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c index 6e89c7d3b6a5..691a651c8ef7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c @@ -1123,8 +1123,13 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty) ASSERT3U(dirty, <, zfs_dirty_data_max_internal); now = gethrtime(); - min_tx_time = zfs_delay_scale * - (dirty - delay_min_bytes) / (zfs_dirty_data_max_internal - dirty); + if (dirty >= zfs_dirty_data_max_internal) {/* No scaling if overcommitted */ + min_tx_time = zfs_delay_scale * + (dirty - delay_min_bytes); + } else { + min_tx_time = zfs_delay_scale * + (dirty - delay_min_bytes) / (zfs_dirty_data_max_internal - dirty); + } if (now > tx->tx_start + min_tx_time) return; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index d5ab54271619..41664263f91a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -45,6 +45,9 @@ SYSCTL_DECL(_vfs_zfs); SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); +/* KD 2015-07-15 Change class to "int" from "static int" as we reference + * this as an extern elsewhere + */ #if defined(__amd64__) int zio_use_uma = 1; #else -- 2.11.0