From 4cf99ad8cb71e6c570c3cc36cc09cc2d4303a4b5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 27 Mar 2019 12:51:26 -0700 Subject: bring back NFS fix When doing the 5.0 rebase, I mistakenly thought a conflict meant a patch had been merged. Turns out, it was an actual (minor) conflict. Bring back the rebased patch which can actually be dropped with 5.1. --- kernel.spec | 3 + nfsv4.1-avoid-false-retries.patch | 527 +++++++++++++++++++------------------- 2 files changed, 263 insertions(+), 267 deletions(-) diff --git a/kernel.spec b/kernel.spec index adefdf04f..12f577110 100644 --- a/kernel.spec +++ b/kernel.spec @@ -616,6 +616,9 @@ Patch512: 0001-Revert-drm-i915-fbdev-Actually-configure-untiled-dis.patch # rhbz 1689750, patch submitted upstream Patch513: 0001-virt-vbox-Implement-passing-requestor-info-to-the-ho.patch +# rhbz 1683382 +Patch515: nfsv4.1-avoid-false-retries.patch + # END OF PATCH DEFINITIONS %endif diff --git a/nfsv4.1-avoid-false-retries.patch b/nfsv4.1-avoid-false-retries.patch index b136df0e7..ebec54dab 100644 --- a/nfsv4.1-avoid-false-retries.patch +++ b/nfsv4.1-avoid-false-retries.patch @@ -1,267 +1,260 @@ -From d72ad5fe4c2206458d127cc31e690318cf2e2731 Mon Sep 17 00:00:00 2001 -From: Trond Myklebust -Date: Wed, 20 Jun 2018 17:53:34 -0400 -Subject: [PATCH] NFSv4.1: Avoid false retries when RPC calls are interrupted - -A 'false retry' in NFSv4.1 occurs when the client attempts to transmit a -new RPC call using a slot+sequence number combination that references an -already cached one. Currently, the Linux NFS client will do this if a -user process interrupts an RPC call that is in progress. -The problem with doing so is that we defeat the main mechanism used by -the server to differentiate between a new call and a replayed one. Even -if the server is able to perfectly cache the arguments of the old call, -it cannot know if the client intended to replay or send a new call. - -The obvious fix is to bump the sequence number pre-emptively if an -RPC call is interrupted, but in order to deal with the corner cases -where the interrupted call is not actually received and processed by -the server, we need to interpret the error NFS4ERR_SEQ_MISORDERED -as a sign that we need to either wait or locate a correct sequence -number that lies between the value we sent, and the last value that -was acked by a SEQUENCE call on that slot. - -Signed-off-by: Trond Myklebust ---- - fs/nfs/nfs4proc.c | 105 ++++++++++++++++++++----------------------- - fs/nfs/nfs4session.c | 5 ++- - fs/nfs/nfs4session.h | 5 ++- - 3 files changed, 55 insertions(+), 60 deletions(-) - -diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c -index 867457d..c11e059 100644 ---- a/fs/nfs/nfs4proc.c -+++ b/fs/nfs/nfs4proc.c -@@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) - res->sr_slot = NULL; - } - -+static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, -+ u32 seqnr) -+{ -+ if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) -+ slot->seq_nr_highest_sent = seqnr; -+} -+static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, -+ u32 seqnr) -+{ -+ slot->seq_nr_highest_sent = seqnr; -+ slot->seq_nr_last_acked = seqnr; -+} -+ - static int nfs41_sequence_process(struct rpc_task *task, - struct nfs4_sequence_res *res) - { - struct nfs4_session *session; - struct nfs4_slot *slot = res->sr_slot; - struct nfs_client *clp; -- bool interrupted = false; - int ret = 1; - - if (slot == NULL) -@@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task, - - session = slot->table->session; - -- if (slot->interrupted) { -- if (res->sr_status != -NFS4ERR_DELAY) -- slot->interrupted = 0; -- interrupted = true; -- } -- - trace_nfs4_sequence_done(session, res); - /* Check the SEQUENCE operation status */ - switch (res->sr_status) { - case 0: -+ /* Mark this sequence number as having been acked */ -+ nfs4_slot_sequence_acked(slot, slot->seq_nr); - /* Update the slot's sequence and clientid lease timer */ - slot->seq_done = 1; - clp = session->clp; -@@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, - * sr_status remains 1 if an RPC level error occurred. - * The server may or may not have processed the sequence - * operation.. -- * Mark the slot as having hosted an interrupted RPC call. - */ -- slot->interrupted = 1; -+ nfs4_slot_sequence_record_sent(slot, slot->seq_nr); -+ slot->seq_done = 1; - goto out; - case -NFS4ERR_DELAY: - /* The server detected a resend of the RPC call and -@@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, - __func__, - slot->slot_nr, - slot->seq_nr); -+ nfs4_slot_sequence_acked(slot, slot->seq_nr); - goto out_retry; - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_SEQ_FALSE_RETRY: -@@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, - * The server thinks we tried to replay a request. - * Retry the call after bumping the sequence ID. - */ -+ nfs4_slot_sequence_acked(slot, slot->seq_nr); - goto retry_new_seq; - case -NFS4ERR_BADSLOT: - /* -@@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, - goto session_recover; - goto retry_nowait; - case -NFS4ERR_SEQ_MISORDERED: -+ nfs4_slot_sequence_record_sent(slot, slot->seq_nr); - /* -- * Was the last operation on this sequence interrupted? -- * If so, retry after bumping the sequence number. -+ * Were one or more calls using this slot interrupted? -+ * If the server never received the request, then our -+ * transmitted slot sequence number may be too high. - */ -- if (interrupted) -- goto retry_new_seq; -- /* -- * Could this slot have been previously retired? -- * If so, then the server may be expecting seq_nr = 1! -- */ -- if (slot->seq_nr != 1) { -- slot->seq_nr = 1; -+ if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { -+ slot->seq_nr--; - goto retry_nowait; - } -- goto session_recover; -+ /* -+ * RFC5661: -+ * A retry might be sent while the original request is -+ * still in progress on the replier. The replier SHOULD -+ * deal with the issue by returning NFS4ERR_DELAY as the -+ * reply to SEQUENCE or CB_SEQUENCE operation, but -+ * implementations MAY return NFS4ERR_SEQ_MISORDERED. -+ * -+ * Restart the search after a delay. -+ */ -+ slot->seq_nr = slot->seq_nr_highest_sent; -+ goto out_retry; - default: - /* Just update the slot sequence no. */ - slot->seq_done = 1; -@@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { - .rpc_call_done = nfs41_call_sync_done, - }; - --static void --nfs4_sequence_process_interrupted(struct nfs_client *client, -- struct nfs4_slot *slot, struct rpc_cred *cred) --{ -- struct rpc_task *task; -- -- task = _nfs41_proc_sequence(client, cred, slot, true); -- if (!IS_ERR(task)) -- rpc_put_task_async(task); --} -- - #else /* !CONFIG_NFS_V4_1 */ - - static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) -@@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task, - } - EXPORT_SYMBOL_GPL(nfs4_sequence_done); - --static void --nfs4_sequence_process_interrupted(struct nfs_client *client, -- struct nfs4_slot *slot, struct rpc_cred *cred) --{ -- WARN_ON_ONCE(1); -- slot->interrupted = 0; --} -- - #endif /* !CONFIG_NFS_V4_1 */ - - static -@@ -982,26 +980,19 @@ int nfs4_setup_sequence(struct nfs_client *client, - task->tk_timeout = 0; - } - -- for (;;) { -- spin_lock(&tbl->slot_tbl_lock); -- /* The state manager will wait until the slot table is empty */ -- if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) -- goto out_sleep; -- -- slot = nfs4_alloc_slot(tbl); -- if (IS_ERR(slot)) { -- /* Try again in 1/4 second */ -- if (slot == ERR_PTR(-ENOMEM)) -- task->tk_timeout = HZ >> 2; -- goto out_sleep; -- } -- spin_unlock(&tbl->slot_tbl_lock); -+ spin_lock(&tbl->slot_tbl_lock); -+ /* The state manager will wait until the slot table is empty */ -+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) -+ goto out_sleep; - -- if (likely(!slot->interrupted)) -- break; -- nfs4_sequence_process_interrupted(client, -- slot, task->tk_msg.rpc_cred); -+ slot = nfs4_alloc_slot(tbl); -+ if (IS_ERR(slot)) { -+ /* Try again in 1/4 second */ -+ if (slot == ERR_PTR(-ENOMEM)) -+ task->tk_timeout = HZ >> 2; -+ goto out_sleep; - } -+ spin_unlock(&tbl->slot_tbl_lock); - - nfs4_sequence_attach_slot(args, res, slot); - -diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c -index 769b856..fdb75da 100644 ---- a/fs/nfs/nfs4session.c -+++ b/fs/nfs/nfs4session.c -@@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, - slot->table = tbl; - slot->slot_nr = slotid; - slot->seq_nr = seq_init; -+ slot->seq_nr_highest_sent = seq_init; -+ slot->seq_nr_last_acked = seq_init - 1; - } - return slot; - } -@@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, - p = &tbl->slots; - while (*p) { - (*p)->seq_nr = ivalue; -- (*p)->interrupted = 0; -+ (*p)->seq_nr_highest_sent = ivalue; -+ (*p)->seq_nr_last_acked = ivalue - 1; - p = &(*p)->next; - } - tbl->highest_used_slotid = NFS4_NO_SLOT; -diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h -index 3c550f2..230509b 100644 ---- a/fs/nfs/nfs4session.h -+++ b/fs/nfs/nfs4session.h -@@ -23,8 +23,9 @@ struct nfs4_slot { - unsigned long generation; - u32 slot_nr; - u32 seq_nr; -- unsigned int interrupted : 1, -- privileged : 1, -+ u32 seq_nr_last_acked; -+ u32 seq_nr_highest_sent; -+ unsigned int privileged : 1, - seq_done : 1; - }; - --- -2.20.1 - --- -Trond Myklebust -Linux NFS client maintainer, Hammerspace -trond.myklebust@hammerspace.com - - - +From 675f11241a9f5b434effc7aee9eb84bf3d17d685 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Wed, 20 Jun 2018 17:53:34 -0400 +Subject: [PATCH] NFSv4.1: Avoid false retries when RPC calls are interrupted + +A 'false retry' in NFSv4.1 occurs when the client attempts to transmit a +new RPC call using a slot+sequence number combination that references an +already cached one. Currently, the Linux NFS client will do this if a +user process interrupts an RPC call that is in progress. +The problem with doing so is that we defeat the main mechanism used by +the server to differentiate between a new call and a replayed one. Even +if the server is able to perfectly cache the arguments of the old call, +it cannot know if the client intended to replay or send a new call. + +The obvious fix is to bump the sequence number pre-emptively if an +RPC call is interrupted, but in order to deal with the corner cases +where the interrupted call is not actually received and processed by +the server, we need to interpret the error NFS4ERR_SEQ_MISORDERED +as a sign that we need to either wait or locate a correct sequence +number that lies between the value we sent, and the last value that +was acked by a SEQUENCE call on that slot. + +Signed-off-by: Trond Myklebust +--- + fs/nfs/nfs4proc.c | 105 ++++++++++++++++++++----------------------- + fs/nfs/nfs4session.c | 5 ++- + fs/nfs/nfs4session.h | 5 ++- + 3 files changed, 55 insertions(+), 60 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 64ac80ec6b7b..3a6a9c9ee369 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) + res->sr_slot = NULL; + } + ++static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, ++ u32 seqnr) ++{ ++ if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) ++ slot->seq_nr_highest_sent = seqnr; ++} ++static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, ++ u32 seqnr) ++{ ++ slot->seq_nr_highest_sent = seqnr; ++ slot->seq_nr_last_acked = seqnr; ++} ++ + static int nfs41_sequence_process(struct rpc_task *task, + struct nfs4_sequence_res *res) + { + struct nfs4_session *session; + struct nfs4_slot *slot = res->sr_slot; + struct nfs_client *clp; +- bool interrupted = false; + int ret = 1; + + if (slot == NULL) +@@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task, + + session = slot->table->session; + +- if (slot->interrupted) { +- if (res->sr_status != -NFS4ERR_DELAY) +- slot->interrupted = 0; +- interrupted = true; +- } +- + trace_nfs4_sequence_done(session, res); + /* Check the SEQUENCE operation status */ + switch (res->sr_status) { + case 0: ++ /* Mark this sequence number as having been acked */ ++ nfs4_slot_sequence_acked(slot, slot->seq_nr); + /* Update the slot's sequence and clientid lease timer */ + slot->seq_done = 1; + clp = session->clp; +@@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, + * sr_status remains 1 if an RPC level error occurred. + * The server may or may not have processed the sequence + * operation.. +- * Mark the slot as having hosted an interrupted RPC call. + */ +- slot->interrupted = 1; ++ nfs4_slot_sequence_record_sent(slot, slot->seq_nr); ++ slot->seq_done = 1; + goto out; + case -NFS4ERR_DELAY: + /* The server detected a resend of the RPC call and +@@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, + __func__, + slot->slot_nr, + slot->seq_nr); ++ nfs4_slot_sequence_acked(slot, slot->seq_nr); + goto out_retry; + case -NFS4ERR_RETRY_UNCACHED_REP: + case -NFS4ERR_SEQ_FALSE_RETRY: +@@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, + * The server thinks we tried to replay a request. + * Retry the call after bumping the sequence ID. + */ ++ nfs4_slot_sequence_acked(slot, slot->seq_nr); + goto retry_new_seq; + case -NFS4ERR_BADSLOT: + /* +@@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, + goto session_recover; + goto retry_nowait; + case -NFS4ERR_SEQ_MISORDERED: ++ nfs4_slot_sequence_record_sent(slot, slot->seq_nr); + /* +- * Was the last operation on this sequence interrupted? +- * If so, retry after bumping the sequence number. ++ * Were one or more calls using this slot interrupted? ++ * If the server never received the request, then our ++ * transmitted slot sequence number may be too high. + */ +- if (interrupted) +- goto retry_new_seq; +- /* +- * Could this slot have been previously retired? +- * If so, then the server may be expecting seq_nr = 1! +- */ +- if (slot->seq_nr != 1) { +- slot->seq_nr = 1; ++ if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { ++ slot->seq_nr--; + goto retry_nowait; + } +- goto session_recover; ++ /* ++ * RFC5661: ++ * A retry might be sent while the original request is ++ * still in progress on the replier. The replier SHOULD ++ * deal with the issue by returning NFS4ERR_DELAY as the ++ * reply to SEQUENCE or CB_SEQUENCE operation, but ++ * implementations MAY return NFS4ERR_SEQ_MISORDERED. ++ * ++ * Restart the search after a delay. ++ */ ++ slot->seq_nr = slot->seq_nr_highest_sent; ++ goto out_retry; + default: + /* Just update the slot sequence no. */ + slot->seq_done = 1; +@@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { + .rpc_call_done = nfs41_call_sync_done, + }; + +-static void +-nfs4_sequence_process_interrupted(struct nfs_client *client, +- struct nfs4_slot *slot, const struct cred *cred) +-{ +- struct rpc_task *task; +- +- task = _nfs41_proc_sequence(client, cred, slot, true); +- if (!IS_ERR(task)) +- rpc_put_task_async(task); +-} +- + #else /* !CONFIG_NFS_V4_1 */ + + static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +@@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task, + } + EXPORT_SYMBOL_GPL(nfs4_sequence_done); + +-static void +-nfs4_sequence_process_interrupted(struct nfs_client *client, +- struct nfs4_slot *slot, const struct cred *cred) +-{ +- WARN_ON_ONCE(1); +- slot->interrupted = 0; +-} +- + #endif /* !CONFIG_NFS_V4_1 */ + + static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) +@@ -985,26 +983,19 @@ int nfs4_setup_sequence(struct nfs_client *client, + task->tk_timeout = 0; + } + +- for (;;) { +- spin_lock(&tbl->slot_tbl_lock); +- /* The state manager will wait until the slot table is empty */ +- if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) +- goto out_sleep; +- +- slot = nfs4_alloc_slot(tbl); +- if (IS_ERR(slot)) { +- /* Try again in 1/4 second */ +- if (slot == ERR_PTR(-ENOMEM)) +- task->tk_timeout = HZ >> 2; +- goto out_sleep; +- } +- spin_unlock(&tbl->slot_tbl_lock); ++ spin_lock(&tbl->slot_tbl_lock); ++ /* The state manager will wait until the slot table is empty */ ++ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) ++ goto out_sleep; + +- if (likely(!slot->interrupted)) +- break; +- nfs4_sequence_process_interrupted(client, +- slot, task->tk_msg.rpc_cred); ++ slot = nfs4_alloc_slot(tbl); ++ if (IS_ERR(slot)) { ++ /* Try again in 1/4 second */ ++ if (slot == ERR_PTR(-ENOMEM)) ++ task->tk_timeout = HZ >> 2; ++ goto out_sleep; + } ++ spin_unlock(&tbl->slot_tbl_lock); + + nfs4_sequence_attach_slot(args, res, slot); + +diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c +index a5489d70a724..39962c19744f 100644 +--- a/fs/nfs/nfs4session.c ++++ b/fs/nfs/nfs4session.c +@@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, + slot->table = tbl; + slot->slot_nr = slotid; + slot->seq_nr = seq_init; ++ slot->seq_nr_highest_sent = seq_init; ++ slot->seq_nr_last_acked = seq_init - 1; + } + return slot; + } +@@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, + p = &tbl->slots; + while (*p) { + (*p)->seq_nr = ivalue; +- (*p)->interrupted = 0; ++ (*p)->seq_nr_highest_sent = ivalue; ++ (*p)->seq_nr_last_acked = ivalue - 1; + p = &(*p)->next; + } + tbl->highest_used_slotid = NFS4_NO_SLOT; +diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h +index 3c550f297561..230509b77121 100644 +--- a/fs/nfs/nfs4session.h ++++ b/fs/nfs/nfs4session.h +@@ -23,8 +23,9 @@ struct nfs4_slot { + unsigned long generation; + u32 slot_nr; + u32 seq_nr; +- unsigned int interrupted : 1, +- privileged : 1, ++ u32 seq_nr_last_acked; ++ u32 seq_nr_highest_sent; ++ unsigned int privileged : 1, + seq_done : 1; + }; + +-- +2.20.1 + -- cgit