summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c2
-rw-r--r--net/sunrpc/clnt.c3
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/svc.c111
-rw-r--r--net/sunrpc/svc_xprt.c1
-rw-r--r--net/sunrpc/sysctl.c4
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c9
-rw-r--r--net/sunrpc/xprtrdma/ib_client.c183
-rw-r--r--net/sunrpc/xprtrdma/module.c18
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c83
-rw-r--r--net/sunrpc/xprtrdma/verbs.c100
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h5
15 files changed, 399 insertions, 131 deletions
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 06d8ee0db000..4eb19c3a54c7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -168,7 +168,7 @@ static int krb5_DK(const struct gss_krb5_enctype *gk5e,
goto err_return;
blocksize = crypto_sync_skcipher_blocksize(cipher);
if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
- goto err_return;
+ goto err_free_cipher;
ret = -ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cfd1b1bf7e35..09f29a95f2bc 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2326,12 +2326,13 @@ call_transmit_status(struct rpc_task *task)
task->tk_action = call_transmit;
task->tk_status = 0;
break;
- case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
+ break;
+ case -ECONNREFUSED:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6debf4fd42d4..cef623ea1506 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
- } else
+ } else {
+ smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
+ }
}
/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9cda1e53a01..88a59cfa5583 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -72,57 +72,100 @@ static struct svc_pool_map svc_pool_map = {
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, const struct kernel_param *kp)
+__param_set_pool_mode(const char *val, struct svc_pool_map *m)
{
- int *ip = (int *)kp->arg;
- struct svc_pool_map *m = &svc_pool_map;
- int err;
+ int err, mode;
mutex_lock(&svc_pool_map_mutex);
- err = -EBUSY;
- if (m->count)
- goto out;
-
err = 0;
if (!strncmp(val, "auto", 4))
- *ip = SVC_POOL_AUTO;
+ mode = SVC_POOL_AUTO;
else if (!strncmp(val, "global", 6))
- *ip = SVC_POOL_GLOBAL;
+ mode = SVC_POOL_GLOBAL;
else if (!strncmp(val, "percpu", 6))
- *ip = SVC_POOL_PERCPU;
+ mode = SVC_POOL_PERCPU;
else if (!strncmp(val, "pernode", 7))
- *ip = SVC_POOL_PERNODE;
+ mode = SVC_POOL_PERNODE;
else
err = -EINVAL;
+ if (err)
+ goto out;
+
+ if (m->count == 0)
+ m->mode = mode;
+ else if (mode != m->mode)
+ err = -EBUSY;
out:
mutex_unlock(&svc_pool_map_mutex);
return err;
}
static int
-param_get_pool_mode(char *buf, const struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
+{
+ struct svc_pool_map *m = kp->arg;
+
+ return __param_set_pool_mode(val, m);
+}
+
+int sunrpc_set_pool_mode(const char *val)
+{
+ return __param_set_pool_mode(val, &svc_pool_map);
+}
+EXPORT_SYMBOL(sunrpc_set_pool_mode);
+
+/**
+ * sunrpc_get_pool_mode - get the current pool_mode for the host
+ * @buf: where to write the current pool_mode
+ * @size: size of @buf
+ *
+ * Grab the current pool_mode from the svc_pool_map and write
+ * the resulting string to @buf. Returns the number of characters
+ * written to @buf (a'la snprintf()).
+ */
+int
+sunrpc_get_pool_mode(char *buf, size_t size)
{
- int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
- switch (*ip)
+ switch (m->mode)
{
case SVC_POOL_AUTO:
- return sysfs_emit(buf, "auto\n");
+ return snprintf(buf, size, "auto");
case SVC_POOL_GLOBAL:
- return sysfs_emit(buf, "global\n");
+ return snprintf(buf, size, "global");
case SVC_POOL_PERCPU:
- return sysfs_emit(buf, "percpu\n");
+ return snprintf(buf, size, "percpu");
case SVC_POOL_PERNODE:
- return sysfs_emit(buf, "pernode\n");
+ return snprintf(buf, size, "pernode");
default:
- return sysfs_emit(buf, "%d\n", *ip);
+ return snprintf(buf, size, "%d", m->mode);
}
}
+EXPORT_SYMBOL(sunrpc_get_pool_mode);
+
+static int
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
+{
+ char str[16];
+ int len;
+
+ len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str));
+
+ /* Ensure we have room for newline and NUL */
+ len = min_t(int, len, ARRAY_SIZE(str) - 2);
+
+ /* tack on the newline */
+ str[len] = '\n';
+ str[len + 1] = '\0';
+
+ return sysfs_emit(buf, "%s", str);
+}
module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
- &svc_pool_map.mode, 0644);
+ &svc_pool_map, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -250,10 +293,8 @@ svc_pool_map_get(void)
int npools = -1;
mutex_lock(&svc_pool_map_mutex);
-
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
- WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -275,32 +316,21 @@ svc_pool_map_get(void)
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
-
- if (npools == 1)
- /* service is unpooled, so doesn't hold a reference */
- m->count--;
-
mutex_unlock(&svc_pool_map_mutex);
return npools;
}
/*
- * Drop a reference to the global map of cpus to pools, if
- * pools were in use, i.e. if npools > 1.
+ * Drop a reference to the global map of cpus to pools.
* When the last reference is dropped, the map data is
- * freed; this allows the sysadmin to change the pool
- * mode using the pool_mode module option without
- * rebooting or re-loading sunrpc.ko.
+ * freed; this allows the sysadmin to change the pool.
*/
static void
-svc_pool_map_put(int npools)
+svc_pool_map_put(void)
{
struct svc_pool_map *m = &svc_pool_map;
- if (npools <= 1)
- return;
mutex_lock(&svc_pool_map_mutex);
-
if (!--m->count) {
kfree(m->to_pool);
m->to_pool = NULL;
@@ -308,7 +338,6 @@ svc_pool_map_put(int npools)
m->pool_to = NULL;
m->npools = 0;
}
-
mutex_unlock(&svc_pool_map_mutex);
}
@@ -553,9 +582,10 @@ struct svc_serv *svc_create_pooled(struct svc_program *prog,
serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
+ serv->sv_is_pooled = true;
return serv;
out_err:
- svc_pool_map_put(npools);
+ svc_pool_map_put();
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
@@ -585,7 +615,8 @@ svc_destroy(struct svc_serv **servp)
cache_clean_deferred(serv);
- svc_pool_map_put(serv->sv_nrpools);
+ if (serv->sv_is_pooled)
+ svc_pool_map_put();
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index dd86d7f1e97e..d3735ab3e6d1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -157,6 +157,7 @@ int svc_print_xprts(char *buf, int maxlen)
*/
void svc_xprt_deferred_close(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
svc_xprt_enqueue(xprt);
}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 5f3170a1c9bb..bdb587a72422 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-static int proc_do_xprt(struct ctl_table *table, int write,
+static int proc_do_xprt(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
@@ -62,7 +62,7 @@ static int proc_do_xprt(struct ctl_table *table, int write,
}
static int
-proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp,
+proc_dodebug(const struct ctl_table *table, int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
char tmpbuf[20], *s = NULL;
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 55b21bae866d..3232aa23cdb4 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ffbf99894970..31434aeb8e29 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -54,7 +54,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
cid->ci_completion_id = mr->mr_ibmr->res.id;
}
-static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
+static void frwr_mr_unmap(struct rpcrdma_mr *mr)
{
if (mr->mr_device) {
trace_xprtrdma_mr_unmap(mr);
@@ -73,7 +73,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
{
int rc;
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
rc = ib_dereg_mr(mr->mr_ibmr);
if (rc)
@@ -84,7 +84,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
@@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
-/* frwr_reset - Place MRs back on the free list
+/**
+ * frwr_reset - Place MRs back on @req's free list
* @req: request to reset
*
* Used after a failed marshal. For FRWR, this means the MRs
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644
index 000000000000..8507cd4d8921
--- /dev/null
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+/* #include <linux/module.h>
+#include <linux/slab.h> */
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/* Per-ib_device private data for rpcrdma */
+struct rpcrdma_device {
+ struct kref rd_kref;
+ unsigned long rd_flags;
+ struct ib_device *rd_device;
+ struct xarray rd_xa;
+ struct completion rd_done;
+};
+
+#define RPCRDMA_RD_F_REMOVING (0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * Listeners have no associated device, so we never register them.
+ * Note that ib_get_client_data() does not check if @device is
+ * NULL for us.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+ if (!device)
+ return NULL;
+ return ib_get_client_data(device, &rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+ struct rpcrdma_notification *rn,
+ void (*done)(struct rpcrdma_notification *rn))
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+ return -ENETUNREACH;
+
+ if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0)
+ return -ENOMEM;
+ kref_get(&rd->rd_kref);
+ rn->rn_done = done;
+ trace_rpcrdma_client_register(device, rn);
+ return 0;
+}
+
+static void rpcrdma_rn_release(struct kref *kref)
+{
+ struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device,
+ rd_kref);
+
+ trace_rpcrdma_client_completion(rd->rd_device);
+ complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+ struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd)
+ return;
+
+ trace_rpcrdma_client_unregister(device, rn);
+ xa_erase(&rd->rd_xa, rn->rn_index);
+ kref_put(&rd->rd_kref, rpcrdma_rn_release);
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Returns zero on success. xprtrdma private data has been allocated
+ * for this device. On failure, a negative errno is returned.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+ struct rpcrdma_device *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ kref_init(&rd->rd_kref);
+ xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC);
+ rd->rd_device = device;
+ init_completion(&rd->rd_done);
+ ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+ trace_rpcrdma_client_add_one(device);
+ return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+ void *client_data)
+{
+ struct rpcrdma_device *rd = client_data;
+ struct rpcrdma_notification *rn;
+ unsigned long index;
+
+ trace_rpcrdma_client_remove_one(device);
+
+ set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+ xa_for_each(&rd->rd_xa, index, rn)
+ rn->rn_done(rn);
+
+ /*
+ * Wait only if there are still outstanding notification
+ * registrants for this device.
+ */
+ if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+ trace_rpcrdma_client_wait_on(device);
+ wait_for_completion(&rd->rd_done);
+ }
+
+ trace_rpcrdma_client_remove_one_done(device);
+ kfree(rd);
+}
+
+static struct ib_client rpcrdma_ib_client = {
+ .name = "rpcrdma",
+ .add = rpcrdma_add_one,
+ .remove = rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * cel: watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+ ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+ return ib_register_client(&rpcrdma_ib_client);
+}
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 45c5b41ac8dc..697f571d4c01 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
+ rpcrdma_ib_client_unregister();
}
static int __init rpc_rdma_init(void)
{
int rc;
+ rc = rpcrdma_ib_client_register();
+ if (rc)
+ goto out_rc;
+
rc = svc_rdma_init();
if (rc)
- goto out;
+ goto out_ib_client;
rc = xprt_rdma_init();
if (rc)
- svc_rdma_cleanup();
+ goto out_svc_rdma;
-out:
+ return 0;
+
+out_svc_rdma:
+ svc_rdma_cleanup();
+out_ib_client:
+ rpcrdma_ib_client_unregister();
+out_rc:
return rc;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 190a4de239c8..1478c41c7e9d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1471,8 +1471,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
- rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
- false);
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1));
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 474f7a98fe9e..58ae6ec4f25b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -74,7 +74,7 @@ enum {
SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
};
-static int svcrdma_counter_handler(struct ctl_table *table, int write,
+static int svcrdma_counter_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct percpu_counter *stat = (struct percpu_counter *)table->data;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2b1c16b9547d..f15750cacacf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -65,6 +65,8 @@
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node);
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -122,6 +124,41 @@ static void qp_event_handler(struct ib_event *event, void *context)
}
}
+static struct rdma_cm_id *
+svc_rdma_create_listen_id(struct net *net, struct sockaddr *sap,
+ void *context)
+{
+ struct rdma_cm_id *listen_id;
+ int ret;
+
+ listen_id = rdma_create_id(net, svc_rdma_listen_handler, context,
+ RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(listen_id))
+ return listen_id;
+
+ /* Allow both IPv4 and IPv6 sockets to bind a single port
+ * at the same time.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = rdma_set_afonly(listen_id, 1);
+ if (ret)
+ goto out_destroy;
+#endif
+ ret = rdma_bind_addr(listen_id, sap);
+ if (ret)
+ goto out_destroy;
+
+ ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
+ if (ret)
+ goto out_destroy;
+
+ return listen_id;
+
+out_destroy:
+ rdma_destroy_id(listen_id);
+ return ERR_PTR(ret);
+}
+
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node)
{
@@ -247,17 +284,31 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
*
* Return values:
* %0: Do not destroy @cma_id
- * %1: Destroy @cma_id (never returned here)
+ * %1: Destroy @cma_id
*
* NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
*/
static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
{
+ struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
+ struct svcxprt_rdma *cma_xprt = cma_id->context;
+ struct svc_xprt *cma_rdma = &cma_xprt->sc_xprt;
+ struct rdma_cm_id *listen_id;
+
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
handle_connect_req(cma_id, &event->param.conn);
break;
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ listen_id = svc_rdma_create_listen_id(cma_rdma->xpt_net,
+ sap, cma_xprt);
+ if (IS_ERR(listen_id)) {
+ pr_err("Listener dead, address change failed for device %s\n",
+ cma_id->device->name);
+ } else
+ cma_xprt->sc_cm_id = listen_id;
+ return 1;
default:
break;
}
@@ -307,7 +358,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
{
struct rdma_cm_id *listen_id;
struct svcxprt_rdma *cma_xprt;
- int ret;
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
@@ -317,30 +367,13 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
- RDMA_PS_TCP, IB_QPT_RC);
+ listen_id = svc_rdma_create_listen_id(net, sa, cma_xprt);
if (IS_ERR(listen_id)) {
- ret = PTR_ERR(listen_id);
- goto err0;
+ kfree(cma_xprt);
+ return (struct svc_xprt *)listen_id;
}
-
- /* Allow both IPv4 and IPv6 sockets to bind a single port
- * at the same time.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- ret = rdma_set_afonly(listen_id, 1);
- if (ret)
- goto err1;
-#endif
- ret = rdma_bind_addr(listen_id, sa);
- if (ret)
- goto err1;
cma_xprt->sc_cm_id = listen_id;
- ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret)
- goto err1;
-
/*
* We need to use the address from the cm_id in case the
* caller specified 0 for the port number.
@@ -349,12 +382,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
return &cma_xprt->sc_xprt;
-
- err1:
- rdma_destroy_id(listen_id);
- err0:
- kfree(cma_xprt);
- return ERR_PTR(ret);
}
/*
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 432557a553e7..63262ef0c2e3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -49,14 +49,14 @@
* o buffer memory
*/
+#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>
-#include <asm-generic/barrier.h>
-#include <asm/bitops.h>
+#include <asm/barrier.h>
#include <rdma/ib_cm.h>
@@ -69,13 +69,15 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node);
+static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@@ -222,7 +224,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
- struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
might_sleep();
@@ -241,14 +242,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_async_rc = -ENETUNREACH;
complete(&ep->re_done);
return 0;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- pr_info("rpcrdma: removing device %s for %pISpc\n",
- ep->re_id->device->name, sap);
- switch (xchg(&ep->re_connect_status, -ENODEV)) {
- case 0: goto wake_connect_worker;
- case 1: goto disconnected;
- }
- return 0;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
@@ -284,6 +277,14 @@ disconnected:
return 0;
}
+static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_ep *ep = container_of(rn, struct rpcrdma_ep, re_rn);
+
+ trace_xprtrdma_device_removal(ep->re_id);
+ xprt_force_disconnect(ep->re_xprt);
+}
+
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep)
{
@@ -323,6 +324,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
if (rc)
goto out;
+ rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
+ if (rc)
+ goto out;
+
return id;
out:
@@ -350,6 +355,8 @@ static void rpcrdma_ep_destroy(struct kref *kref)
ib_dealloc_pd(ep->re_pd);
ep->re_pd = NULL;
+ rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn);
+
kfree(ep);
module_put(THIS_MODULE);
}
@@ -505,7 +512,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, 1, true);
+ rpcrdma_post_recvs(r_xprt, 1);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -897,6 +904,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
+ struct rpcrdma_mr *mr;
+
/* Credits are valid for only one connection */
req->rl_slot.rq_cong = 0;
@@ -906,7 +915,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
- frwr_reset(req);
+ /* The verbs consumer can't know the state of an MR on the
+ * req->rl_registered list unless a successful completion
+ * has occurred, so they cannot be re-used.
+ */
+ while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+ struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&mr->mr_all);
+ spin_unlock(&buf->rb_lock);
+
+ frwr_mr_release(mr);
+ }
}
/* ASSUMPTION: the rb_allreqs list is stable for the duration,
@@ -924,18 +945,20 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
}
static noinline
-struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
- bool temp)
+struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct ib_device *device = ep->re_id->device;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
- DMA_FROM_DEVICE);
+ rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv,
+ DMA_FROM_DEVICE,
+ ibdev_to_node(device));
if (!rep->rr_rdmabuf)
goto out_free;
@@ -950,7 +973,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_all, &buf->rb_all_reps);
@@ -969,17 +991,6 @@ static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
kfree(rep);
}
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
-
- spin_lock(&buf->rb_lock);
- list_del(&rep->rr_all);
- spin_unlock(&buf->rb_lock);
-
- rpcrdma_rep_free(rep);
-}
-
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1011,10 +1022,8 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
+ list_for_each_entry(rep, &buf->rb_all_reps, rr_all)
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true; /* Mark this rep for destruction */
- }
}
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
@@ -1231,14 +1240,15 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
+ rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node);
if (!rb)
return NULL;
- rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
+ rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node);
if (!rb->rg_data) {
kfree(rb);
return NULL;
@@ -1250,6 +1260,12 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
return rb;
}
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+{
+ return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE);
+}
+
/**
* rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
* @rb: regbuf to reallocate
@@ -1327,10 +1343,9 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
- * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
@@ -1344,8 +1359,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
- if (!temp)
- needed += RPCRDMA_MAX_RECV_BATCH;
+ needed += RPCRDMA_MAX_RECV_BATCH;
if (atomic_inc_return(&ep->re_receiving) > 1)
goto out;
@@ -1354,12 +1368,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
wr = NULL;
while (needed) {
rep = rpcrdma_rep_get_locked(buf);
- if (rep && rep->rr_temp) {
- rpcrdma_rep_destroy(rep);
- continue;
- }
if (!rep)
- rep = rpcrdma_rep_create(r_xprt, temp);
+ rep = rpcrdma_rep_create(r_xprt);
if (!rep)
break;
if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index da409450dfc0..8147d2b41494 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -56,6 +56,7 @@
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#include <linux/sunrpc/rdma_rn.h> /* removal notifications */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -92,6 +93,7 @@ struct rpcrdma_ep {
struct rpcrdma_connect_private
re_cm_private;
struct rdma_conn_param re_remote_cma;
+ struct rpcrdma_notification re_rn;
int re_receive_count;
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
@@ -198,7 +200,6 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
- bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct rpc_rqst *rr_rqst;
@@ -466,7 +467,7 @@ void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed);
/*
* Buffer calls - xprtrdma/verbs.c