summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lnet
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lnet')
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c16
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h18
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c201
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c5
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h8
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c48
-rw-r--r--drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c207
-rw-r--r--drivers/staging/lustre/lnet/libcfs/debug.c9
-rw-r--r--drivers/staging/lustre/lnet/libcfs/fail.c6
-rw-r--r--drivers/staging/lustre/lnet/libcfs/libcfs_string.c2
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c17
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c4
-rw-r--r--drivers/staging/lustre/lnet/lnet/api-ni.c46
-rw-r--r--drivers/staging/lustre/lnet/lnet/config.c14
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-md.c30
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-move.c363
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-msg.c18
-rw-r--r--drivers/staging/lustre/lnet/lnet/lib-socket.c21
-rw-r--r--drivers/staging/lustre/lnet/lnet/lo.c39
-rw-r--r--drivers/staging/lustre/lnet/lnet/router.c20
-rw-r--r--drivers/staging/lustre/lnet/selftest/brw_test.c4
-rw-r--r--drivers/staging/lustre/lnet/selftest/conrpc.c15
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.c2
-rw-r--r--drivers/staging/lustre/lnet/selftest/console.h1
-rw-r--r--drivers/staging/lustre/lnet/selftest/framework.c4
-rw-r--r--drivers/staging/lustre/lnet/selftest/rpc.c8
26 files changed, 398 insertions, 728 deletions
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 4f5978b3767b..c7a5d49e487f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -128,6 +128,7 @@ static int kiblnd_msgtype2size(int type)
static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
{
struct kib_rdma_desc *rd;
+ int msg_size;
int nob;
int n;
int i;
@@ -146,12 +147,6 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
n = rd->rd_nfrags;
- if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
- CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
- n, IBLND_MAX_RDMA_FRAGS);
- return 1;
- }
-
nob = offsetof(struct kib_msg, ibm_u) +
kiblnd_rd_msg_size(rd, msg->ibm_type, n);
@@ -161,6 +156,13 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
return 1;
}
+ msg_size = kiblnd_rd_size(rd);
+ if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
+ CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
+ msg_size, LNET_MAX_PAYLOAD);
+ return 1;
+ }
+
if (!flip)
return 0;
@@ -618,7 +620,7 @@ static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
}
struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
- int state, int version)
+ int state, int version)
{
/*
* CAVEAT EMPTOR:
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 078a0c3e8845..14576977200f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -113,8 +113,9 @@ extern struct kib_tunables kiblnd_tunables;
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
-#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
+#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */
+#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
/************************/
/* derived constants... */
@@ -133,8 +134,8 @@ extern struct kib_tunables kiblnd_tunables;
/* WRs and CQEs (per connection) */
#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
#define IBLND_SEND_WRS(c) \
- ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
- c->ibc_peer->ibp_ni))
+ (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
+ kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
@@ -582,6 +583,8 @@ struct kib_peer {
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
+ /* counter of how many times we triggered a conn race */
+ unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
@@ -607,14 +610,14 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
mod = tunables->lnd_map_on_demand;
- return mod ? mod : IBLND_MAX_RDMA_FRAGS;
+ return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
}
static inline int
kiblnd_rdma_frags(int version, struct lnet_ni *ni)
{
return version == IBLND_MSG_VERSION_1 ?
- IBLND_MAX_RDMA_FRAGS :
+ (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
kiblnd_cfg_rdma_frags(ni);
}
@@ -1034,5 +1037,4 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 596a697b9d39..b27de8888149 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -36,16 +36,19 @@
#include "o2iblnd.h"
+#define MAX_CONN_RACES_BEFORE_ABORT 20
+
static void kiblnd_peer_alive(struct kib_peer *peer);
static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_check_sends(struct kib_conn *conn);
static void kiblnd_init_tx_msg(lnet_ni_t *ni, struct kib_tx *tx,
- int type, int body_nob);
+ int type, int body_nob);
static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
- int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie);
+ int resid, struct kib_rdma_desc *dstrd,
+ __u64 dstcookie);
static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
static void kiblnd_unmap_tx(lnet_ni_t *ni, struct kib_tx *tx);
+static void kiblnd_check_sends_locked(struct kib_conn *conn);
static void
kiblnd_tx_done(lnet_ni_t *ni, struct kib_tx *tx)
@@ -211,9 +214,9 @@ kiblnd_post_rx(struct kib_rx *rx, int credit)
conn->ibc_outstanding_credits++;
else
conn->ibc_reserved_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
out:
kiblnd_conn_decref(conn);
return rc;
@@ -344,8 +347,8 @@ kiblnd_handle_rx(struct kib_rx *rx)
!IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
conn->ibc_outstanding_credits++;
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
}
switch (msg->ibm_type) {
@@ -648,7 +651,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc
static int
kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- unsigned int niov, struct kvec *iov, int offset, int nob)
+ unsigned int niov, const struct kvec *iov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct page *page;
@@ -705,7 +708,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
static int
kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
- int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+ int nkiov, const lnet_kiov_t *kiov, int offset, int nob)
{
struct kib_net *net = ni->ni_data;
struct scatterlist *sg;
@@ -717,8 +720,8 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
LASSERT(nkiov > 0);
LASSERT(net);
- while (offset >= kiov->kiov_len) {
- offset -= kiov->kiov_len;
+ while (offset >= kiov->bv_len) {
+ offset -= kiov->bv_len;
nkiov--;
kiov++;
LASSERT(nkiov > 0);
@@ -728,10 +731,10 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
do {
LASSERT(nkiov > 0);
- fragnob = min((int)(kiov->kiov_len - offset), nob);
+ fragnob = min((int)(kiov->bv_len - offset), nob);
- sg_set_page(sg, kiov->kiov_page, fragnob,
- kiov->kiov_offset + offset);
+ sg_set_page(sg, kiov->bv_page, fragnob,
+ kiov->bv_offset + offset);
sg = sg_next(sg);
if (!sg) {
CERROR("lacking enough sg entries to map tx\n");
@@ -761,7 +764,6 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
LASSERT(tx->tx_queued);
/* We rely on this for QP sizing */
LASSERT(tx->tx_nwrq > 0);
- LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
LASSERT(!credit || credit == 1);
LASSERT(conn->ibc_outstanding_credits >= 0);
@@ -800,7 +802,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
/*
* OK to drop when posted enough NOOPs, since
- * kiblnd_check_sends will queue NOOP again when
+ * kiblnd_check_sends_locked will queue NOOP again when
* posted NOOPs complete
*/
spin_unlock(&conn->ibc_lock);
@@ -905,7 +907,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
}
static void
-kiblnd_check_sends(struct kib_conn *conn)
+kiblnd_check_sends_locked(struct kib_conn *conn)
{
int ver = conn->ibc_version;
lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
@@ -918,8 +920,6 @@ kiblnd_check_sends(struct kib_conn *conn)
return;
}
- spin_lock(&conn->ibc_lock);
-
LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
LASSERT(!IBLND_OOB_CAPABLE(ver) ||
conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
@@ -969,8 +969,6 @@ kiblnd_check_sends(struct kib_conn *conn)
if (kiblnd_post_tx_locked(conn, tx, credit))
break;
}
-
- spin_unlock(&conn->ibc_lock);
}
static void
@@ -1016,16 +1014,11 @@ kiblnd_tx_complete(struct kib_tx *tx, int status)
if (idle)
list_del(&tx->tx_list);
- kiblnd_conn_addref(conn); /* 1 ref for me.... */
-
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
if (idle)
kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-
- kiblnd_check_sends(conn);
-
- kiblnd_conn_decref(conn); /* ...until here */
}
static void
@@ -1078,6 +1071,15 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
LASSERT(type == IBLND_MSG_GET_DONE ||
type == IBLND_MSG_PUT_DONE);
+ if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
+ CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid),
+ conn->ibc_max_frags << PAGE_SHIFT,
+ kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
+ rc = -EMSGSIZE;
+ goto too_big;
+ }
+
while (resid > 0) {
if (srcidx >= srcrd->rd_nfrags) {
CERROR("Src buffer exhausted: %d frags\n", srcidx);
@@ -1091,10 +1093,10 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
break;
}
- if (tx->tx_nwrq >= conn->ibc_max_frags) {
+ if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_max_frags,
+ IBLND_MAX_RDMA_FRAGS,
srcidx, srcrd->rd_nfrags,
dstidx, dstrd->rd_nfrags);
rc = -EMSGSIZE;
@@ -1132,7 +1134,7 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
wrq++;
sge++;
}
-
+too_big:
if (rc < 0) /* no RDMA if completing with failure */
tx->tx_nwrq = 0;
@@ -1204,9 +1206,8 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
{
spin_lock(&conn->ibc_lock);
kiblnd_queue_tx_locked(tx, conn);
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
-
- kiblnd_check_sends(conn);
}
static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
@@ -1499,6 +1500,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
unsigned int payload_offset = lntmsg->msg_offset;
unsigned int payload_nob = lntmsg->msg_len;
+ struct iov_iter from;
struct kib_msg *ibmsg;
struct kib_rdma_desc *rd;
struct kib_tx *tx;
@@ -1518,6 +1520,17 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
/* payload is either all vaddrs or all pages */
LASSERT(!(payload_kiov && payload_iov));
+ if (payload_kiov)
+ iov_iter_bvec(&from, ITER_BVEC | WRITE,
+ payload_kiov, payload_niov,
+ payload_nob + payload_offset);
+ else
+ iov_iter_kvec(&from, ITER_KVEC | WRITE,
+ payload_iov, payload_niov,
+ payload_nob + payload_offset);
+
+ iov_iter_advance(&from, payload_offset);
+
switch (type) {
default:
LBUG();
@@ -1637,17 +1650,8 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
ibmsg = tx->tx_msg;
ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
- if (payload_kiov)
- lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
+ copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE,
+ &from);
nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
@@ -1719,8 +1723,7 @@ kiblnd_reply(lnet_ni_t *ni, struct kib_rx *rx, lnet_msg_t *lntmsg)
int
kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct kib_rx *rx = private;
struct kib_msg *rxmsg = rx->rx_msg;
@@ -1730,10 +1733,9 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
int post_credit = IBLND_POSTRX_PEER_CREDIT;
int rc = 0;
- LASSERT(mlen <= rlen);
+ LASSERT(iov_iter_count(to) <= rlen);
LASSERT(!in_interrupt());
/* Either all pages or all vaddrs */
- LASSERT(!(kiov && iov));
switch (rxmsg->ibm_type) {
default:
@@ -1749,16 +1751,8 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
break;
}
- if (kiov)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBLND_MSG_SIZE, rxmsg,
- offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
- mlen);
+ copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload,
+ IBLND_MSG_SIZE, to);
lnet_finalize(ni, lntmsg, 0);
break;
@@ -1766,7 +1760,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
struct kib_msg *txmsg;
struct kib_rdma_desc *rd;
- if (!mlen) {
+ if (!iov_iter_count(to)) {
lnet_finalize(ni, lntmsg, 0);
kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
rxmsg->ibm_u.putreq.ibprm_cookie);
@@ -1784,12 +1778,16 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
txmsg = tx->tx_msg;
rd = &txmsg->ibm_u.putack.ibpam_rd;
- if (!kiov)
+ if (!(to->type & ITER_BVEC))
rc = kiblnd_setup_rd_iov(ni, tx, rd,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset,
+ iov_iter_count(to));
else
rc = kiblnd_setup_rd_kiov(ni, tx, rd,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset,
+ iov_iter_count(to));
if (rc) {
CERROR("Can't setup PUT sink for %s: %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
@@ -2183,14 +2181,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
return;
}
- /**
- * refcount taken by cmid is not reliable after I released the glock
- * because this connection is visible to other threads now, another
- * thread can find and close this connection right after I released
- * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
- * called, it can release the connection refcount taken by cmid.
- * It means the connection could be destroyed before I finish my
- * operations on it.
+ /*
+ * +1 ref for myself, this connection is visible to other threads
+ * now, refcount of peer:ibp_conns can be released by connection
+ * close from either a different thread, or the calling of
+ * kiblnd_check_sends_locked() below. See bz21911 for details.
*/
kiblnd_conn_addref(conn);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
@@ -2202,10 +2197,9 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
kiblnd_queue_tx_locked(tx, conn);
}
+ kiblnd_check_sends_locked(conn);
spin_unlock(&conn->ibc_lock);
- kiblnd_check_sends(conn);
-
/* schedule blocked rxs */
kiblnd_handle_early_rxs(conn);
@@ -2240,6 +2234,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
struct kib_rej rej;
int version = IBLND_MSG_VERSION;
unsigned long flags;
+ int max_frags;
int rc;
struct sockaddr_in *peer_addr;
@@ -2346,22 +2341,20 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- if (reqmsg->ibm_u.connparams.ibcp_max_frags >
- kiblnd_rdma_frags(version, ni)) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
+ if (max_frags > kiblnd_rdma_frags(version, ni)) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version >= IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
goto failed;
- } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
- kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) {
- CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
+ } else if (max_frags < kiblnd_rdma_frags(version, ni) &&
+ !net->ibn_fmr_ps) {
+ CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n",
+ libcfs_nid2str(nid), version, max_frags,
kiblnd_rdma_frags(version, ni));
if (version == IBLND_MSG_VERSION)
@@ -2387,7 +2380,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
}
/* We have validated the peer's parameters so use those */
- peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+ peer->ibp_max_frags = max_frags;
peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
write_lock_irqsave(g_lock, flags);
@@ -2419,23 +2412,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
goto failed;
}
- /* tie-break connection race in favour of the higher NID */
+ /*
+ * Tie-break connection race in favour of the higher NID.
+ * If we keep running into a race condition multiple times,
+ * we have to assume that the connection attempt with the
+ * higher NID is stuck in a connecting state and will never
+ * recover. As such, we pass through this if-block and let
+ * the lower NID connection win so we can move forward.
+ */
if (peer2->ibp_connecting &&
- nid < ni->ni_nid) {
+ nid < ni->ni_nid && peer2->ibp_races <
+ MAX_CONN_RACES_BEFORE_ABORT) {
+ peer2->ibp_races++;
write_unlock_irqrestore(g_lock, flags);
- CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+ CDEBUG(D_NET, "Conn race %s\n",
+ libcfs_nid2str(peer2->ibp_nid));
kiblnd_peer_decref(peer);
rej.ibr_why = IBLND_REJECT_CONN_RACE;
goto failed;
}
-
+ if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
+ CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
+ libcfs_nid2str(peer2->ibp_nid),
+ MAX_CONN_RACES_BEFORE_ABORT);
/**
* passive connection is allowed even this peer is waiting for
* reconnection.
*/
peer2->ibp_reconnecting = 0;
+ peer2->ibp_races = 0;
peer2->ibp_accepting++;
kiblnd_peer_addref(peer2);
@@ -2494,7 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
sizeof(ackmsg->ibm_u.connparams));
ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2526,9 +2533,9 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
failed:
if (ni) {
- lnet_ni_decref(ni);
rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+ lnet_ni_decref(ni);
}
rej.ibr_version = version;
@@ -2556,7 +2563,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
if (cp) {
msg_size = cp->ibcp_max_msg_size;
- frag_num = cp->ibcp_max_frags;
+ frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
queue_dep = cp->ibcp_queue_depth;
}
@@ -2821,11 +2828,11 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
goto failed;
}
- if (msg->ibm_u.connparams.ibcp_max_frags >
+ if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
conn->ibc_max_frags) {
CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_max_frags,
+ msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
conn->ibc_max_frags);
rc = -EPROTO;
goto failed;
@@ -2859,7 +2866,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
- conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+ conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
@@ -2916,7 +2923,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
memset(msg, 0, sizeof(*msg));
kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
- msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+ msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
kiblnd_pack_msg(peer->ibp_ni, msg, version,
@@ -3233,7 +3240,11 @@ kiblnd_check_conns(int idx)
*/
list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
list_del(&conn->ibc_connd_list);
- kiblnd_check_sends(conn);
+
+ spin_lock(&conn->ibc_lock);
+ kiblnd_check_sends_locked(conn);
+ spin_unlock(&conn->ibc_lock);
+
kiblnd_conn_decref(conn);
}
}
@@ -3419,6 +3430,12 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
case IB_EVENT_COMM_EST:
CDEBUG(D_NET, "%s established\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ /*
+ * We received a packet but connection isn't established
+ * probably handshake packet was lost, so free to
+ * force make connection established
+ */
+ rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
return;
default:
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 07ec540946cd..cbc9a9c5385f 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1468,11 +1468,6 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
conn->ksnc_route = NULL;
-#if 0 /* irrelevant with only eager routes */
- /* make route least favourite */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-#endif
ksocknal_route_decref(route); /* drop conn's ref on route */
}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index a56632b4ee37..e6ca0cf52691 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -86,8 +86,6 @@ struct ksock_sched { /* per scheduler state */
int kss_nconns; /* # connections assigned to
* this scheduler */
struct ksock_sched_info *kss_info; /* owner of it */
- struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
- struct kvec kss_scratch_iov[LNET_MAX_IOV];
};
struct ksock_sched_info {
@@ -616,9 +614,7 @@ void ksocknal_shutdown(lnet_ni_t *ni);
int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
+ int delayed, struct iov_iter *to, unsigned int rlen);
int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
@@ -635,7 +631,7 @@ int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
- struct ksock_tx *tx, int nonblk);
+ struct ksock_tx *tx, int nonblk);
int ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx,
lnet_process_id_t id);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 303576d815c6..c1c6f604e6ad 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -35,8 +35,8 @@ ksocknal_alloc_tx(int type, int size)
spin_lock(&ksocknal_data.ksnd_tx_lock);
if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
- tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \
- next, struct ksock_tx, tx_list);
+ tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
+ struct ksock_tx, tx_list);
LASSERT(tx->tx_desc_size == size);
list_del(&tx->tx_list);
}
@@ -164,13 +164,13 @@ ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
do {
LASSERT(tx->tx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return rc;
}
- nob -= (int)kiov->kiov_len;
+ nob -= (int)kiov->bv_len;
tx->tx_kiov = ++kiov;
tx->tx_nkiov--;
} while (nob);
@@ -326,13 +326,13 @@ ksocknal_recv_kiov(struct ksock_conn *conn)
do {
LASSERT(conn->ksnc_rx_nkiov > 0);
- if (nob < (int)kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
+ if (nob < (int)kiov->bv_len) {
+ kiov->bv_offset += nob;
+ kiov->bv_len -= nob;
return -EAGAIN;
}
- nob -= kiov->kiov_len;
+ nob -= kiov->bv_len;
conn->ksnc_rx_kiov = ++kiov;
conn->ksnc_rx_nkiov--;
} while (nob);
@@ -1325,39 +1325,36 @@ ksocknal_process_receive(struct ksock_conn *conn)
int
ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
- unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ struct iov_iter *to, unsigned int rlen)
{
struct ksock_conn *conn = private;
struct ksock_sched *sched = conn->ksnc_scheduler;
- LASSERT(mlen <= rlen);
- LASSERT(niov <= LNET_MAX_IOV);
+ LASSERT(iov_iter_count(to) <= rlen);
+ LASSERT(to->nr_segs <= LNET_MAX_IOV);
conn->ksnc_cookie = msg;
- conn->ksnc_rx_nob_wanted = mlen;
+ conn->ksnc_rx_nob_wanted = iov_iter_count(to);
conn->ksnc_rx_nob_left = rlen;
- if (!mlen || iov) {
+ if (to->type & ITER_KVEC) {
conn->ksnc_rx_nkiov = 0;
conn->ksnc_rx_kiov = NULL;
conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
conn->ksnc_rx_niov =
lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
- niov, iov, offset, mlen);
+ to->nr_segs, to->kvec,
+ to->iov_offset, iov_iter_count(to));
} else {
conn->ksnc_rx_niov = 0;
conn->ksnc_rx_iov = NULL;
conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
conn->ksnc_rx_nkiov =
lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
- niov, kiov, offset, mlen);
+ to->nr_segs, to->bvec,
+ to->iov_offset, iov_iter_count(to));
}
- LASSERT(mlen ==
- lnet_iov_nob(conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
- lnet_kiov_nob(conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
LASSERT(conn->ksnc_rx_scheduled);
spin_lock_bh(&sched->kss_lock);
@@ -2008,13 +2005,6 @@ ksocknal_connect(struct ksock_route *route)
list_splice_init(&peer->ksnp_tx_queue, &zombies);
}
-#if 0 /* irrelevant with only eager routes */
- if (!route->ksnr_deleted) {
- /* make this route least-favourite for re-selection */
- list_del(&route->ksnr_list);
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
- }
-#endif
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
ksocknal_peer_failed(peer);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 6a17757fce1e..6c95e989ca12 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -73,9 +73,9 @@ ksocknal_lib_zc_capable(struct ksock_conn *conn)
int
ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
{
+ struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
struct socket *sock = conn->ksnc_sock;
- int nob;
- int rc;
+ int nob, i;
if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */
conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */
@@ -83,34 +83,16 @@ ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
!tx->tx_msg.ksm_csum) /* not checksummed */
ksocknal_lib_csum_tx(tx);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- {
-#if SOCKNAL_SINGLE_FRAG_TX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
- int i;
+ for (nob = i = 0; i < tx->tx_niov; i++)
+ nob += tx->tx_iov[i].iov_len;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- rc = kernel_sendmsg(sock, &msg, scratchiov, niov, nob);
- }
- return rc;
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
+ tx->tx_iov, tx->tx_niov, nob);
+ return sock_sendmsg(sock, &msg);
}
int
@@ -124,20 +106,16 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
/* Not NOOP message */
LASSERT(tx->tx_lnetmsg);
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
if (tx->tx_msg.ksm_zc_cookies[0]) {
/* Zero copy is enabled */
struct sock *sk = sock->sk;
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
+ struct page *page = kiov->bv_page;
+ int offset = kiov->bv_offset;
+ int fragsize = kiov->bv_len;
int msgflg = MSG_DONTWAIT;
CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
+ page, offset, kiov->bv_len);
if (!list_empty(&conn->ksnc_tx_queue) ||
fragsize < tx->tx_resid)
@@ -150,34 +128,19 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
}
} else {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- unsigned int niov = tx->tx_nkiov;
-#endif
struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
int i;
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
+ for (nob = i = 0; i < tx->tx_nkiov; i++)
+ nob += kiov[i].bv_len;
if (!list_empty(&conn->ksnc_tx_queue) ||
nob < tx->tx_resid)
msg.msg_flags |= MSG_MORE;
- rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
+ iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
+ kiov, tx->tx_nkiov, nob);
+ rc = sock_sendmsg(sock, &msg);
}
return rc;
}
@@ -201,14 +164,7 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn)
int
ksocknal_lib_recv_iov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- unsigned int niov = 1;
-#else
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
-#endif
struct kvec *iov = conn->ksnc_rx_iov;
struct msghdr msg = {
.msg_flags = 0
@@ -220,20 +176,15 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
int sum;
__u32 saved_csum;
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
LASSERT(niov > 0);
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += iov[i].iov_len;
+
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob,
- MSG_DONTWAIT);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
saved_csum = 0;
if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -259,67 +210,10 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
return rc;
}
-static void
-ksocknal_lib_kiov_vunmap(void *addr)
-{
- if (!addr)
- return;
-
- vunmap(addr);
-}
-
-static void *
-ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
- struct kvec *iov, struct page **pages)
-{
- void *addr;
- int nob;
- int i;
-
- if (!*ksocknal_tunables.ksnd_zc_recv || !pages)
- return NULL;
-
- LASSERT(niov <= LNET_MAX_IOV);
-
- if (niov < 2 ||
- niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
- return NULL;
-
- for (nob = i = 0; i < niov; i++) {
- if ((kiov[i].kiov_offset && i > 0) ||
- (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1))
- return NULL;
-
- pages[i] = kiov[i].kiov_page;
- nob += kiov[i].kiov_len;
- }
-
- addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
- if (!addr)
- return NULL;
-
- iov->iov_base = addr + kiov[0].kiov_offset;
- iov->iov_len = nob;
-
- return addr;
-}
-
int
ksocknal_lib_recv_kiov(struct ksock_conn *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct kvec scratch;
- struct kvec *scratchiov = &scratch;
- struct page **pages = NULL;
- unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
- struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct msghdr msg = {
.msg_flags = 0
@@ -328,63 +222,32 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn)
int i;
int rc;
void *base;
- void *addr;
int sum;
int fragnob;
- int n;
-
- /*
- * NB we can't trust socket ops to either consume our iovs
- * or leave them alone.
- */
- addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
- if (addr) {
- nob = scratchiov[0].iov_len;
- n = 1;
- } else {
- for (nob = i = 0; i < niov; i++) {
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- }
- n = niov;
- }
+ for (nob = i = 0; i < niov; i++)
+ nob += kiov[i].bv_len;
LASSERT(nob <= conn->ksnc_rx_nob_wanted);
- rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov,
- n, nob, MSG_DONTWAIT);
+ iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
+ rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
if (conn->ksnc_msg.ksm_csum) {
for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
LASSERT(i < niov);
- /*
- * Dang! have to kmap again because I have nowhere to
- * stash the mapped address. But by doing it while the
- * page is still mapped, the kernel just bumps the map
- * count and returns me the address it stashed.
- */
- base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- fragnob = kiov[i].kiov_len;
+ base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
+ fragnob = kiov[i].bv_len;
if (fragnob > sum)
fragnob = sum;
conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
base, fragnob);
- kunmap(kiov[i].kiov_page);
+ kunmap(kiov[i].bv_page);
}
}
-
- if (addr) {
- ksocknal_lib_kiov_vunmap(addr);
- } else {
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
- }
-
return rc;
}
@@ -406,12 +269,12 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
if (tx->tx_kiov) {
for (i = 0; i < tx->tx_nkiov; i++) {
- base = kmap(tx->tx_kiov[i].kiov_page) +
- tx->tx_kiov[i].kiov_offset;
+ base = kmap(tx->tx_kiov[i].bv_page) +
+ tx->tx_kiov[i].bv_offset;
- csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+ csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len);
- kunmap(tx->tx_kiov[i].kiov_page);
+ kunmap(tx->tx_kiov[i].bv_page);
}
} else {
for (i = 1; i < tx->tx_niov; i++)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index 42b15a769183..23b36b890964 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -328,15 +328,20 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
*/
void libcfs_debug_dumplog_internal(void *arg)
{
+ static time64_t last_dump_time;
+ time64_t current_time;
void *journal_info;
journal_info = current->journal_info;
current->journal_info = NULL;
+ current_time = ktime_get_real_seconds();
- if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
+ if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
+ current_time > last_dump_time) {
+ last_dump_time = current_time;
snprintf(debug_file_name, sizeof(debug_file_name) - 1,
"%s.%lld.%ld", libcfs_debug_file_path_arr,
- (s64)ktime_get_real_seconds(), (long_ptr_t)arg);
+ (s64)current_time, (long_ptr_t)arg);
pr_alert("LustreError: dumping log to %s\n", debug_file_name);
cfs_tracefile_dump_all_pages(debug_file_name);
libcfs_run_debug_log_upcall(debug_file_name);
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
index 9288ee08d1f7..e4b1a0a86eae 100644
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -90,8 +90,10 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set)
}
}
- if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
- (value & CFS_FAIL_ONCE))
+ /* Take into account the current call for FAIL_ONCE for ORSET only,
+ * as RESET is a new fail_loc, it does not change the current call
+ */
+ if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
/* Lost race to set CFS_FAILED_BIT. */
if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
index fc697cdfcdaf..56a614d7713b 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
@@ -229,8 +229,6 @@ cfs_str2num_check(char *str, int nob, unsigned *num,
char *endp, cache;
int rc;
- str = cfs_trimwhite(str);
-
/**
* kstrouint can only handle strings composed
* of only numbers. We need to scan the string
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index b52518c54efe..e8b1a61420de 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -74,6 +74,17 @@ struct cfs_cpt_data {
static struct cfs_cpt_data cpt_data;
+static void
+cfs_node_to_cpumask(int node, cpumask_t *mask)
+{
+ const cpumask_t *tmp = cpumask_of_node(node);
+
+ if (tmp)
+ cpumask_copy(mask, tmp);
+ else
+ cpumask_clear(mask);
+}
+
void
cfs_cpt_table_free(struct cfs_cpt_table *cptab)
{
@@ -403,7 +414,7 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
mutex_lock(&cpt_data.cpt_mutex);
mask = cpt_data.cpt_cpumask;
- cpumask_copy(mask, cpumask_of_node(node));
+ cfs_node_to_cpumask(node, mask);
rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
@@ -427,7 +438,7 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
mutex_lock(&cpt_data.cpt_mutex);
mask = cpt_data.cpt_cpumask;
- cpumask_copy(mask, cpumask_of_node(node));
+ cfs_node_to_cpumask(node, mask);
cfs_cpt_unset_cpumask(cptab, cpt, mask);
@@ -749,7 +760,7 @@ cfs_cpt_table_create(int ncpt)
}
for_each_online_node(i) {
- cpumask_copy(mask, cpumask_of_node(i));
+ cfs_node_to_cpumask(i, mask);
while (!cpumask_empty(mask)) {
struct cfs_cpu_partition *part;
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
index 5c0116ade909..7f56d2c9dd00 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -95,8 +95,8 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
err = crypto_ahash_setkey(tfm, key, key_len);
else if ((*type)->cht_key != 0)
err = crypto_ahash_setkey(tfm,
- (unsigned char *)&((*type)->cht_key),
- (*type)->cht_size);
+ (unsigned char *)&((*type)->cht_key),
+ (*type)->cht_size);
if (err != 0) {
ahash_request_free(*req);
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 346db892f275..4daf828198c3 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1286,6 +1286,25 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
sizeof(*ni->ni_lnd_tunables));
}
+ /*
+ * If given some LND tunable parameters, parse those now to
+ * override the values in the NI structure.
+ */
+ if (conf) {
+ if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
+ ni->ni_peerrtrcredits =
+ conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+ if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
+ ni->ni_peertimeout =
+ conf->cfg_config_u.cfg_net.net_peer_timeout;
+ if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+ ni->ni_peertxcredits =
+ conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+ if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
+ ni->ni_maxtxcredits =
+ conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ }
+
rc = lnd->lnd_startup(ni);
mutex_unlock(&the_lnet.ln_lnd_mutex);
@@ -1299,33 +1318,6 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
goto failed0;
}
- /*
- * If given some LND tunable parameters, parse those now to
- * override the values in the NI structure.
- */
- if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- }
- if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- }
- /*
- * TODO
- * Note: For now, don't allow the user to change
- * peertxcredits as this number is used in the
- * IB LND to control queue depth.
- *
- * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
- * ni->ni_peertxcredits =
- * conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- */
- if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
- }
-
LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
lnet_net_lock(LNET_LOCK_EX);
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index a72afdf68bb2..9e2183ff847e 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -31,6 +31,8 @@
*/
#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
#include "../../include/linux/lnet/lib-lnet.h"
struct lnet_text_buf { /* tmp struct for parsing routes */
@@ -110,6 +112,11 @@ lnet_ni_free(struct lnet_ni *ni)
LIBCFS_FREE(ni->ni_interfaces[i],
strlen(ni->ni_interfaces[i]) + 1);
}
+
+ /* release reference to net namespace */
+ if (ni->ni_net_ns)
+ put_net(ni->ni_net_ns);
+
LIBCFS_FREE(ni, sizeof(*ni));
}
@@ -171,6 +178,13 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
/* LND will fill in the address part of the NID */
ni->ni_nid = LNET_MKNID(net, 0);
+
+ /* Store net namespace in which current ni is being created */
+ if (current->nsproxy->net_ns)
+ ni->ni_net_ns = get_net(current->nsproxy->net_ns);
+ else
+ ni->ni_net_ns = NULL;
+
ni->ni_last_alive = ktime_get_real_seconds();
list_add_tail(&ni->ni_list, nilist);
return ni;
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
index 1834bf7a27ef..eab53cd57296 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c
@@ -134,11 +134,11 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
for (i = 0; i < (int)niov; i++) {
/* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].kiov_offset +
- lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE)
+ if (lmd->md_iov.kiov[i].bv_offset +
+ lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
return -EINVAL; /* invalid length */
- total_length += lmd->md_iov.kiov[i].kiov_len;
+ total_length += lmd->md_iov.kiov[i].bv_len;
}
lmd->md_length = total_length;
@@ -292,11 +292,12 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
return -ENOMEM;
rc = lnet_md_build(md, &umd, unlink);
+ if (rc)
+ goto out_free;
+
cpt = lnet_cpt_of_cookie(meh.cookie);
lnet_res_lock(cpt);
- if (rc)
- goto failed;
me = lnet_handle2me(&meh);
if (!me)
@@ -307,7 +308,7 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
rc = lnet_md_link(md, umd.eq_handle, cpt);
if (rc)
- goto failed;
+ goto out_unlock;
/*
* attach this MD to portal of ME and check if it matches any
@@ -324,10 +325,10 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
return 0;
- failed:
- lnet_md_free(md);
-
+out_unlock:
lnet_res_unlock(cpt);
+out_free:
+ lnet_md_free(md);
return rc;
}
EXPORT_SYMBOL(LNetMDAttach);
@@ -370,24 +371,25 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
return -ENOMEM;
rc = lnet_md_build(md, &umd, unlink);
+ if (rc)
+ goto out_free;
cpt = lnet_res_lock_current();
- if (rc)
- goto failed;
rc = lnet_md_link(md, umd.eq_handle, cpt);
if (rc)
- goto failed;
+ goto out_unlock;
lnet_md2handle(handle, md);
lnet_res_unlock(cpt);
return 0;
- failed:
+out_unlock:
+ lnet_res_unlock(cpt);
+out_free:
lnet_md_free(md);
- lnet_res_unlock(cpt);
return rc;
}
EXPORT_SYMBOL(LNetMDBind);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index e6d3b801d87d..48e6f8f2392f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -37,6 +37,8 @@
#define DEBUG_SUBSYSTEM S_LNET
#include "../../include/linux/lnet/lib-lnet.h"
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
static int local_nid_dist_zero = 1;
module_param(local_nid_dist_zero, int, 0444);
@@ -166,25 +168,17 @@ lnet_iov_nob(unsigned int niov, struct kvec *iov)
EXPORT_SYMBOL(lnet_iov_nob);
void
-lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
- unsigned int nsiov, struct kvec *siov, unsigned int soffset,
- unsigned int nob)
+lnet_copy_iov2iter(struct iov_iter *to,
+ unsigned int nsiov, const struct kvec *siov,
+ unsigned int soffset, unsigned int nob)
{
/* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
+ const char *s;
+ size_t left;
if (!nob)
return;
- /* skip complete frags before 'doffset' */
- LASSERT(ndiov > 0);
- while (doffset >= diov->iov_len) {
- doffset -= diov->iov_len;
- diov++;
- ndiov--;
- LASSERT(ndiov > 0);
- }
-
/* skip complete frags before 'soffset' */
LASSERT(nsiov > 0);
while (soffset >= siov->iov_len) {
@@ -194,39 +188,68 @@ lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
LASSERT(nsiov > 0);
}
+ s = (char *)siov->iov_base + soffset;
+ left = siov->iov_len - soffset;
do {
- LASSERT(ndiov > 0);
+ size_t n, copy = left;
LASSERT(nsiov > 0);
- this_nob = min(diov->iov_len - doffset,
- siov->iov_len - soffset);
- this_nob = min(this_nob, nob);
- memcpy((char *)diov->iov_base + doffset,
- (char *)siov->iov_base + soffset, this_nob);
- nob -= this_nob;
+ if (copy > nob)
+ copy = nob;
+ n = copy_to_iter(s, copy, to);
+ if (n != copy)
+ return;
+ nob -= n;
- if (diov->iov_len > doffset + this_nob) {
- doffset += this_nob;
- } else {
- diov++;
- ndiov--;
- doffset = 0;
- }
+ siov++;
+ s = (char *)siov->iov_base;
+ left = siov->iov_len;
+ nsiov--;
+ } while (nob > 0);
+}
+EXPORT_SYMBOL(lnet_copy_iov2iter);
- if (siov->iov_len > soffset + this_nob) {
- soffset += this_nob;
- } else {
- siov++;
- nsiov--;
- soffset = 0;
- }
+void
+lnet_copy_kiov2iter(struct iov_iter *to,
+ unsigned int nsiov, const lnet_kiov_t *siov,
+ unsigned int soffset, unsigned int nob)
+{
+ if (!nob)
+ return;
+
+ LASSERT(!in_interrupt());
+
+ LASSERT(nsiov > 0);
+ while (soffset >= siov->bv_len) {
+ soffset -= siov->bv_len;
+ siov++;
+ nsiov--;
+ LASSERT(nsiov > 0);
+ }
+
+ do {
+ size_t copy = siov->bv_len - soffset, n;
+
+ LASSERT(nsiov > 0);
+
+ if (copy > nob)
+ copy = nob;
+ n = copy_page_to_iter(siov->bv_page,
+ siov->bv_offset + soffset,
+ copy, to);
+ if (n != copy)
+ return;
+ nob -= n;
+ siov++;
+ nsiov--;
+ soffset = 0;
} while (nob > 0);
}
-EXPORT_SYMBOL(lnet_copy_iov2iov);
+EXPORT_SYMBOL(lnet_copy_kiov2iter);
int
lnet_extract_iov(int dst_niov, struct kvec *dst,
- int src_niov, struct kvec *src,
+ int src_niov, const struct kvec *src,
unsigned int offset, unsigned int len)
{
/*
@@ -280,238 +303,15 @@ lnet_kiov_nob(unsigned int niov, lnet_kiov_t *kiov)
LASSERT(!niov || kiov);
while (niov-- > 0)
- nob += (kiov++)->kiov_len;
+ nob += (kiov++)->bv_len;
return nob;
}
EXPORT_SYMBOL(lnet_kiov_nob);
-void
-lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
- unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
- unsigned int nob)
-{
- /* NB diov, siov are READ-ONLY */
- unsigned int this_nob;
- char *daddr = NULL;
- char *saddr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(ndiov > 0);
- while (doffset >= diov->kiov_len) {
- doffset -= diov->kiov_len;
- diov++;
- ndiov--;
- LASSERT(ndiov > 0);
- }
-
- LASSERT(nsiov > 0);
- while (soffset >= siov->kiov_len) {
- soffset -= siov->kiov_len;
- siov++;
- nsiov--;
- LASSERT(nsiov > 0);
- }
-
- do {
- LASSERT(ndiov > 0);
- LASSERT(nsiov > 0);
- this_nob = min(diov->kiov_len - doffset,
- siov->kiov_len - soffset);
- this_nob = min(this_nob, nob);
-
- if (!daddr)
- daddr = ((char *)kmap(diov->kiov_page)) +
- diov->kiov_offset + doffset;
- if (!saddr)
- saddr = ((char *)kmap(siov->kiov_page)) +
- siov->kiov_offset + soffset;
-
- /*
- * Vanishing risk of kmap deadlock when mapping 2 pages.
- * However in practice at least one of the kiovs will be mapped
- * kernel pages and the map/unmap will be NOOPs
- */
- memcpy(daddr, saddr, this_nob);
- nob -= this_nob;
-
- if (diov->kiov_len > doffset + this_nob) {
- daddr += this_nob;
- doffset += this_nob;
- } else {
- kunmap(diov->kiov_page);
- daddr = NULL;
- diov++;
- ndiov--;
- doffset = 0;
- }
-
- if (siov->kiov_len > soffset + this_nob) {
- saddr += this_nob;
- soffset += this_nob;
- } else {
- kunmap(siov->kiov_page);
- saddr = NULL;
- siov++;
- nsiov--;
- soffset = 0;
- }
- } while (nob > 0);
-
- if (daddr)
- kunmap(diov->kiov_page);
- if (saddr)
- kunmap(siov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2kiov);
-
-void
-lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset,
- unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset, unsigned int nob)
-{
- /* NB iov, kiov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT(niov > 0);
- }
-
- LASSERT(nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT(nkiov > 0);
- }
-
- do {
- LASSERT(niov > 0);
- LASSERT(nkiov > 0);
- this_nob = min(iov->iov_len - iovoffset,
- (__kernel_size_t)kiov->kiov_len - kiovoffset);
- this_nob = min(this_nob, nob);
-
- if (!addr)
- addr = ((char *)kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy((char *)iov->iov_base + iovoffset, addr, this_nob);
- nob -= this_nob;
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- } while (nob > 0);
-
- if (addr)
- kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iov);
-
-void
-lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
- unsigned int kiovoffset, unsigned int niov,
- struct kvec *iov, unsigned int iovoffset,
- unsigned int nob)
-{
- /* NB kiov, iov are READ-ONLY */
- unsigned int this_nob;
- char *addr = NULL;
-
- if (!nob)
- return;
-
- LASSERT(!in_interrupt());
-
- LASSERT(nkiov > 0);
- while (kiovoffset >= kiov->kiov_len) {
- kiovoffset -= kiov->kiov_len;
- kiov++;
- nkiov--;
- LASSERT(nkiov > 0);
- }
-
- LASSERT(niov > 0);
- while (iovoffset >= iov->iov_len) {
- iovoffset -= iov->iov_len;
- iov++;
- niov--;
- LASSERT(niov > 0);
- }
-
- do {
- LASSERT(nkiov > 0);
- LASSERT(niov > 0);
- this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset,
- iov->iov_len - iovoffset);
- this_nob = min(this_nob, nob);
-
- if (!addr)
- addr = ((char *)kmap(kiov->kiov_page)) +
- kiov->kiov_offset + kiovoffset;
-
- memcpy(addr, (char *)iov->iov_base + iovoffset, this_nob);
- nob -= this_nob;
-
- if (kiov->kiov_len > kiovoffset + this_nob) {
- addr += this_nob;
- kiovoffset += this_nob;
- } else {
- kunmap(kiov->kiov_page);
- addr = NULL;
- kiov++;
- nkiov--;
- kiovoffset = 0;
- }
-
- if (iov->iov_len > iovoffset + this_nob) {
- iovoffset += this_nob;
- } else {
- iov++;
- niov--;
- iovoffset = 0;
- }
- } while (nob > 0);
-
- if (addr)
- kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_iov2kiov);
-
int
lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
- int src_niov, lnet_kiov_t *src,
+ int src_niov, const lnet_kiov_t *src,
unsigned int offset, unsigned int len)
{
/*
@@ -526,8 +326,8 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
return 0; /* no frags */
LASSERT(src_niov > 0);
- while (offset >= src->kiov_len) { /* skip initial frags */
- offset -= src->kiov_len;
+ while (offset >= src->bv_len) { /* skip initial frags */
+ offset -= src->bv_len;
src_niov--;
src++;
LASSERT(src_niov > 0);
@@ -538,19 +338,19 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
LASSERT(src_niov > 0);
LASSERT((int)niov <= dst_niov);
- frag_len = src->kiov_len - offset;
- dst->kiov_page = src->kiov_page;
- dst->kiov_offset = src->kiov_offset + offset;
+ frag_len = src->bv_len - offset;
+ dst->bv_page = src->bv_page;
+ dst->bv_offset = src->bv_offset + offset;
if (len <= frag_len) {
- dst->kiov_len = len;
- LASSERT(dst->kiov_offset + dst->kiov_len
+ dst->bv_len = len;
+ LASSERT(dst->bv_offset + dst->bv_len
<= PAGE_SIZE);
return niov;
}
- dst->kiov_len = frag_len;
- LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+ dst->bv_len = frag_len;
+ LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
len -= frag_len;
dst++;
@@ -569,6 +369,7 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
unsigned int niov = 0;
struct kvec *iov = NULL;
lnet_kiov_t *kiov = NULL;
+ struct iov_iter to;
int rc;
LASSERT(!in_interrupt());
@@ -594,8 +395,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
}
}
- rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed,
- niov, iov, kiov, offset, mlen, rlen);
+ if (iov) {
+ iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
+ iov_iter_advance(&to, offset);
+ } else {
+ iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
+ iov_iter_advance(&to, offset);
+ }
+ rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
@@ -2002,6 +1809,9 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
lnet_msgtyp2str(type), rc);
lnet_msg_free(msg);
+ if (rc == -ESHUTDOWN)
+ /* We are shutting down. Don't do anything more */
+ return 0;
goto drop;
}
@@ -2512,6 +2322,15 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
}
if (LNET_NIDNET(ni->ni_nid) == dstnet) {
+ /*
+ * Check if ni was originally created in
+ * current net namespace.
+ * If not, assign order above 0xffff0000,
+ * to make this ni not a priority.
+ */
+ if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
+ order += 0xffff0000;
+
if (srcnidp)
*srcnidp = ni->ni_nid;
if (orderp)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
index 910e106e221d..0897e588bd54 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c
@@ -449,23 +449,7 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status)
if (!msg)
return;
-#if 0
- CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
- lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
- msg->msg_target_is_router ? "t" : "",
- msg->msg_routing ? "X" : "",
- msg->msg_ack ? "A" : "",
- msg->msg_sending ? "S" : "",
- msg->msg_receiving ? "R" : "",
- msg->msg_delayed ? "d" : "",
- msg->msg_txcredit ? "C" : "",
- msg->msg_peertxcredit ? "c" : "",
- msg->msg_rtrcredit ? "F" : "",
- msg->msg_peerrtrcredit ? "f" : "",
- msg->msg_onactivelist ? "!" : "",
- !msg->msg_txpeer ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
- !msg->msg_rxpeer ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
-#endif
+
msg->msg_ev.status = status;
if (msg->msg_md) {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 891fd59401d7..4e6dd5149b4f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -265,21 +265,17 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
unsigned long then;
struct timeval tv;
+ struct kvec iov = { .iov_base = buffer, .iov_len = nob };
+ struct msghdr msg = {NULL,};
LASSERT(nob > 0);
/*
* Caller may pass a zero timeout if she thinks the socket buffer is
* empty enough to take the whole message immediately
*/
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
for (;;) {
- struct kvec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_flags = !timeout ? MSG_DONTWAIT : 0
- };
-
+ msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
if (timeout) {
/* Set send timeout to remaining time */
jiffies_to_timeval(jiffies_left, &tv);
@@ -296,9 +292,6 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
jiffies_left -= jiffies - then;
- if (rc == nob)
- return 0;
-
if (rc < 0)
return rc;
@@ -307,11 +300,11 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
return -ECONNABORTED;
}
+ if (!msg_data_left(&msg))
+ break;
+
if (jiffies_left <= 0)
return -EAGAIN;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
}
return 0;
}
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
index 08402712a452..cb213b8f51cf 100644
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ b/drivers/staging/lustre/lnet/lnet/lo.c
@@ -42,36 +42,23 @@ lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
static int
lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
+ int delayed, struct iov_iter *to, unsigned int rlen)
{
lnet_msg_t *sendmsg = private;
if (lntmsg) { /* not discarding */
- if (sendmsg->msg_iov) {
- if (iov)
- lnet_copy_iov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_iov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_iov,
- sendmsg->msg_offset, mlen);
- } else {
- if (iov)
- lnet_copy_kiov2iov(niov, iov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- else
- lnet_copy_kiov2kiov(niov, kiov, offset,
- sendmsg->msg_niov,
- sendmsg->msg_kiov,
- sendmsg->msg_offset, mlen);
- }
+ if (sendmsg->msg_iov)
+ lnet_copy_iov2iter(to,
+ sendmsg->msg_niov,
+ sendmsg->msg_iov,
+ sendmsg->msg_offset,
+ iov_iter_count(to));
+ else
+ lnet_copy_kiov2iter(to,
+ sendmsg->msg_niov,
+ sendmsg->msg_kiov,
+ sendmsg->msg_offset,
+ iov_iter_count(to));
lnet_finalize(ni, lntmsg, 0);
}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 063543233035..063ad55ec950 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1307,7 +1307,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].kiov_page);
+ __free_page(rb->rb_kiov[npages].bv_page);
LIBCFS_FREE(rb, sz);
}
@@ -1333,15 +1333,15 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
GFP_KERNEL | __GFP_ZERO, 0);
if (!page) {
while (--i >= 0)
- __free_page(rb->rb_kiov[i].kiov_page);
+ __free_page(rb->rb_kiov[i].bv_page);
LIBCFS_FREE(rb, sz);
return NULL;
}
- rb->rb_kiov[i].kiov_len = PAGE_SIZE;
- rb->rb_kiov[i].kiov_offset = 0;
- rb->rb_kiov[i].kiov_page = page;
+ rb->rb_kiov[i].bv_len = PAGE_SIZE;
+ rb->rb_kiov[i].bv_offset = 0;
+ rb->rb_kiov[i].bv_page = page;
}
return rb;
@@ -1693,7 +1693,7 @@ lnet_rtrpools_adjust(int tiny, int small, int large)
int
lnet_rtrpools_enable(void)
{
- int rc;
+ int rc = 0;
if (the_lnet.ln_routing)
return 0;
@@ -1706,9 +1706,9 @@ lnet_rtrpools_enable(void)
* if we are just configuring this for the first
* time.
*/
- return lnet_rtrpools_alloc(1);
-
- rc = lnet_rtrpools_adjust_helper(0, 0, 0);
+ rc = lnet_rtrpools_alloc(1);
+ else
+ rc = lnet_rtrpools_adjust_helper(0, 0, 0);
if (rc)
return rc;
@@ -1718,7 +1718,7 @@ lnet_rtrpools_enable(void)
the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
lnet_net_unlock(LNET_LOCK_EX);
- return 0;
+ return rc;
}
void
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index 13d0454e7fcb..b20c5d394e3b 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -226,7 +226,7 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
brw_fill_page(pg, pattern, magic);
}
}
@@ -238,7 +238,7 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
if (brw_check_page(pg, pattern, magic)) {
CERROR("Bulk page %p (%d/%d) is corrupted!\n",
pg, i, bk->bk_niov);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 1be3cad727ae..55afb53b0743 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -152,10 +152,10 @@ lstcon_rpc_put(struct lstcon_rpc *crpc)
LASSERT(list_empty(&crpc->crp_link));
for (i = 0; i < bulk->bk_niov; i++) {
- if (!bulk->bk_iovs[i].kiov_page)
+ if (!bulk->bk_iovs[i].bv_page)
continue;
- __free_page(bulk->bk_iovs[i].kiov_page);
+ __free_page(bulk->bk_iovs[i].bv_page);
}
srpc_client_rpc_decref(crpc->crp_rpc);
@@ -705,7 +705,7 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
LASSERT(i < nkiov);
- pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page);
+ pid = (lnet_process_id_packed_t *)page_address(kiov[i].bv_page);
return &pid[idx % SFW_ID_PER_PAGE];
}
@@ -849,12 +849,11 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
min_t(int, nob, PAGE_SIZE);
nob -= len;
- bulk->bk_iovs[i].kiov_offset = 0;
- bulk->bk_iovs[i].kiov_len = len;
- bulk->bk_iovs[i].kiov_page =
- alloc_page(GFP_KERNEL);
+ bulk->bk_iovs[i].bv_offset = 0;
+ bulk->bk_iovs[i].bv_len = len;
+ bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
- if (!bulk->bk_iovs[i].kiov_page) {
+ if (!bulk->bk_iovs[i].bv_page) {
lstcon_rpc_put(*crpc);
return -ENOMEM;
}
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
index 4c33621f06da..a0fcbf3bcc95 100644
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -1993,8 +1993,6 @@ static void lstcon_init_acceptor_service(void)
lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
}
-extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-
static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
/* initialize console */
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
index 78b147732615..78388a611c22 100644
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -184,6 +184,7 @@ lstcon_id2hash(lnet_process_id_t id, struct list_head *hash)
return &hash[idx];
}
+int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
int lstcon_console_init(void);
int lstcon_console_fini(void);
int lstcon_session_match(lst_sid_t sid);
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index c2f121f44d33..abbd6287b4bd 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -784,8 +784,8 @@ sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
lnet_process_id_packed_t id;
int j;
- dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
- LASSERT(dests); /* my pages are within KVM always */
+ dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
+ LASSERT(dests); /* my pages are within KVM always */
id = dests[i % SFW_ID_PER_PAGE];
if (msg->msg_magic != SRPC_MSG_MAGIC)
sfw_unpack_id(id);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 3b26d6eb4240..f5619d8744ef 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -91,9 +91,9 @@ srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob)
LASSERT(nob > 0);
LASSERT(i >= 0 && i < bk->bk_niov);
- bk->bk_iovs[i].kiov_offset = 0;
- bk->bk_iovs[i].kiov_page = pg;
- bk->bk_iovs[i].kiov_len = nob;
+ bk->bk_iovs[i].bv_offset = 0;
+ bk->bk_iovs[i].bv_page = pg;
+ bk->bk_iovs[i].bv_len = nob;
return nob;
}
@@ -106,7 +106,7 @@ srpc_free_bulk(struct srpc_bulk *bk)
LASSERT(bk);
for (i = 0; i < bk->bk_niov; i++) {
- pg = bk->bk_iovs[i].kiov_page;
+ pg = bk->bk_iovs[i].bv_page;
if (!pg)
break;