summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 16:20:53 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 16:20:53 +0400
commite33bae14fd8da449d735552d78e6dd33ece0458c (patch)
tree048c062c47f7bfdb0ea8ad19c5b5849a58ff3022 /net
parent7670c7010c7b367ca40c3aba02afb36149764a6e (diff)
parent14211d026dad4641d4dffd7a4c520bcaa8fd4a65 (diff)
downloadlinux-e33bae14fd8da449d735552d78e6dd33ece0458c.tar.xz
Merge branch 'for-linus' of git://github.com/ericvh/linux
* 'for-linus' of git://github.com/ericvh/linux: 9p: fix 9p.txt to advertise msize instead of maxdata net/9p: Convert net/9p protocol dumps to tracepoints fs/9p: change an int to unsigned int fs/9p: Cleanup option parsing in 9p 9p: move dereference after NULL check fs/9p: inode file operation is properly initialized init_special_inode fs/9p: Update zero-copy implementation in 9p
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c469
-rw-r--r--net/9p/protocol.c99
-rw-r--r--net/9p/protocol.h4
-rw-r--r--net/9p/trans_common.c53
-rw-r--r--net/9p/trans_common.h21
-rw-r--r--net/9p/trans_virtio.c319
6 files changed, 554 insertions, 411 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 0505a03c374c..854ca7a911c4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -38,6 +38,9 @@
#include <net/9p/transport.h>
#include "protocol.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/9p.h>
+
/*
* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
@@ -123,21 +126,19 @@ static int parse_opts(char *opts, struct p9_client *clnt)
options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ int token, r;
if (!*p)
continue;
token = match_token(p, tokens, args);
- if (token < Opt_trans) {
- int r = match_int(&args[0], &option);
+ switch (token) {
+ case Opt_msize:
+ r = match_int(&args[0], &option);
if (r < 0) {
P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ "integer field, but no integer?\n");
ret = r;
continue;
}
- }
- switch (token) {
- case Opt_msize:
clnt->msize = option;
break;
case Opt_trans:
@@ -203,11 +204,13 @@ free_and_return:
*
*/
-static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
+static struct p9_req_t *
+p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
{
unsigned long flags;
int row, col;
struct p9_req_t *req;
+ int alloc_msize = min(c->msize, max_size);
/* This looks up the original request by tag so we know which
* buffer to read the data into */
@@ -245,23 +248,10 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
return ERR_PTR(-ENOMEM);
}
init_waitqueue_head(req->wq);
- if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
- P9_TRANS_PREF_PAYLOAD_SEP) {
- int alloc_msize = min(c->msize, 4096);
- req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
- GFP_NOFS);
- req->tc->capacity = alloc_msize;
- req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
- GFP_NOFS);
- req->rc->capacity = alloc_msize;
- } else {
- req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_NOFS);
- req->tc->capacity = c->msize;
- req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_NOFS);
- req->rc->capacity = c->msize;
- }
+ req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
+ GFP_NOFS);
+ req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
+ GFP_NOFS);
if ((!req->tc) || (!req->rc)) {
printk(KERN_ERR "Couldn't grow tag array\n");
kfree(req->tc);
@@ -271,6 +261,8 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
req->wq = NULL;
return ERR_PTR(-ENOMEM);
}
+ req->tc->capacity = alloc_msize;
+ req->rc->capacity = alloc_msize;
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
}
@@ -475,37 +467,22 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
int ecode;
err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+ /*
+ * dump the response from server
+ * This should be after check errors which poplulate pdu_fcall.
+ */
+ trace_9p_protocol_dump(c, req->rc);
if (err) {
P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
}
-
if (type != P9_RERROR && type != P9_RLERROR)
return 0;
if (!p9_is_proto_dotl(c)) {
char *ename;
-
- if (req->tc->pbuf_size) {
- /* Handle user buffers */
- size_t len = req->rc->size - req->rc->offset;
- if (req->tc->pubuf) {
- /* User Buffer */
- err = copy_from_user(
- &req->rc->sdata[req->rc->offset],
- req->tc->pubuf, len);
- if (err) {
- err = -EFAULT;
- goto out_err;
- }
- } else {
- /* Kernel Buffer */
- memmove(&req->rc->sdata[req->rc->offset],
- req->tc->pkbuf, len);
- }
- }
err = p9pdu_readf(req->rc, c->proto_version, "s?d",
- &ename, &ecode);
+ &ename, &ecode);
if (err)
goto out_err;
@@ -515,11 +492,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
if (!err || !IS_ERR_VALUE(err)) {
err = p9_errstr2errno(ename, strlen(ename));
- P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
- ename);
-
- kfree(ename);
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
+ -ecode, ename);
}
+ kfree(ename);
} else {
err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
err = -ecode;
@@ -527,7 +503,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
}
-
return err;
out_err:
@@ -536,6 +511,115 @@ out_err:
return err;
}
+/**
+ * p9_check_zc_errors - check 9p packet for error return and process it
+ * @c: current client instance
+ * @req: request to parse and check for error conditions
+ * @in_hdrlen: Size of response protocol buffer.
+ *
+ * returns error code if one is discovered, otherwise returns 0
+ *
+ * this will have to be more complicated if we have multiple
+ * error packet types
+ */
+
+static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
+ char *uidata, int in_hdrlen, int kern_buf)
+{
+ int err;
+ int ecode;
+ int8_t type;
+ char *ename = NULL;
+
+ err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+ /*
+ * dump the response from server
+ * This should be after parse_header which poplulate pdu_fcall.
+ */
+ trace_9p_protocol_dump(c, req->rc);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
+ return err;
+ }
+
+ if (type != P9_RERROR && type != P9_RLERROR)
+ return 0;
+
+ if (!p9_is_proto_dotl(c)) {
+ /* Error is reported in string format */
+ uint16_t len;
+ /* 7 = header size for RERROR, 2 is the size of string len; */
+ int inline_len = in_hdrlen - (7 + 2);
+
+ /* Read the size of error string */
+ err = p9pdu_readf(req->rc, c->proto_version, "w", &len);
+ if (err)
+ goto out_err;
+
+ ename = kmalloc(len + 1, GFP_NOFS);
+ if (!ename) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ if (len <= inline_len) {
+ /* We have error in protocol buffer itself */
+ if (pdu_read(req->rc, ename, len)) {
+ err = -EFAULT;
+ goto out_free;
+
+ }
+ } else {
+ /*
+ * Part of the data is in user space buffer.
+ */
+ if (pdu_read(req->rc, ename, inline_len)) {
+ err = -EFAULT;
+ goto out_free;
+
+ }
+ if (kern_buf) {
+ memcpy(ename + inline_len, uidata,
+ len - inline_len);
+ } else {
+ err = copy_from_user(ename + inline_len,
+ uidata, len - inline_len);
+ if (err) {
+ err = -EFAULT;
+ goto out_free;
+ }
+ }
+ }
+ ename[len] = 0;
+ if (p9_is_proto_dotu(c)) {
+ /* For dotu we also have error code */
+ err = p9pdu_readf(req->rc,
+ c->proto_version, "d", &ecode);
+ if (err)
+ goto out_free;
+ err = -ecode;
+ }
+ if (!err || !IS_ERR_VALUE(err)) {
+ err = p9_errstr2errno(ename, strlen(ename));
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
+ -ecode, ename);
+ }
+ kfree(ename);
+ } else {
+ err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = -ecode;
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ }
+ return err;
+
+out_free:
+ kfree(ename);
+out_err:
+ P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
+ return err;
+}
+
static struct p9_req_t *
p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
@@ -579,23 +663,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
return 0;
}
-/**
- * p9_client_rpc - issue a request and wait for a response
- * @c: client session
- * @type: type of request
- * @fmt: protocol format string (see protocol.c)
- *
- * Returns request structure (which client must free using p9_free_req)
- */
-
-static struct p9_req_t *
-p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
+ int8_t type, int req_size,
+ const char *fmt, va_list ap)
{
- va_list ap;
int tag, err;
struct p9_req_t *req;
- unsigned long flags;
- int sigpending;
P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
@@ -607,12 +680,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
return ERR_PTR(-EIO);
- if (signal_pending(current)) {
- sigpending = 1;
- clear_thread_flag(TIF_SIGPENDING);
- } else
- sigpending = 0;
-
tag = P9_NOTAG;
if (type != P9_TVERSION) {
tag = p9_idpool_get(c->tagpool);
@@ -620,18 +687,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
return ERR_PTR(-ENOMEM);
}
- req = p9_tag_alloc(c, tag);
+ req = p9_tag_alloc(c, tag, req_size);
if (IS_ERR(req))
return req;
/* marshall the data */
p9pdu_prepare(req->tc, tag, type);
- va_start(ap, fmt);
err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
- va_end(ap);
if (err)
goto reterr;
- p9pdu_finalize(req->tc);
+ p9pdu_finalize(c, req->tc);
+ trace_9p_client_req(c, type, tag);
+ return req;
+reterr:
+ p9_free_req(c, req);
+ return ERR_PTR(err);
+}
+
+/**
+ * p9_client_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
+{
+ va_list ap;
+ int sigpending, err;
+ unsigned long flags;
+ struct p9_req_t *req;
+
+ va_start(ap, fmt);
+ req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
+ va_end(ap);
+ if (IS_ERR(req))
+ return req;
+
+ if (signal_pending(current)) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+ } else
+ sigpending = 0;
err = c->trans_mod->request(c, req);
if (err < 0) {
@@ -639,18 +739,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
c->status = Disconnected;
goto reterr;
}
-
- P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
+ /* Wait for the response */
err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
- P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
- req->wq, tag, err);
+ req->status >= REQ_STATUS_RCVD);
if (req->status == REQ_STATUS_ERROR) {
P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
}
-
if ((err == -ERESTARTSYS) && (c->status == Connected)) {
P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
sigpending = 1;
@@ -663,25 +759,102 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if (req->status == REQ_STATUS_RCVD)
err = 0;
}
-
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
-
if (err < 0)
goto reterr;
err = p9_check_errors(c, req);
- if (!err) {
- P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
+ trace_9p_client_res(c, type, req->rc->tag, err);
+ if (!err)
+ return req;
+reterr:
+ p9_free_req(c, req);
+ return ERR_PTR(err);
+}
+
+/**
+ * p9_client_zc_rpc - issue a request and wait for a response
+ * @c: client session
+ * @type: type of request
+ * @uidata: user bffer that should be ued for zero copy read
+ * @uodata: user buffer that shoud be user for zero copy write
+ * @inlen: read buffer size
+ * @olen: write buffer size
+ * @hdrlen: reader header size, This is the size of response protocol data
+ * @fmt: protocol format string (see protocol.c)
+ *
+ * Returns request structure (which client must free using p9_free_req)
+ */
+static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
+ char *uidata, char *uodata,
+ int inlen, int olen, int in_hdrlen,
+ int kern_buf, const char *fmt, ...)
+{
+ va_list ap;
+ int sigpending, err;
+ unsigned long flags;
+ struct p9_req_t *req;
+
+ va_start(ap, fmt);
+ /*
+ * We allocate a inline protocol data of only 4k bytes.
+ * The actual content is passed in zero-copy fashion.
+ */
+ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
+ va_end(ap);
+ if (IS_ERR(req))
return req;
+
+ if (signal_pending(current)) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+ } else
+ sigpending = 0;
+
+ /* If we are called with KERNEL_DS force kern_buf */
+ if (segment_eq(get_fs(), KERNEL_DS))
+ kern_buf = 1;
+
+ err = c->trans_mod->zc_request(c, req, uidata, uodata,
+ inlen, olen, in_hdrlen, kern_buf);
+ if (err < 0) {
+ if (err == -EIO)
+ c->status = Disconnected;
+ goto reterr;
+ }
+ if (req->status == REQ_STATUS_ERROR) {
+ P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
+ err = req->t_err;
+ }
+ if ((err == -ERESTARTSYS) && (c->status == Connected)) {
+ P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+
+ if (c->trans_mod->cancel(c, req))
+ p9_client_flush(c, req);
+
+ /* if we received the response anyway, don't signal error */
+ if (req->status == REQ_STATUS_RCVD)
+ err = 0;
+ }
+ if (sigpending) {
+ spin_lock_irqsave(&current->sighand->siglock, flags);
+ recalc_sigpending();
+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
+ if (err < 0)
+ goto reterr;
+ err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf);
+ trace_9p_client_res(c, type, req->rc->tag, err);
+ if (!err)
+ return req;
reterr:
- P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
- err);
p9_free_req(c, req);
return ERR_PTR(err);
}
@@ -769,7 +942,7 @@ static int p9_client_version(struct p9_client *c)
err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(c, req->rc);
goto error;
}
@@ -906,15 +1079,14 @@ EXPORT_SYMBOL(p9_client_begin_disconnect);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
char *uname, u32 n_uname, char *aname)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
- P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
- afid ? afid->fid : -1, uname, aname);
- err = 0;
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n",
+ afid ? afid->fid : -1, uname, aname);
fid = p9_fid_create(clnt);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
@@ -931,7 +1103,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -991,7 +1163,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -1058,7 +1230,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
@@ -1101,7 +1273,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
@@ -1146,7 +1318,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
@@ -1185,7 +1357,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
@@ -1330,13 +1502,15 @@ int
p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
u32 count)
{
- int err, rsize;
- struct p9_client *clnt;
- struct p9_req_t *req;
char *dataptr;
+ int kernel_buf = 0;
+ struct p9_req_t *req;
+ struct p9_client *clnt;
+ int err, rsize, non_zc = 0;
+
- P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid,
- (long long unsigned) offset, count);
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
+ fid->fid, (long long unsigned) offset, count);
err = 0;
clnt = fid->clnt;
@@ -1348,13 +1522,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
rsize = count;
/* Don't bother zerocopy for small IO (< 1024) */
- if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
- P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
- req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
- rsize, data, udata);
+ if (clnt->trans_mod->zc_request && rsize > 1024) {
+ char *indata;
+ if (data) {
+ kernel_buf = 1;
+ indata = data;
+ } else
+ indata = (char *)udata;
+ /*
+ * response header len is 11
+ * PDU Header(7) + IO Size (4)
+ */
+ req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0,
+ 11, kernel_buf, "dqd", fid->fid,
+ offset, rsize);
} else {
+ non_zc = 1;
req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
- rsize);
+ rsize);
}
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1363,14 +1548,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
- P9_DUMP_PKT(1, req->rc);
- if (!req->tc->pbuf_size) {
+ if (non_zc) {
if (data) {
memmove(data, dataptr, count);
} else {
@@ -1396,6 +1580,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
u64 offset, u32 count)
{
int err, rsize;
+ int kernel_buf = 0;
struct p9_client *clnt;
struct p9_req_t *req;
@@ -1411,19 +1596,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
if (count < rsize)
rsize = count;
- /* Don't bother zerocopy form small IO (< 1024) */
- if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
- P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
- req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
- rsize, data, udata);
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && rsize > 1024) {
+ char *odata;
+ if (data) {
+ kernel_buf = 1;
+ odata = data;
+ } else
+ odata = (char *)udata;
+ req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize,
+ P9_ZC_HDR_SZ, kernel_buf, "dqd",
+ fid->fid, offset, rsize);
} else {
-
if (data)
req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
- offset, rsize, data);
+ offset, rsize, data);
else
req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
- offset, rsize, udata);
+ offset, rsize, udata);
}
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1432,7 +1622,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
@@ -1472,7 +1662,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1523,7 +1713,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1671,7 +1861,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1778,7 +1968,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
}
err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -1824,7 +2014,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
{
- int err, rsize;
+ int err, rsize, non_zc = 0;
struct p9_client *clnt;
struct p9_req_t *req;
char *dataptr;
@@ -1842,13 +2032,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (count < rsize)
rsize = count;
- if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
- P9_TRANS_PREF_PAYLOAD_SEP) {
- req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
- offset, rsize, data);
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && rsize > 1024) {
+ /*
+ * response header len is 11
+ * PDU Header(7) + IO Size (4)
+ */
+ req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0,
+ 11, 1, "dqd", fid->fid, offset, rsize);
} else {
+ non_zc = 1;
req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
- offset, rsize);
+ offset, rsize);
}
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1857,13 +2052,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
- if (!req->tc->pbuf_size && data)
+ if (non_zc)
memmove(data, dataptr, count);
p9_free_req(clnt, req);
@@ -1894,7 +2089,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
@@ -1925,7 +2120,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
@@ -1960,7 +2155,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
@@ -1993,7 +2188,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
&glock->start, &glock->length, &glock->proc_id,
&glock->client_id);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
@@ -2021,7 +2216,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
if (err) {
- P9_DUMP_PKT(1, req->rc);
+ trace_9p_protocol_dump(clnt, req->rc);
goto error;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index df58375ea6b3..55e10a96c902 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -37,40 +37,11 @@
#include <net/9p/client.h>
#include "protocol.h"
+#include <trace/events/9p.h>
+
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
-#ifdef CONFIG_NET_9P_DEBUG
-void
-p9pdu_dump(int way, struct p9_fcall *pdu)
-{
- int len = pdu->size;
-
- if ((p9_debug_level & P9_DEBUG_VPKT) != P9_DEBUG_VPKT) {
- if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) {
- if (len > 32)
- len = 32;
- } else {
- /* shouldn't happen */
- return;
- }
- }
-
- if (way)
- print_hex_dump_bytes("[9P] ", DUMP_PREFIX_OFFSET, pdu->sdata,
- len);
- else
- print_hex_dump_bytes("]9P[ ", DUMP_PREFIX_OFFSET, pdu->sdata,
- len);
-}
-#else
-void
-p9pdu_dump(int way, struct p9_fcall *pdu)
-{
-}
-#endif
-EXPORT_SYMBOL(p9pdu_dump);
-
void p9stat_free(struct p9_wstat *stbuf)
{
kfree(stbuf->name);
@@ -81,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf)
}
EXPORT_SYMBOL(p9stat_free);
-static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
+size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
size_t len = min(pdu->size - pdu->offset, size);
memcpy(data, &pdu->sdata[pdu->offset], len);
@@ -108,26 +79,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
return size - len;
}
-static size_t
-pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
- size_t size)
-{
- BUG_ON(pdu->size > P9_IOHDRSZ);
- pdu->pubuf = (char __user *)udata;
- pdu->pkbuf = (char *)kdata;
- pdu->pbuf_size = size;
- return 0;
-}
-
-static size_t
-pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
-{
- BUG_ON(pdu->size > P9_READDIRHDRSZ);
- pdu->pkbuf = (char *)kdata;
- pdu->pbuf_size = size;
- return 0;
-}
-
/*
b - int8_t
w - int16_t
@@ -459,26 +410,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
errcode = -EFAULT;
}
break;
- case 'E':{
- int32_t cnt = va_arg(ap, int32_t);
- const char *k = va_arg(ap, const void *);
- const char __user *u = va_arg(ap,
- const void __user *);
- errcode = p9pdu_writef(pdu, proto_version, "d",
- cnt);
- if (!errcode && pdu_write_urw(pdu, k, u, cnt))
- errcode = -EFAULT;
- }
- break;
- case 'F':{
- int32_t cnt = va_arg(ap, int32_t);
- const char *k = va_arg(ap, const void *);
- errcode = p9pdu_writef(pdu, proto_version, "d",
- cnt);
- if (!errcode && pdu_write_readdir(pdu, k, cnt))
- errcode = -EFAULT;
- }
- break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
@@ -591,7 +522,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
return ret;
}
-int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
+int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)
{
struct p9_fcall fake_pdu;
int ret;
@@ -601,10 +532,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
- ret = p9pdu_readf(&fake_pdu, proto_version, "S", st);
+ ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
- P9_DUMP_PKT(0, &fake_pdu);
+ trace_9p_protocol_dump(clnt, &fake_pdu);
}
return ret;
@@ -617,7 +548,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
-int p9pdu_finalize(struct p9_fcall *pdu)
+int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu)
{
int size = pdu->size;
int err;
@@ -626,7 +557,7 @@ int p9pdu_finalize(struct p9_fcall *pdu)
err = p9pdu_writef(pdu, 0, "d", size);
pdu->size = size;
- P9_DUMP_PKT(0, pdu);
+ trace_9p_protocol_dump(clnt, pdu);
P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
pdu->id, pdu->tag);
@@ -637,14 +568,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
{
pdu->offset = 0;
pdu->size = 0;
- pdu->private = NULL;
- pdu->pubuf = NULL;
- pdu->pkbuf = NULL;
- pdu->pbuf_size = 0;
}
-int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
- int proto_version)
+int p9dirent_read(struct p9_client *clnt, char *buf, int len,
+ struct p9_dirent *dirent)
{
struct p9_fcall fake_pdu;
int ret;
@@ -655,11 +582,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
- ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid,
- &dirent->d_off, &dirent->d_type, &nameptr);
+ ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid,
+ &dirent->d_off, &dirent->d_type, &nameptr);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
- P9_DUMP_PKT(1, &fake_pdu);
+ trace_9p_protocol_dump(clnt, &fake_pdu);
goto out;
}
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 2431c0f38d56..2cc525fa49fa 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
-int p9pdu_finalize(struct p9_fcall *pdu);
-void p9pdu_dump(int, struct p9_fcall *);
+int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);
void p9pdu_reset(struct p9_fcall *pdu);
+size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size);
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 9a70ebdec56e..de8df957867d 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -21,30 +21,25 @@
/**
* p9_release_req_pages - Release pages after the transaction.
- * @*private: PDU's private page of struct trans_rpage_info
*/
-void
-p9_release_req_pages(struct trans_rpage_info *rpinfo)
+void p9_release_pages(struct page **pages, int nr_pages)
{
int i = 0;
-
- while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
- put_page(rpinfo->rp_data[i]);
+ while (pages[i] && nr_pages--) {
+ put_page(pages[i]);
i++;
}
}
-EXPORT_SYMBOL(p9_release_req_pages);
+EXPORT_SYMBOL(p9_release_pages);
/**
* p9_nr_pages - Return number of pages needed to accommodate the payload.
*/
-int
-p9_nr_pages(struct p9_req_t *req)
+int p9_nr_pages(char *data, int len)
{
unsigned long start_page, end_page;
- start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
- end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start_page = (unsigned long)data >> PAGE_SHIFT;
+ end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
return end_page - start_page;
}
EXPORT_SYMBOL(p9_nr_pages);
@@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages);
* @nr_pages: number of pages to accommodate the payload
* @rw: Indicates if the pages are for read or write.
*/
-int
-p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
- int nr_pages, u8 rw)
-{
- uint32_t first_page_bytes = 0;
- int32_t pdata_mapped_pages;
- struct trans_rpage_info *rpinfo;
-
- *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
- if (*pdata_off)
- first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
- req->tc->pbuf_size);
+int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write)
+{
+ int nr_mapped_pages;
- rpinfo = req->tc->private;
- pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
- nr_pages, rw, &rpinfo->rp_data[0]);
- if (pdata_mapped_pages <= 0)
- return pdata_mapped_pages;
+ nr_mapped_pages = get_user_pages_fast((unsigned long)data,
+ *nr_pages, write, pages);
+ if (nr_mapped_pages <= 0)
+ return nr_mapped_pages;
- rpinfo->rp_nr_pages = pdata_mapped_pages;
- if (*pdata_off) {
- *pdata_len = first_page_bytes;
- *pdata_len += min((req->tc->pbuf_size - *pdata_len),
- ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
- } else {
- *pdata_len = min(req->tc->pbuf_size,
- (size_t)pdata_mapped_pages << PAGE_SHIFT);
- }
+ *nr_pages = nr_mapped_pages;
return 0;
}
EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 76309223bb02..173bb550a9eb 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -12,21 +12,6 @@
*
*/
-/* TRUE if it is user context */
-#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
-
-/**
- * struct trans_rpage_info - To store mapped page information in PDU.
- * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
- * @rp_nr_pages: Number of mapped pages
- * @rp_data: Array of page pointers
- */
-struct trans_rpage_info {
- u8 rp_alloc;
- int rp_nr_pages;
- struct page *rp_data[0];
-};
-
-void p9_release_req_pages(struct trans_rpage_info *);
-int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
-int p9_nr_pages(struct p9_req_t *);
+void p9_release_pages(struct page **, int);
+int p9_payload_gup(char *, int *, struct page **, int);
+int p9_nr_pages(char *, int);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e317583fcc73..32aa9834229c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq)
while (1) {
spin_lock_irqsave(&chan->lock, flags);
rc = virtqueue_get_buf(chan->vq, &len);
-
if (rc == NULL) {
spin_unlock_irqrestore(&chan->lock, flags);
break;
}
-
chan->ring_bufs_avail = 1;
spin_unlock_irqrestore(&chan->lock, flags);
/* Wakeup if anyone waiting for VirtIO ring space. */
@@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq)
P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
- if (req->tc->private) {
- struct trans_rpage_info *rp = req->tc->private;
- int p = rp->rp_nr_pages;
- /*Release pages */
- p9_release_req_pages(rp);
- atomic_sub(p, &vp_pinned);
- wake_up(&vp_wq);
- if (rp->rp_alloc)
- kfree(rp);
- req->tc->private = NULL;
- }
req->status = REQ_STATUS_RCVD;
p9_client_cb(chan->client, req);
}
@@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq)
*
*/
-static int
-pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
- int count)
+static int pack_sg_list(struct scatterlist *sg, int start,
+ int limit, char *data, int count)
{
int s;
int index = start;
@@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
* this takes a list of pages.
* @sg: scatter/gather list to pack into
* @start: which segment of the sg_list to start at
- * @pdata_off: Offset into the first page
* @**pdata: a list of pages to add into sg.
+ * @nr_pages: number of pages to pack into the scatter/gather list
+ * @data: data to pack into scatter/gather list
* @count: amount of data to pack into the scatter/gather list
*/
static int
-pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
- struct page **pdata, int count)
+pack_sg_list_p(struct scatterlist *sg, int start, int limit,
+ struct page **pdata, int nr_pages, char *data, int count)
{
- int s;
- int i = 0;
+ int i = 0, s;
+ int data_off;
int index = start;
- if (pdata_off) {
- s = min((int)(PAGE_SIZE - pdata_off), count);
- sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
- count -= s;
- }
-
- while (count) {
- BUG_ON(index > limit);
- s = min((int)PAGE_SIZE, count);
- sg_set_page(&sg[index++], pdata[i++], s, 0);
+ BUG_ON(nr_pages > (limit - start));
+ /*
+ * if the first page doesn't start at
+ * page boundary find the offset
+ */
+ data_off = offset_in_page(data);
+ while (nr_pages) {
+ s = rest_of_page(data);
+ if (s > count)
+ s = count;
+ sg_set_page(&sg[index++], pdata[i++], s, data_off);
+ data_off = 0;
+ data += s;
count -= s;
+ nr_pages--;
}
- return index-start;
+ return index - start;
}
/**
@@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
- int in, out, inp, outp;
- struct virtio_chan *chan = client->trans;
+ int err;
+ int in, out;
unsigned long flags;
- size_t pdata_off = 0;
- struct trans_rpage_info *rpinfo = NULL;
- int err, pdata_len = 0;
+ struct virtio_chan *chan = client->trans;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
req->status = REQ_STATUS_SENT;
+req_retry:
+ spin_lock_irqsave(&chan->lock, flags);
+
+ /* Handle out VirtIO ring buffers */
+ out = pack_sg_list(chan->sg, 0,
+ VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
- if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
- int nr_pages = p9_nr_pages(req);
- int rpinfo_size = sizeof(struct trans_rpage_info) +
- sizeof(struct page *) * nr_pages;
+ in = pack_sg_list(chan->sg, out,
+ VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
- if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
- err = wait_event_interruptible(vp_wq,
- atomic_read(&vp_pinned) < chan->p9_max_pages);
+ err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+ if (err < 0) {
+ if (err == -ENOSPC) {
+ chan->ring_bufs_avail = 0;
+ spin_unlock_irqrestore(&chan->lock, flags);
+ err = wait_event_interruptible(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
return err;
- P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
- }
- if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
- /* We can use sdata */
- req->tc->private = req->tc->sdata + req->tc->size;
- rpinfo = (struct trans_rpage_info *)req->tc->private;
- rpinfo->rp_alloc = 0;
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+ goto req_retry;
} else {
- req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
- if (!req->tc->private) {
- P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
- "private kmalloc returned NULL");
- return -ENOMEM;
- }
- rpinfo = (struct trans_rpage_info *)req->tc->private;
- rpinfo->rp_alloc = 1;
+ spin_unlock_irqrestore(&chan->lock, flags);
+ P9_DPRINTK(P9_DEBUG_TRANS,
+ "9p debug: "
+ "virtio rpc add_buf returned failure");
+ return -EIO;
}
+ }
+ virtqueue_kick(chan->vq);
+ spin_unlock_irqrestore(&chan->lock, flags);
- err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
- req->tc->id == P9_TREAD ? 1 : 0);
- if (err < 0) {
- if (rpinfo->rp_alloc)
- kfree(rpinfo);
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
+ return 0;
+}
+
+static int p9_get_mapped_pages(struct virtio_chan *chan,
+ struct page **pages, char *data,
+ int nr_pages, int write, int kern_buf)
+{
+ int err;
+ if (!kern_buf) {
+ /*
+ * We allow only p9_max_pages pinned. We wait for the
+ * Other zc request to finish here
+ */
+ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+ err = wait_event_interruptible(vp_wq,
+ (atomic_read(&vp_pinned) < chan->p9_max_pages));
+ if (err == -ERESTARTSYS)
+ return err;
+ }
+ err = p9_payload_gup(data, &nr_pages, pages, write);
+ if (err < 0)
return err;
- } else {
- atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
+ atomic_add(nr_pages, &vp_pinned);
+ } else {
+ /* kernel buffer, no need to pin pages */
+ int s, index = 0;
+ int count = nr_pages;
+ while (nr_pages) {
+ s = rest_of_page(data);
+ pages[index++] = virt_to_page(data);
+ data += s;
+ nr_pages--;
}
+ nr_pages = count;
}
+ return nr_pages;
+}
-req_retry_pinned:
- spin_lock_irqsave(&chan->lock, flags);
+/**
+ * p9_virtio_zc_request - issue a zero copy request
+ * @client: client instance issuing the request
+ * @req: request to be issued
+ * @uidata: user bffer that should be ued for zero copy read
+ * @uodata: user buffer that shoud be user for zero copy write
+ * @inlen: read buffer size
+ * @olen: write buffer size
+ * @hdrlen: reader header size, This is the size of response protocol data
+ *
+ */
+static int
+p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+ char *uidata, char *uodata, int inlen,
+ int outlen, int in_hdr_len, int kern_buf)
+{
+ int in, out, err;
+ unsigned long flags;
+ int in_nr_pages = 0, out_nr_pages = 0;
+ struct page **in_pages = NULL, **out_pages = NULL;
+ struct virtio_chan *chan = client->trans;
- /* Handle out VirtIO ring buffers */
- out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
- req->tc->size);
-
- if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
- /* We have additional write payload buffer to take care */
- if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
- outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
- pdata_off, rpinfo->rp_data, pdata_len);
- } else {
- char *pbuf;
- if (req->tc->pubuf)
- pbuf = (__force char *) req->tc->pubuf;
- else
- pbuf = req->tc->pkbuf;
- outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
- req->tc->pbuf_size);
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
+
+ if (uodata) {
+ out_nr_pages = p9_nr_pages(uodata, outlen);
+ out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
+ GFP_NOFS);
+ if (!out_pages) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
+ out_nr_pages, 0, kern_buf);
+ if (out_nr_pages < 0) {
+ err = out_nr_pages;
+ kfree(out_pages);
+ out_pages = NULL;
+ goto err_out;
}
- out += outp;
}
-
- /* Handle in VirtIO ring buffers */
- if (req->tc->pbuf_size &&
- ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
- /*
- * Take care of additional Read payload.
- * 11 is the read/write header = PDU Header(7) + IO Size (4).
- * Arrange in such a way that server places header in the
- * alloced memory and payload onto the user buffer.
- */
- inp = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, 11);
- /*
- * Running executables in the filesystem may result in
- * a read request with kernel buffer as opposed to user buffer.
- */
- if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
- in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
- pdata_off, rpinfo->rp_data, pdata_len);
- } else {
- char *pbuf;
- if (req->tc->pubuf)
- pbuf = (__force char *) req->tc->pubuf;
- else
- pbuf = req->tc->pkbuf;
-
- in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
- pbuf, req->tc->pbuf_size);
+ if (uidata) {
+ in_nr_pages = p9_nr_pages(uidata, inlen);
+ in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
+ GFP_NOFS);
+ if (!in_pages) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
+ in_nr_pages, 1, kern_buf);
+ if (in_nr_pages < 0) {
+ err = in_nr_pages;
+ kfree(in_pages);
+ in_pages = NULL;
+ goto err_out;
}
- in += inp;
- } else {
- in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
- req->rc->sdata, req->rc->capacity);
}
+ req->status = REQ_STATUS_SENT;
+req_retry_pinned:
+ spin_lock_irqsave(&chan->lock, flags);
+ /* out data */
+ out = pack_sg_list(chan->sg, 0,
+ VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+
+ if (out_pages)
+ out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+ out_pages, out_nr_pages, uodata, outlen);
+ /*
+ * Take care of in data
+ * For example TREAD have 11.
+ * 11 is the read/write header = PDU Header(7) + IO Size (4).
+ * Arrange in such a way that server places header in the
+ * alloced memory and payload onto the user buffer.
+ */
+ in = pack_sg_list(chan->sg, out,
+ VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+ if (in_pages)
+ in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+ in_pages, in_nr_pages, uidata, inlen);
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
if (err < 0) {
@@ -376,28 +419,45 @@ req_retry_pinned:
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
- return err;
+ goto err_out;
P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
goto req_retry_pinned;
} else {
spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS,
- "9p debug: "
- "virtio rpc add_buf returned failure");
- if (rpinfo && rpinfo->rp_alloc)
- kfree(rpinfo);
- return -EIO;
+ "9p debug: "
+ "virtio rpc add_buf returned failure");
+ err = -EIO;
+ goto err_out;
}
}
-
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
-
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
- return 0;
+ err = wait_event_interruptible(*req->wq,
+ req->status >= REQ_STATUS_RCVD);
+ /*
+ * Non kernel buffers are pinned, unpin them
+ */
+err_out:
+ if (!kern_buf) {
+ if (in_pages) {
+ p9_release_pages(in_pages, in_nr_pages);
+ atomic_sub(in_nr_pages, &vp_pinned);
+ }
+ if (out_pages) {
+ p9_release_pages(out_pages, out_nr_pages);
+ atomic_sub(out_nr_pages, &vp_pinned);
+ }
+ /* wakeup anybody waiting for slots to pin pages */
+ wake_up(&vp_wq);
+ }
+ kfree(in_pages);
+ kfree(out_pages);
+ return err;
}
static ssize_t p9_mount_tag_show(struct device *dev,
@@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = {
.create = p9_virtio_create,
.close = p9_virtio_close,
.request = p9_virtio_request,
+ .zc_request = p9_virtio_zc_request,
.cancel = p9_virtio_cancel,
-
/*
* We leave one entry for input and one entry for response
* headers. We also skip one more entry to accomodate, address
@@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = {
* page in zero copy.
*/
.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
- .pref = P9_TRANS_PREF_PAYLOAD_SEP,
.def = 0,
.owner = THIS_MODULE,
};