summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-04-03 21:07:43 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-03 21:07:43 +0400
commit3cc50ac0dbda5100684e570247782330155d35e0 (patch)
treef4b8f22d1725ebe65d2fe658d292dabacd7ed564 /fs
parentd9b9be024a6628a01d8730d1fd0b5f25658a2794 (diff)
parentb797cac7487dee6bfddeb161631c1bbc54fa3cdb (diff)
downloadlinux-3cc50ac0dbda5100684e570247782330155d35e0.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-fscache
* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-fscache: (41 commits) NFS: Add mount options to enable local caching on NFS NFS: Display local caching state NFS: Store pages from an NFS inode into a local cache NFS: Read pages from FS-Cache into an NFS inode NFS: nfs_readpage_async() needs to be accessible as a fallback for local caching NFS: Add read context retention for FS-Cache to call back with NFS: FS-Cache page management NFS: Add some new I/O counters for FS-Cache doing things for NFS NFS: Invalidate FsCache page flags when cache removed NFS: Use local disk inode cache NFS: Define and create inode-level cache objects NFS: Define and create superblock-level objects NFS: Define and create server-level objects NFS: Register NFS for caching and retrieve the top-level index NFS: Permit local filesystem caching to be enabled for NFS NFS: Add FS-Cache option bit and debug bit NFS: Add comment banners to some NFS functions FS-Cache: Make kAFS use FS-Cache CacheFiles: A cache that backs onto a mounted filesystem CacheFiles: Export things for CacheFiles ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/Kconfig8
-rw-r--r--fs/afs/Makefile3
-rw-r--r--fs/afs/cache.c503
-rw-r--r--fs/afs/cache.h15
-rw-r--r--fs/afs/cell.c16
-rw-r--r--fs/afs/file.c220
-rw-r--r--fs/afs/inode.c31
-rw-r--r--fs/afs/internal.h53
-rw-r--r--fs/afs/main.c27
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/vlocation.c25
-rw-r--r--fs/afs/volume.c14
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/cachefiles/Kconfig39
-rw-r--r--fs/cachefiles/Makefile18
-rw-r--r--fs/cachefiles/bind.c286
-rw-r--r--fs/cachefiles/daemon.c755
-rw-r--r--fs/cachefiles/interface.c449
-rw-r--r--fs/cachefiles/internal.h360
-rw-r--r--fs/cachefiles/key.c159
-rw-r--r--fs/cachefiles/main.c106
-rw-r--r--fs/cachefiles/namei.c771
-rw-r--r--fs/cachefiles/proc.c134
-rw-r--r--fs/cachefiles/rdwr.c879
-rw-r--r--fs/cachefiles/security.c116
-rw-r--r--fs/cachefiles/xattr.c291
-rw-r--r--fs/fscache/Kconfig56
-rw-r--r--fs/fscache/Makefile19
-rw-r--r--fs/fscache/cache.c415
-rw-r--r--fs/fscache/cookie.c500
-rw-r--r--fs/fscache/fsdef.c144
-rw-r--r--fs/fscache/histogram.c109
-rw-r--r--fs/fscache/internal.h380
-rw-r--r--fs/fscache/main.c124
-rw-r--r--fs/fscache/netfs.c103
-rw-r--r--fs/fscache/object.c810
-rw-r--r--fs/fscache/operation.c459
-rw-r--r--fs/fscache/page.c816
-rw-r--r--fs/fscache/proc.c68
-rw-r--r--fs/fscache/stats.c212
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/client.c14
-rw-r--r--fs/nfs/file.c38
-rw-r--r--fs/nfs/fscache-index.c337
-rw-r--r--fs/nfs/fscache.c523
-rw-r--r--fs/nfs/fscache.h220
-rw-r--r--fs/nfs/inode.c14
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/iostat.h18
-rw-r--r--fs/nfs/read.c27
-rw-r--r--fs/nfs/super.c45
-rw-r--r--fs/splice.c3
-rw-r--r--fs/super.c1
56 files changed, 10413 insertions, 367 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index ae3b34a2ea69..86b203fc3c56 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -66,6 +66,13 @@ config GENERIC_ACL
bool
select FS_POSIX_ACL
+menu "Caches"
+
+source "fs/fscache/Kconfig"
+source "fs/cachefiles/Kconfig"
+
+endmenu
+
if BLOCK
menu "CD-ROM/DVD Filesystems"
diff --git a/fs/Makefile b/fs/Makefile
index 15f73014a208..70b2aed87133 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o
obj-$(CONFIG_DLM) += dlm/
# Do not add any filesystems before this line
+obj-$(CONFIG_FSCACHE) += fscache/
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
obj-$(CONFIG_EXT2_FS) += ext2/
@@ -116,6 +117,7 @@ obj-$(CONFIG_AFS_FS) += afs/
obj-$(CONFIG_BEFS_FS) += befs/
obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
+obj-$(CONFIG_CACHEFILES) += cachefiles/
obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index e7b522fe15e1..5c4e61d3c772 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -19,3 +19,11 @@ config AFS_DEBUG
See <file:Documentation/filesystems/afs.txt> for more information.
If unsure, say N.
+
+config AFS_FSCACHE
+ bool "Provide AFS client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
+ help
+ Say Y here if you want AFS data to be cached locally on disk through
+ the generic filesystem cache manager
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index a66671082cfb..4f64b95d57bd 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -2,7 +2,10 @@
# Makefile for Red Hat Linux AFS client.
#
+afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
+
kafs-objs := \
+ $(afs-cache-y) \
callback.o \
cell.o \
cmservice.o \
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index de0d7de69edc..e2b1d3f16519 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -1,6 +1,6 @@
/* AFS caching stuff
*
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -9,248 +9,395 @@
* 2 of the License, or (at your option) any later version.
*/
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
- const void *entry);
-static void afs_cell_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_cache_cell_index_def = {
- .name = "cell_ix",
- .data_size = sizeof(struct afs_cache_cell),
- .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
- .match = afs_cell_cache_match,
- .update = afs_cell_cache_update,
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen);
+
+static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static enum fscache_checkaux afs_vlocation_cache_check_aux(
+ void *cookie_netfs_data, const void *buffer, uint16_t buflen);
+
+static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+
+static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
+ uint64_t *size);
+static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t buflen);
+static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen);
+static void afs_vnode_cache_now_uncached(void *cookie_netfs_data);
+
+struct fscache_netfs afs_cache_netfs = {
+ .name = "afs",
+ .version = 0,
+};
+
+struct fscache_cookie_def afs_cell_cache_index_def = {
+ .name = "AFS.cell",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = afs_cell_cache_get_key,
+ .get_aux = afs_cell_cache_get_aux,
+ .check_aux = afs_cell_cache_check_aux,
+};
+
+struct fscache_cookie_def afs_vlocation_cache_index_def = {
+ .name = "AFS.vldb",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = afs_vlocation_cache_get_key,
+ .get_aux = afs_vlocation_cache_get_aux,
+ .check_aux = afs_vlocation_cache_check_aux,
+};
+
+struct fscache_cookie_def afs_volume_cache_index_def = {
+ .name = "AFS.volume",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = afs_volume_cache_get_key,
+};
+
+struct fscache_cookie_def afs_vnode_cache_index_def = {
+ .name = "AFS.vnode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = afs_vnode_cache_get_key,
+ .get_attr = afs_vnode_cache_get_attr,
+ .get_aux = afs_vnode_cache_get_aux,
+ .check_aux = afs_vnode_cache_check_aux,
+ .now_uncached = afs_vnode_cache_now_uncached,
};
-#endif
/*
- * match a cell record obtained from the cache
+ * set the key for the index entry
*/
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
- const void *entry)
+static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
{
- const struct afs_cache_cell *ccell = entry;
- struct afs_cell *cell = target;
+ const struct afs_cell *cell = cookie_netfs_data;
+ uint16_t klen;
- _enter("{%s},{%s}", ccell->name, cell->name);
+ _enter("%p,%p,%u", cell, buffer, bufmax);
- if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
- _leave(" = SUCCESS");
- return CACHEFS_MATCH_SUCCESS;
- }
+ klen = strlen(cell->name);
+ if (klen > bufmax)
+ return 0;
- _leave(" = FAILED");
- return CACHEFS_MATCH_FAILED;
+ memcpy(buffer, cell->name, klen);
+ return klen;
}
-#endif
/*
- * update a cell record in the cache
+ * provide new auxilliary cache data
*/
-#ifdef AFS_CACHING_SUPPORT
-static void afs_cell_cache_update(void *source, void *entry)
+static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
{
- struct afs_cache_cell *ccell = entry;
- struct afs_cell *cell = source;
+ const struct afs_cell *cell = cookie_netfs_data;
+ uint16_t dlen;
- _enter("%p,%p", source, entry);
+ _enter("%p,%p,%u", cell, buffer, bufmax);
- strncpy(ccell->name, cell->name, sizeof(ccell->name));
+ dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
+ dlen = min(dlen, bufmax);
+ dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
- memcpy(ccell->vl_servers,
- cell->vl_addrs,
- min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
+ memcpy(buffer, cell->vl_addrs, dlen);
+ return dlen;
+}
+/*
+ * check that the auxilliary data indicates that the entry is still valid
+ */
+static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen)
+{
+ _leave(" = OKAY");
+ return FSCACHE_CHECKAUX_OKAY;
}
-#endif
-
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
- const void *entry);
-static void afs_vlocation_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_vlocation_cache_index_def = {
- .name = "vldb",
- .data_size = sizeof(struct afs_cache_vlocation),
- .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
- .match = afs_vlocation_cache_match,
- .update = afs_vlocation_cache_update,
-};
-#endif
+/*****************************************************************************/
/*
- * match a VLDB record stored in the cache
- * - may also load target from entry
+ * set the key for the index entry
*/
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
- const void *entry)
+static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
{
- const struct afs_cache_vlocation *vldb = entry;
- struct afs_vlocation *vlocation = target;
+ const struct afs_vlocation *vlocation = cookie_netfs_data;
+ uint16_t klen;
+
+ _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
+
+ klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
+ if (klen > bufmax)
+ return 0;
- _enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
+ memcpy(buffer, vlocation->vldb.name, klen);
- if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
- ) {
- if (!vlocation->valid ||
- vlocation->vldb.rtime == vldb->rtime
+ _leave(" = %u", klen);
+ return klen;
+}
+
+/*
+ * provide new auxilliary cache data
+ */
+static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct afs_vlocation *vlocation = cookie_netfs_data;
+ uint16_t dlen;
+
+ _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
+
+ dlen = sizeof(struct afs_cache_vlocation);
+ dlen -= offsetof(struct afs_cache_vlocation, nservers);
+ if (dlen > bufmax)
+ return 0;
+
+ memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
+
+ _leave(" = %u", dlen);
+ return dlen;
+}
+
+/*
+ * check that the auxilliary data indicates that the entry is still valid
+ */
+static
+enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen)
+{
+ const struct afs_cache_vlocation *cvldb;
+ struct afs_vlocation *vlocation = cookie_netfs_data;
+ uint16_t dlen;
+
+ _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
+
+ /* check the size of the data is what we're expecting */
+ dlen = sizeof(struct afs_cache_vlocation);
+ dlen -= offsetof(struct afs_cache_vlocation, nservers);
+ if (dlen != buflen)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
+
+ /* if what's on disk is more valid than what's in memory, then use the
+ * VL record from the cache */
+ if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
+ memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
+ vlocation->valid = 1;
+ _leave(" = SUCCESS [c->m]");
+ return FSCACHE_CHECKAUX_OKAY;
+ }
+
+ /* need to update the cache if the cached info differs */
+ if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
+ /* delete if the volume IDs for this name differ */
+ if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
+ sizeof(cvldb->vid)) != 0
) {
- vlocation->vldb = *vldb;
- vlocation->valid = 1;
- _leave(" = SUCCESS [c->m]");
- return CACHEFS_MATCH_SUCCESS;
- } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
- /* delete if VIDs for this name differ */
- if (memcmp(&vlocation->vldb.vid,
- &vldb->vid,
- sizeof(vldb->vid)) != 0) {
- _leave(" = DELETE");
- return CACHEFS_MATCH_SUCCESS_DELETE;
- }
-
- _leave(" = UPDATE");
- return CACHEFS_MATCH_SUCCESS_UPDATE;
- } else {
- _leave(" = SUCCESS");
- return CACHEFS_MATCH_SUCCESS;
+ _leave(" = OBSOLETE");
+ return FSCACHE_CHECKAUX_OBSOLETE;
}
+
+ _leave(" = UPDATE");
+ return FSCACHE_CHECKAUX_NEEDS_UPDATE;
}
- _leave(" = FAILED");
- return CACHEFS_MATCH_FAILED;
+ _leave(" = OKAY");
+ return FSCACHE_CHECKAUX_OKAY;
}
-#endif
+/*****************************************************************************/
/*
- * update a VLDB record stored in the cache
+ * set the key for the volume index entry
*/
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vlocation_cache_update(void *source, void *entry)
+static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
{
- struct afs_cache_vlocation *vldb = entry;
- struct afs_vlocation *vlocation = source;
+ const struct afs_volume *volume = cookie_netfs_data;
+ uint16_t klen;
+
+ _enter("{%u},%p,%u", volume->type, buffer, bufmax);
+
+ klen = sizeof(volume->type);
+ if (klen > bufmax)
+ return 0;
- _enter("");
+ memcpy(buffer, &volume->type, sizeof(volume->type));
+
+ _leave(" = %u", klen);
+ return klen;
- *vldb = vlocation->vldb;
}
-#endif
-
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
- const void *entry);
-static void afs_volume_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_volume_cache_index_def = {
- .name = "volume",
- .data_size = sizeof(struct afs_cache_vhash),
- .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
- .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
- .match = afs_volume_cache_match,
- .update = afs_volume_cache_update,
-};
-#endif
+/*****************************************************************************/
/*
- * match a volume hash record stored in the cache
+ * set the key for the index entry
*/
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
- const void *entry)
+static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
{
- const struct afs_cache_vhash *vhash = entry;
- struct afs_volume *volume = target;
+ const struct afs_vnode *vnode = cookie_netfs_data;
+ uint16_t klen;
- _enter("{%u},{%u}", volume->type, vhash->vtype);
+ _enter("{%x,%x,%llx},%p,%u",
+ vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
+ buffer, bufmax);
- if (volume->type == vhash->vtype) {
- _leave(" = SUCCESS");
- return CACHEFS_MATCH_SUCCESS;
- }
+ klen = sizeof(vnode->fid.vnode);
+ if (klen > bufmax)
+ return 0;
+
+ memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
- _leave(" = FAILED");
- return CACHEFS_MATCH_FAILED;
+ _leave(" = %u", klen);
+ return klen;
}
-#endif
/*
- * update a volume hash record stored in the cache
+ * provide updated file attributes
*/
-#ifdef AFS_CACHING_SUPPORT
-static void afs_volume_cache_update(void *source, void *entry)
+static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
{
- struct afs_cache_vhash *vhash = entry;
- struct afs_volume *volume = source;
+ const struct afs_vnode *vnode = cookie_netfs_data;
- _enter("");
+ _enter("{%x,%x,%llx},",
+ vnode->fid.vnode, vnode->fid.unique,
+ vnode->status.data_version);
- vhash->vtype = volume->type;
+ *size = vnode->status.size;
}
-#endif
-
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
- const void *entry);
-static void afs_vnode_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_vnode_cache_index_def = {
- .name = "vnode",
- .data_size = sizeof(struct afs_cache_vnode),
- .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 },
- .match = afs_vnode_cache_match,
- .update = afs_vnode_cache_update,
-};
-#endif
/*
- * match a vnode record stored in the cache
+ * provide new auxilliary cache data
+ */
+static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct afs_vnode *vnode = cookie_netfs_data;
+ uint16_t dlen;
+
+ _enter("{%x,%x,%Lx},%p,%u",
+ vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
+ buffer, bufmax);
+
+ dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
+ if (dlen > bufmax)
+ return 0;
+
+ memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
+ buffer += sizeof(vnode->fid.unique);
+ memcpy(buffer, &vnode->status.data_version,
+ sizeof(vnode->status.data_version));
+
+ _leave(" = %u", dlen);
+ return dlen;
+}
+
+/*
+ * check that the auxilliary data indicates that the entry is still valid
*/
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
- const void *entry)
+static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
+ const void *buffer,
+ uint16_t buflen)
{
- const struct afs_cache_vnode *cvnode = entry;
- struct afs_vnode *vnode = target;
-
- _enter("{%x,%x,%Lx},{%x,%x,%Lx}",
- vnode->fid.vnode,
- vnode->fid.unique,
- vnode->status.version,
- cvnode->vnode_id,
- cvnode->vnode_unique,
- cvnode->data_version);
-
- if (vnode->fid.vnode != cvnode->vnode_id) {
- _leave(" = FAILED");
- return CACHEFS_MATCH_FAILED;
+ struct afs_vnode *vnode = cookie_netfs_data;
+ uint16_t dlen;
+
+ _enter("{%x,%x,%llx},%p,%u",
+ vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
+ buffer, buflen);
+
+ /* check the size of the data is what we're expecting */
+ dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
+ if (dlen != buflen) {
+ _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
+ return FSCACHE_CHECKAUX_OBSOLETE;
}
- if (vnode->fid.unique != cvnode->vnode_unique ||
- vnode->status.version != cvnode->data_version) {
- _leave(" = DELETE");
- return CACHEFS_MATCH_SUCCESS_DELETE;
+ if (memcmp(buffer,
+ &vnode->fid.unique,
+ sizeof(vnode->fid.unique)
+ ) != 0) {
+ unsigned unique;
+
+ memcpy(&unique, buffer, sizeof(unique));
+
+ _leave(" = OBSOLETE [uniq %x != %x]",
+ unique, vnode->fid.unique);
+ return FSCACHE_CHECKAUX_OBSOLETE;
+ }
+
+ if (memcmp(buffer + sizeof(vnode->fid.unique),
+ &vnode->status.data_version,
+ sizeof(vnode->status.data_version)
+ ) != 0) {
+ afs_dataversion_t version;
+
+ memcpy(&version, buffer + sizeof(vnode->fid.unique),
+ sizeof(version));
+
+ _leave(" = OBSOLETE [vers %llx != %llx]",
+ version, vnode->status.data_version);
+ return FSCACHE_CHECKAUX_OBSOLETE;
}
_leave(" = SUCCESS");
- return CACHEFS_MATCH_SUCCESS;
+ return FSCACHE_CHECKAUX_OKAY;
}
-#endif
/*
- * update a vnode record stored in the cache
+ * indication the cookie is no longer uncached
+ * - this function is called when the backing store currently caching a cookie
+ * is removed
+ * - the netfs should use this to clean up any markers indicating cached pages
+ * - this is mandatory for any object that may have data
*/
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vnode_cache_update(void *source, void *entry)
+static void afs_vnode_cache_now_uncached(void *cookie_netfs_data)
{
- struct afs_cache_vnode *cvnode = entry;
- struct afs_vnode *vnode = source;
+ struct afs_vnode *vnode = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ _enter("{%x,%x,%Lx}",
+ vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version);
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ for (;;) {
+ /* grab a bunch of pages to clean */
+ nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping,
+ first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
- _enter("");
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
- cvnode->vnode_id = vnode->fid.vnode;
- cvnode->vnode_unique = vnode->fid.unique;
- cvnode->data_version = vnode->status.version;
+ _leave("");
}
-#endif
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
index 36a3642cf90e..5c4f6b499e90 100644
--- a/fs/afs/cache.h
+++ b/fs/afs/cache.h
@@ -1,6 +1,6 @@
/* AFS local cache management interface
*
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -9,15 +9,4 @@
* 2 of the License, or (at your option) any later version.
*/
-#ifndef AFS_CACHE_H
-#define AFS_CACHE_H
-
-#undef AFS_CACHING_SUPPORT
-
-#include <linux/mm.h>
-#ifdef AFS_CACHING_SUPPORT
-#include <linux/cachefs.h>
-#endif
-#include "types.h"
-
-#endif /* AFS_CACHE_H */
+#include <linux/fscache.h>
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 5e1df14e16b1..e19c13f059ed 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -147,12 +147,11 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist)
if (ret < 0)
goto error;
-#ifdef AFS_CACHING_SUPPORT
- /* put it up for caching */
- cachefs_acquire_cookie(afs_cache_netfs.primary_index,
- &afs_vlocation_cache_index_def,
- cell,
- &cell->cache);
+#ifdef CONFIG_AFS_FSCACHE
+ /* put it up for caching (this never returns an error) */
+ cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
+ &afs_cell_cache_index_def,
+ cell);
#endif
/* add to the cell lists */
@@ -362,10 +361,9 @@ static void afs_cell_destroy(struct afs_cell *cell)
list_del_init(&cell->proc_link);
up_write(&afs_proc_cells_sem);
-#ifdef AFS_CACHING_SUPPORT
- cachefs_relinquish_cookie(cell->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_cookie(cell->cache, 0);
#endif
-
key_put(cell->anonymous_key);
kfree(cell);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index a3901769a96c..7a1d942ef68d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -23,6 +23,9 @@ static void afs_invalidatepage(struct page *page, unsigned long offset);
static int afs_releasepage(struct page *page, gfp_t gfp_flags);
static int afs_launder_page(struct page *page);
+static int afs_readpages(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages);
+
const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
@@ -46,6 +49,7 @@ const struct inode_operations afs_file_inode_operations = {
const struct address_space_operations afs_fs_aops = {
.readpage = afs_readpage,
+ .readpages = afs_readpages,
.set_page_dirty = afs_set_page_dirty,
.launder_page = afs_launder_page,
.releasepage = afs_releasepage,
@@ -101,37 +105,18 @@ int afs_release(struct inode *inode, struct file *file)
/*
* deal with notification that a page was read from the cache
*/
-#ifdef AFS_CACHING_SUPPORT
-static void afs_readpage_read_complete(void *cookie_data,
- struct page *page,
- void *data,
- int error)
+static void afs_file_readpage_read_complete(struct page *page,
+ void *data,
+ int error)
{
- _enter("%p,%p,%p,%d", cookie_data, page, data, error);
+ _enter("%p,%p,%d", page, data, error);
- if (error)
- SetPageError(page);
- else
+ /* if the read completes with an error, we just unlock the page and let
+ * the VM reissue the readpage */
+ if (!error)
SetPageUptodate(page);
unlock_page(page);
-
}
-#endif
-
-/*
- * deal with notification that a page was written to the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_readpage_write_complete(void *cookie_data,
- struct page *page,
- void *data,
- int error)
-{
- _enter("%p,%p,%p,%d", cookie_data, page, data, error);
-
- unlock_page(page);
-}
-#endif
/*
* AFS read page from file, directory or symlink
@@ -161,9 +146,9 @@ static int afs_readpage(struct file *file, struct page *page)
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
goto error;
-#ifdef AFS_CACHING_SUPPORT
/* is it cached? */
- ret = cachefs_read_or_alloc_page(vnode->cache,
+#ifdef CONFIG_AFS_FSCACHE
+ ret = fscache_read_or_alloc_page(vnode->cache,
page,
afs_file_readpage_read_complete,
NULL,
@@ -171,20 +156,21 @@ static int afs_readpage(struct file *file, struct page *page)
#else
ret = -ENOBUFS;
#endif
-
switch (ret) {
- /* read BIO submitted and wb-journal entry found */
- case 1:
- BUG(); // TODO - handle wb-journal match
-
/* read BIO submitted (page in cache) */
case 0:
break;
- /* no page available in cache */
- case -ENOBUFS:
+ /* page not yet cached */
case -ENODATA:
+ _debug("cache said ENODATA");
+ goto go_on;
+
+ /* page will not be cached */
+ case -ENOBUFS:
+ _debug("cache said ENOBUFS");
default:
+ go_on:
offset = page->index << PAGE_CACHE_SHIFT;
len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
@@ -198,27 +184,25 @@ static int afs_readpage(struct file *file, struct page *page)
set_bit(AFS_VNODE_DELETED, &vnode->flags);
ret = -ESTALE;
}
-#ifdef AFS_CACHING_SUPPORT
- cachefs_uncache_page(vnode->cache, page);
+
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_uncache_page(vnode->cache, page);
#endif
+ BUG_ON(PageFsCache(page));
goto error;
}
SetPageUptodate(page);
-#ifdef AFS_CACHING_SUPPORT
- if (cachefs_write_page(vnode->cache,
- page,
- afs_file_readpage_write_complete,
- NULL,
- GFP_KERNEL) != 0
- ) {
- cachefs_uncache_page(vnode->cache, page);
- unlock_page(page);
+ /* send the page to the cache */
+#ifdef CONFIG_AFS_FSCACHE
+ if (PageFsCache(page) &&
+ fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
+ fscache_uncache_page(vnode->cache, page);
+ BUG_ON(PageFsCache(page));
}
-#else
- unlock_page(page);
#endif
+ unlock_page(page);
}
_leave(" = 0");
@@ -232,34 +216,59 @@ error:
}
/*
- * invalidate part or all of a page
+ * read a set of pages
*/
-static void afs_invalidatepage(struct page *page, unsigned long offset)
+static int afs_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
{
- int ret = 1;
+ struct afs_vnode *vnode;
+ int ret = 0;
- _enter("{%lu},%lu", page->index, offset);
+ _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages);
- BUG_ON(!PageLocked(page));
+ vnode = AFS_FS_I(mapping->host);
+ if (vnode->flags & AFS_VNODE_DELETED) {
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
- if (PagePrivate(page)) {
- /* We release buffers only if the entire page is being
- * invalidated.
- * The get_block cached value has been unconditionally
- * invalidated, so real IO is not possible anymore.
- */
- if (offset == 0) {
- BUG_ON(!PageLocked(page));
-
- ret = 0;
- if (!PageWriteback(page))
- ret = page->mapping->a_ops->releasepage(page,
- 0);
- /* possibly should BUG_ON(!ret); - neilb */
- }
+ /* attempt to read as many of the pages as possible */
+#ifdef CONFIG_AFS_FSCACHE
+ ret = fscache_read_or_alloc_pages(vnode->cache,
+ mapping,
+ pages,
+ &nr_pages,
+ afs_file_readpage_read_complete,
+ NULL,
+ mapping_gfp_mask(mapping));
+#else
+ ret = -ENOBUFS;
+#endif
+
+ switch (ret) {
+ /* all pages are being read from the cache */
+ case 0:
+ BUG_ON(!list_empty(pages));
+ BUG_ON(nr_pages != 0);
+ _leave(" = 0 [reading all]");
+ return 0;
+
+ /* there were pages that couldn't be read from the cache */
+ case -ENODATA:
+ case -ENOBUFS:
+ break;
+
+ /* other error */
+ default:
+ _leave(" = %d", ret);
+ return ret;
}
- _leave(" = %d", ret);
+ /* load the missing pages from the network */
+ ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file);
+
+ _leave(" = %d [netting]", ret);
+ return ret;
}
/*
@@ -273,25 +282,82 @@ static int afs_launder_page(struct page *page)
}
/*
- * release a page and cleanup its private data
+ * invalidate part or all of a page
+ * - release a page and clean up its private data if offset is 0 (indicating
+ * the entire page)
+ */
+static void afs_invalidatepage(struct page *page, unsigned long offset)
+{
+ struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
+
+ _enter("{%lu},%lu", page->index, offset);
+
+ BUG_ON(!PageLocked(page));
+
+ /* we clean up only if the entire page is being invalidated */
+ if (offset == 0) {
+#ifdef CONFIG_AFS_FSCACHE
+ if (PageFsCache(page)) {
+ struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ fscache_wait_on_page_write(vnode->cache, page);
+ fscache_uncache_page(vnode->cache, page);
+ ClearPageFsCache(page);
+ }
+#endif
+
+ if (PagePrivate(page)) {
+ if (wb && !PageWriteback(page)) {
+ set_page_private(page, 0);
+ afs_put_writeback(wb);
+ }
+
+ if (!page_private(page))
+ ClearPagePrivate(page);
+ }
+ }
+
+ _leave("");
+}
+
+/*
+ * release a page and clean up its private state if it's not busy
+ * - return true if the page can now be released, false if not
*/
static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
+ struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- struct afs_writeback *wb;
_enter("{{%x:%u}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
gfp_flags);
+ /* deny if page is being written to the cache and the caller hasn't
+ * elected to wait */
+#ifdef CONFIG_AFS_FSCACHE
+ if (PageFsCache(page)) {
+ if (fscache_check_page_write(vnode->cache, page)) {
+ if (!(gfp_flags & __GFP_WAIT)) {
+ _leave(" = F [cache busy]");
+ return 0;
+ }
+ fscache_wait_on_page_write(vnode->cache, page);
+ }
+
+ fscache_uncache_page(vnode->cache, page);
+ ClearPageFsCache(page);
+ }
+#endif
+
if (PagePrivate(page)) {
- wb = (struct afs_writeback *) page_private(page);
- ASSERT(wb != NULL);
- set_page_private(page, 0);
+ if (wb) {
+ set_page_private(page, 0);
+ afs_put_writeback(wb);
+ }
ClearPagePrivate(page);
- afs_put_writeback(wb);
}
- _leave(" = 0");
- return 0;
+ /* indicate that the page can be released */
+ _leave(" = T");
+ return 1;
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index bb47217f6a18..c048f0658751 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -61,6 +61,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
return -EBADMSG;
}
+#ifdef CONFIG_AFS_FSCACHE
+ if (vnode->status.size != inode->i_size)
+ fscache_attr_changed(vnode->cache);
+#endif
+
inode->i_nlink = vnode->status.nlink;
inode->i_uid = vnode->status.owner;
inode->i_gid = 0;
@@ -149,15 +154,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
return inode;
}
-#ifdef AFS_CACHING_SUPPORT
- /* set up caching before reading the status, as fetch-status reads the
- * first page of symlinks to see if they're really mntpts */
- cachefs_acquire_cookie(vnode->volume->cache,
- NULL,
- vnode,
- &vnode->cache);
-#endif
-
if (!status) {
/* it's a remotely extant inode */
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
@@ -183,6 +179,15 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
}
}
+ /* set up caching before mapping the status, as map-status reads the
+ * first page of symlinks to see if they're really mountpoints */
+ inode->i_size = vnode->status.size;
+#ifdef CONFIG_AFS_FSCACHE
+ vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
+ &afs_vnode_cache_index_def,
+ vnode);
+#endif
+
ret = afs_inode_map_status(vnode, key);
if (ret < 0)
goto bad_inode;
@@ -196,6 +201,10 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* failure */
bad_inode:
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_cookie(vnode->cache, 0);
+ vnode->cache = NULL;
+#endif
iget_failed(inode);
_leave(" = %d [bad]", ret);
return ERR_PTR(ret);
@@ -340,8 +349,8 @@ void afs_clear_inode(struct inode *inode)
ASSERT(list_empty(&vnode->writebacks));
ASSERT(!vnode->cb_promised);
-#ifdef AFS_CACHING_SUPPORT
- cachefs_relinquish_cookie(vnode->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_cookie(vnode->cache, 0);
vnode->cache = NULL;
#endif
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 67f259d99cd6..106be66dafd2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
#include "afs.h"
#include "afs_vl.h"
+#include "cache.h"
#define AFS_CELL_MAX_ADDRS 15
@@ -193,8 +194,8 @@ struct afs_cell {
struct key *anonymous_key; /* anonymous user key for this cell */
struct list_head proc_link; /* /proc cell list link */
struct proc_dir_entry *proc_dir; /* /proc dir for this cell */
-#ifdef AFS_CACHING_SUPPORT
- struct cachefs_cookie *cache; /* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_cookie *cache; /* caching cookie */
#endif
/* server record management */
@@ -249,8 +250,8 @@ struct afs_vlocation {
struct list_head grave; /* link in master graveyard list */
struct list_head update; /* link in master update list */
struct afs_cell *cell; /* cell to which volume belongs */
-#ifdef AFS_CACHING_SUPPORT
- struct cachefs_cookie *cache; /* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_cookie *cache; /* caching cookie */
#endif
struct afs_cache_vlocation vldb; /* volume information DB record */
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
@@ -302,8 +303,8 @@ struct afs_volume {
atomic_t usage;
struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
struct afs_vlocation *vlocation; /* volume location */
-#ifdef AFS_CACHING_SUPPORT
- struct cachefs_cookie *cache; /* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_cookie *cache; /* caching cookie */
#endif
afs_volid_t vid; /* volume ID */
afs_voltype_t type; /* type of volume */
@@ -333,8 +334,8 @@ struct afs_vnode {
struct afs_server *server; /* server currently supplying this file */
struct afs_fid fid; /* the file identifier for this inode */
struct afs_file_status status; /* AFS status info for this file */
-#ifdef AFS_CACHING_SUPPORT
- struct cachefs_cookie *cache; /* caching cookie */
+#ifdef CONFIG_AFS_FSCACHE
+ struct fscache_cookie *cache; /* caching cookie */
#endif
struct afs_permits *permits; /* cache of permits so far obtained */
struct mutex permits_lock; /* lock for altering permits list */
@@ -428,6 +429,22 @@ struct afs_uuid {
/*****************************************************************************/
/*
+ * cache.c
+ */
+#ifdef CONFIG_AFS_FSCACHE
+extern struct fscache_netfs afs_cache_netfs;
+extern struct fscache_cookie_def afs_cell_cache_index_def;
+extern struct fscache_cookie_def afs_vlocation_cache_index_def;
+extern struct fscache_cookie_def afs_volume_cache_index_def;
+extern struct fscache_cookie_def afs_vnode_cache_index_def;
+#else
+#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL)
+#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL)
+#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL)
+#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL)
+#endif
+
+/*
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
@@ -446,9 +463,6 @@ extern void afs_callback_update_kill(void);
*/
extern struct rw_semaphore afs_proc_cells_sem;
extern struct list_head afs_proc_cells;
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_cache_cell_index_def;
-#endif
#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
extern int afs_cell_init(char *);
@@ -554,9 +568,6 @@ extern void afs_clear_inode(struct inode *);
* main.c
*/
extern struct afs_uuid afs_uuid;
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_netfs afs_cache_netfs;
-#endif
/*
* misc.c
@@ -637,10 +648,6 @@ extern int afs_get_MAC_address(u8 *, size_t);
/*
* vlclient.c
*/
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vlocation_cache_index_def;
-#endif
-
extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
const char *, struct afs_cache_vlocation *,
const struct afs_wait_mode *);
@@ -664,12 +671,6 @@ extern void afs_vlocation_purge(void);
/*
* vnode.c
*/
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vnode_cache_index_def;
-#endif
-
-extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
-
static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
{
return container_of(inode, struct afs_vnode, vfs_inode);
@@ -711,10 +712,6 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
/*
* volume.c
*/
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_volume_cache_index_def;
-#endif
-
#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
extern void afs_put_volume(struct afs_volume *);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 2d3e5d4fb9f7..66d54d348c55 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,6 +1,6 @@
/* AFS client file system
*
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -29,18 +29,6 @@ static char *rootcell;
module_param(rootcell, charp, 0);
MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
-#ifdef AFS_CACHING_SUPPORT
-static struct cachefs_netfs_operations afs_cache_ops = {
- .get_page_cookie = afs_cache_get_page_cookie,
-};
-
-struct cachefs_netfs afs_cache_netfs = {
- .name = "afs",
- .version = 0,
- .ops = &afs_cache_ops,
-};
-#endif
-
struct afs_uuid afs_uuid;
/*
@@ -104,10 +92,9 @@ static int __init afs_init(void)
if (ret < 0)
return ret;
-#ifdef AFS_CACHING_SUPPORT
+#ifdef CONFIG_AFS_FSCACHE
/* we want to be able to cache */
- ret = cachefs_register_netfs(&afs_cache_netfs,
- &afs_cache_cell_index_def);
+ ret = fscache_register_netfs(&afs_cache_netfs);
if (ret < 0)
goto error_cache;
#endif
@@ -142,8 +129,8 @@ error_fs:
error_open_socket:
error_vl_update_init:
error_cell_init:
-#ifdef AFS_CACHING_SUPPORT
- cachefs_unregister_netfs(&afs_cache_netfs);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_unregister_netfs(&afs_cache_netfs);
error_cache:
#endif
afs_callback_update_kill();
@@ -175,8 +162,8 @@ static void __exit afs_exit(void)
afs_vlocation_purge();
flush_scheduled_work();
afs_cell_purge();
-#ifdef AFS_CACHING_SUPPORT
- cachefs_unregister_netfs(&afs_cache_netfs);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_unregister_netfs(&afs_cache_netfs);
#endif
afs_proc_cleanup();
rcu_barrier();
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 78db4953a800..2b9e2d03a390 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -173,9 +173,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
if (PageError(page))
goto error;
- buf = kmap(page);
+ buf = kmap_atomic(page, KM_USER0);
memcpy(devname, buf, size);
- kunmap(page);
+ kunmap_atomic(buf, KM_USER0);
page_cache_release(page);
page = NULL;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 849fc3160cb5..ec2a7431e458 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -281,9 +281,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl,
vl->vldb = *vldb;
-#ifdef AFS_CACHING_SUPPORT
- /* update volume entry in local cache */
- cachefs_update_cookie(vl->cache);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_update_cookie(vl->cache);
#endif
}
@@ -304,11 +303,9 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
memset(&vldb, 0, sizeof(vldb));
/* see if we have an in-cache copy (will set vl->valid if there is) */
-#ifdef AFS_CACHING_SUPPORT
- cachefs_acquire_cookie(cell->cache,
- &afs_volume_cache_index_def,
- vlocation,
- &vl->cache);
+#ifdef CONFIG_AFS_FSCACHE
+ vl->cache = fscache_acquire_cookie(vl->cell->cache,
+ &afs_vlocation_cache_index_def, vl);
#endif
if (vl->valid) {
@@ -420,6 +417,11 @@ fill_in_record:
spin_unlock(&vl->lock);
wake_up(&vl->waitq);
+ /* update volume entry in local cache */
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_update_cookie(vl->cache);
+#endif
+
/* schedule for regular updates */
afs_vlocation_queue_for_updates(vl);
goto success;
@@ -465,7 +467,7 @@ found_in_memory:
spin_unlock(&vl->lock);
success:
- _leave(" = %p",vl);
+ _leave(" = %p", vl);
return vl;
error_abandon:
@@ -523,10 +525,9 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl)
{
_enter("%p", vl);
-#ifdef AFS_CACHING_SUPPORT
- cachefs_relinquish_cookie(vl->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_cookie(vl->cache, 0);
#endif
-
afs_put_cell(vl->cell);
kfree(vl);
}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 8bab0e3437f9..a353e69e2391 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -124,13 +124,11 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
}
/* attach the cache and volume location */
-#ifdef AFS_CACHING_SUPPORT
- cachefs_acquire_cookie(vlocation->cache,
- &afs_vnode_cache_index_def,
- volume,
- &volume->cache);
+#ifdef CONFIG_AFS_FSCACHE
+ volume->cache = fscache_acquire_cookie(vlocation->cache,
+ &afs_volume_cache_index_def,
+ volume);
#endif
-
afs_get_vlocation(vlocation);
volume->vlocation = vlocation;
@@ -194,8 +192,8 @@ void afs_put_volume(struct afs_volume *volume)
up_write(&vlocation->cell->vl_sem);
/* finish cleaning up the volume */
-#ifdef AFS_CACHING_SUPPORT
- cachefs_relinquish_cookie(volume->cache, 0);
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_relinquish_cookie(volume->cache, 0);
#endif
afs_put_vlocation(vlocation);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3fb36d433621..c2e7a7ff0080 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -780,3 +780,24 @@ int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
_leave(" = %d", ret);
return ret;
}
+
+/*
+ * notification that a previously read-only page is about to become writable
+ * - if it returns an error, the caller will deliver a bus error signal
+ */
+int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
+
+ _enter("{{%x:%u}},{%lx}",
+ vnode->fid.vid, vnode->fid.vnode, page->index);
+
+ /* wait for the page to be written to the cache before we allow it to
+ * be modified */
+#ifdef CONFIG_AFS_FSCACHE
+ fscache_wait_on_page_write(vnode->cache, page);
+#endif
+
+ _leave(" = 0");
+ return 0;
+}
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
new file mode 100644
index 000000000000..80e9c6167f0b
--- /dev/null
+++ b/fs/cachefiles/Kconfig
@@ -0,0 +1,39 @@
+
+config CACHEFILES
+ tristate "Filesystem caching on files"
+ depends on FSCACHE && BLOCK
+ help
+ This permits use of a mounted filesystem as a cache for other
+ filesystems - primarily networking filesystems - thus allowing fast
+ local disk to enhance the speed of slower devices.
+
+ See Documentation/filesystems/caching/cachefiles.txt for more
+ information.
+
+config CACHEFILES_DEBUG
+ bool "Debug CacheFiles"
+ depends on CACHEFILES
+ help
+ This permits debugging to be dynamically enabled in the filesystem
+ caching on files module. If this is set, the debugging output may be
+ enabled by setting bits in /sys/modules/cachefiles/parameter/debug or
+ by including a debugging specifier in /etc/cachefilesd.conf.
+
+config CACHEFILES_HISTOGRAM
+ bool "Gather latency information on CacheFiles"
+ depends on CACHEFILES && PROC_FS
+ help
+
+ This option causes latency information to be gathered on CacheFiles
+ operation and exported through file:
+
+ /proc/fs/cachefiles/histogram
+
+ The generation of this histogram adds a certain amount of overhead to
+ execution as there are a number of points at which data is gathered,
+ and on a multi-CPU system these may be on cachelines that keep
+ bouncing between CPUs. On the other hand, the histogram may be
+ useful for debugging purposes. Saying 'N' here is recommended.
+
+ See Documentation/filesystems/caching/cachefiles.txt for more
+ information.
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
new file mode 100644
index 000000000000..32cbab0ffce3
--- /dev/null
+++ b/fs/cachefiles/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for caching in a mounted filesystem
+#
+
+cachefiles-y := \
+ bind.o \
+ daemon.o \
+ interface.o \
+ key.o \
+ main.o \
+ namei.o \
+ rdwr.o \
+ security.o \
+ xattr.o
+
+cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o
+
+obj-$(CONFIG_CACHEFILES) := cachefiles.o
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
new file mode 100644
index 000000000000..3797e0077b35
--- /dev/null
+++ b/fs/cachefiles/bind.c
@@ -0,0 +1,286 @@
+/* Bind and unbind a cache from the filesystem backing it
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/ctype.h>
+#include "internal.h"
+
+static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches);
+
+/*
+ * bind a directory as a cache
+ */
+int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
+{
+ _enter("{%u,%u,%u,%u,%u,%u},%s",
+ cache->frun_percent,
+ cache->fcull_percent,
+ cache->fstop_percent,
+ cache->brun_percent,
+ cache->bcull_percent,
+ cache->bstop_percent,
+ args);
+
+ /* start by checking things over */
+ ASSERT(cache->fstop_percent >= 0 &&
+ cache->fstop_percent < cache->fcull_percent &&
+ cache->fcull_percent < cache->frun_percent &&
+ cache->frun_percent < 100);
+
+ ASSERT(cache->bstop_percent >= 0 &&
+ cache->bstop_percent < cache->bcull_percent &&
+ cache->bcull_percent < cache->brun_percent &&
+ cache->brun_percent < 100);
+
+ if (*args) {
+ kerror("'bind' command doesn't take an argument");
+ return -EINVAL;
+ }
+
+ if (!cache->rootdirname) {
+ kerror("No cache directory specified");
+ return -EINVAL;
+ }
+
+ /* don't permit already bound caches to be re-bound */
+ if (test_bit(CACHEFILES_READY, &cache->flags)) {
+ kerror("Cache already bound");
+ return -EBUSY;
+ }
+
+ /* make sure we have copies of the tag and dirname strings */
+ if (!cache->tag) {
+ /* the tag string is released by the fops->release()
+ * function, so we don't release it on error here */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
+ /* add the cache */
+ return cachefiles_daemon_add_cache(cache);
+}
+
+/*
+ * add a cache
+ */
+static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
+{
+ struct cachefiles_object *fsdef;
+ struct nameidata nd;
+ struct kstatfs stats;
+ struct dentry *graveyard, *cachedir, *root;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter("");
+
+ /* we want to work under the module's security ID */
+ ret = cachefiles_get_security_ID(cache);
+ if (ret < 0)
+ return ret;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ /* allocate the root index object */
+ ret = -ENOMEM;
+
+ fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
+ if (!fsdef)
+ goto error_root_object;
+
+ ASSERTCMP(fsdef->backer, ==, NULL);
+
+ atomic_set(&fsdef->usage, 1);
+ fsdef->type = FSCACHE_COOKIE_TYPE_INDEX;
+
+ _debug("- fsdef %p", fsdef);
+
+ /* look up the directory at the root of the cache */
+ memset(&nd, 0, sizeof(nd));
+
+ ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
+ if (ret < 0)
+ goto error_open_root;
+
+ cache->mnt = mntget(nd.path.mnt);
+ root = dget(nd.path.dentry);
+ path_put(&nd.path);
+
+ /* check parameters */
+ ret = -EOPNOTSUPP;
+ if (!root->d_inode ||
+ !root->d_inode->i_op ||
+ !root->d_inode->i_op->lookup ||
+ !root->d_inode->i_op->mkdir ||
+ !root->d_inode->i_op->setxattr ||
+ !root->d_inode->i_op->getxattr ||
+ !root->d_sb ||
+ !root->d_sb->s_op ||
+ !root->d_sb->s_op->statfs ||
+ !root->d_sb->s_op->sync_fs)
+ goto error_unsupported;
+
+ ret = -EROFS;
+ if (root->d_sb->s_flags & MS_RDONLY)
+ goto error_unsupported;
+
+ /* determine the security of the on-disk cache as this governs
+ * security ID of files we create */
+ ret = cachefiles_determine_cache_security(cache, root, &saved_cred);
+ if (ret < 0)
+ goto error_unsupported;
+
+ /* get the cache size and blocksize */
+ ret = vfs_statfs(root, &stats);
+ if (ret < 0)
+ goto error_unsupported;
+
+ ret = -ERANGE;
+ if (stats.f_bsize <= 0)
+ goto error_unsupported;
+
+ ret = -EOPNOTSUPP;
+ if (stats.f_bsize > PAGE_SIZE)
+ goto error_unsupported;
+
+ cache->bsize = stats.f_bsize;
+ cache->bshift = 0;
+ if (stats.f_bsize < PAGE_SIZE)
+ cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
+
+ _debug("blksize %u (shift %u)",
+ cache->bsize, cache->bshift);
+
+ _debug("size %llu, avail %llu",
+ (unsigned long long) stats.f_blocks,
+ (unsigned long long) stats.f_bavail);
+
+ /* set up caching limits */
+ do_div(stats.f_files, 100);
+ cache->fstop = stats.f_files * cache->fstop_percent;
+ cache->fcull = stats.f_files * cache->fcull_percent;
+ cache->frun = stats.f_files * cache->frun_percent;
+
+ _debug("limits {%llu,%llu,%llu} files",
+ (unsigned long long) cache->frun,
+ (unsigned long long) cache->fcull,
+ (unsigned long long) cache->fstop);
+
+ stats.f_blocks >>= cache->bshift;
+ do_div(stats.f_blocks, 100);
+ cache->bstop = stats.f_blocks * cache->bstop_percent;
+ cache->bcull = stats.f_blocks * cache->bcull_percent;
+ cache->brun = stats.f_blocks * cache->brun_percent;
+
+ _debug("limits {%llu,%llu,%llu} blocks",
+ (unsigned long long) cache->brun,
+ (unsigned long long) cache->bcull,
+ (unsigned long long) cache->bstop);
+
+ /* get the cache directory and check its type */
+ cachedir = cachefiles_get_directory(cache, root, "cache");
+ if (IS_ERR(cachedir)) {
+ ret = PTR_ERR(cachedir);
+ goto error_unsupported;
+ }
+
+ fsdef->dentry = cachedir;
+ fsdef->fscache.cookie = NULL;
+
+ ret = cachefiles_check_object_type(fsdef);
+ if (ret < 0)
+ goto error_unsupported;
+
+ /* get the graveyard directory */
+ graveyard = cachefiles_get_directory(cache, root, "graveyard");
+ if (IS_ERR(graveyard)) {
+ ret = PTR_ERR(graveyard);
+ goto error_unsupported;
+ }
+
+ cache->graveyard = graveyard;
+
+ /* publish the cache */
+ fscache_init_cache(&cache->cache,
+ &cachefiles_cache_ops,
+ "%s",
+ fsdef->dentry->d_sb->s_id);
+
+ fscache_object_init(&fsdef->fscache, NULL, &cache->cache);
+
+ ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
+ if (ret < 0)
+ goto error_add_cache;
+
+ /* done */
+ set_bit(CACHEFILES_READY, &cache->flags);
+ dput(root);
+
+ printk(KERN_INFO "CacheFiles:"
+ " File cache on %s registered\n",
+ cache->cache.identifier);
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0, 0);
+ cachefiles_end_secure(cache, saved_cred);
+ return 0;
+
+error_add_cache:
+ dput(cache->graveyard);
+ cache->graveyard = NULL;
+error_unsupported:
+ mntput(cache->mnt);
+ cache->mnt = NULL;
+ dput(fsdef->dentry);
+ fsdef->dentry = NULL;
+ dput(root);
+error_open_root:
+ kmem_cache_free(cachefiles_object_jar, fsdef);
+error_root_object:
+ cachefiles_end_secure(cache, saved_cred);
+ kerror("Failed to register: %d", ret);
+ return ret;
+}
+
+/*
+ * unbind a cache on fd release
+ */
+void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
+{
+ _enter("");
+
+ if (test_bit(CACHEFILES_READY, &cache->flags)) {
+ printk(KERN_INFO "CacheFiles:"
+ " File cache on %s unregistering\n",
+ cache->cache.identifier);
+
+ fscache_withdraw_cache(&cache->cache);
+ }
+
+ dput(cache->graveyard);
+ mntput(cache->mnt);
+
+ kfree(cache->rootdirname);
+ kfree(cache->secctx);
+ kfree(cache->tag);
+
+ _leave("");
+}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
new file mode 100644
index 000000000000..4618516dd994
--- /dev/null
+++ b/fs/cachefiles/daemon.c
@@ -0,0 +1,755 @@
+/* Daemon interface
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/ctype.h>
+#include <linux/fs_struct.h>
+#include "internal.h"
+
+static int cachefiles_daemon_open(struct inode *, struct file *);
+static int cachefiles_daemon_release(struct inode *, struct file *);
+static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t,
+ loff_t *);
+static ssize_t cachefiles_daemon_write(struct file *, const char __user *,
+ size_t, loff_t *);
+static unsigned int cachefiles_daemon_poll(struct file *,
+ struct poll_table_struct *);
+static int cachefiles_daemon_frun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_brun(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_cull(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_debug(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_dir(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *);
+static int cachefiles_daemon_tag(struct cachefiles_cache *, char *);
+
+static unsigned long cachefiles_open;
+
+const struct file_operations cachefiles_daemon_fops = {
+ .owner = THIS_MODULE,
+ .open = cachefiles_daemon_open,
+ .release = cachefiles_daemon_release,
+ .read = cachefiles_daemon_read,
+ .write = cachefiles_daemon_write,
+ .poll = cachefiles_daemon_poll,
+};
+
+struct cachefiles_daemon_cmd {
+ char name[8];
+ int (*handler)(struct cachefiles_cache *cache, char *args);
+};
+
+static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = {
+ { "bind", cachefiles_daemon_bind },
+ { "brun", cachefiles_daemon_brun },
+ { "bcull", cachefiles_daemon_bcull },
+ { "bstop", cachefiles_daemon_bstop },
+ { "cull", cachefiles_daemon_cull },
+ { "debug", cachefiles_daemon_debug },
+ { "dir", cachefiles_daemon_dir },
+ { "frun", cachefiles_daemon_frun },
+ { "fcull", cachefiles_daemon_fcull },
+ { "fstop", cachefiles_daemon_fstop },
+ { "inuse", cachefiles_daemon_inuse },
+ { "secctx", cachefiles_daemon_secctx },
+ { "tag", cachefiles_daemon_tag },
+ { "", NULL }
+};
+
+
+/*
+ * do various checks
+ */
+static int cachefiles_daemon_open(struct inode *inode, struct file *file)
+{
+ struct cachefiles_cache *cache;
+
+ _enter("");
+
+ /* only the superuser may do this */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* the cachefiles device may only be open once at a time */
+ if (xchg(&cachefiles_open, 1) == 1)
+ return -EBUSY;
+
+ /* allocate a cache record */
+ cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL);
+ if (!cache) {
+ cachefiles_open = 0;
+ return -ENOMEM;
+ }
+
+ mutex_init(&cache->daemon_mutex);
+ cache->active_nodes = RB_ROOT;
+ rwlock_init(&cache->active_lock);
+ init_waitqueue_head(&cache->daemon_pollwq);
+
+ /* set default caching limits
+ * - limit at 1% free space and/or free files
+ * - cull below 5% free space and/or free files
+ * - cease culling above 7% free space and/or free files
+ */
+ cache->frun_percent = 7;
+ cache->fcull_percent = 5;
+ cache->fstop_percent = 1;
+ cache->brun_percent = 7;
+ cache->bcull_percent = 5;
+ cache->bstop_percent = 1;
+
+ file->private_data = cache;
+ cache->cachefilesd = file;
+ return 0;
+}
+
+/*
+ * release a cache
+ */
+static int cachefiles_daemon_release(struct inode *inode, struct file *file)
+{
+ struct cachefiles_cache *cache = file->private_data;
+
+ _enter("");
+
+ ASSERT(cache);
+
+ set_bit(CACHEFILES_DEAD, &cache->flags);
+
+ cachefiles_daemon_unbind(cache);
+
+ ASSERT(!cache->active_nodes.rb_node);
+
+ /* clean up the control file interface */
+ cache->cachefilesd = NULL;
+ file->private_data = NULL;
+ cachefiles_open = 0;
+
+ kfree(cache);
+
+ _leave("");
+ return 0;
+}
+
+/*
+ * read the cache state
+ */
+static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
+ size_t buflen, loff_t *pos)
+{
+ struct cachefiles_cache *cache = file->private_data;
+ char buffer[256];
+ int n;
+
+ //_enter(",,%zu,", buflen);
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags))
+ return 0;
+
+ /* check how much space the cache has */
+ cachefiles_has_space(cache, 0, 0);
+
+ /* summarise */
+ clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
+
+ n = snprintf(buffer, sizeof(buffer),
+ "cull=%c"
+ " frun=%llx"
+ " fcull=%llx"
+ " fstop=%llx"
+ " brun=%llx"
+ " bcull=%llx"
+ " bstop=%llx",
+ test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
+ (unsigned long long) cache->frun,
+ (unsigned long long) cache->fcull,
+ (unsigned long long) cache->fstop,
+ (unsigned long long) cache->brun,
+ (unsigned long long) cache->bcull,
+ (unsigned long long) cache->bstop
+ );
+
+ if (n > buflen)
+ return -EMSGSIZE;
+
+ if (copy_to_user(_buffer, buffer, n) != 0)
+ return -EFAULT;
+
+ return n;
+}
+
+/*
+ * command the cache
+ */
+static ssize_t cachefiles_daemon_write(struct file *file,
+ const char __user *_data,
+ size_t datalen,
+ loff_t *pos)
+{
+ const struct cachefiles_daemon_cmd *cmd;
+ struct cachefiles_cache *cache = file->private_data;
+ ssize_t ret;
+ char *data, *args, *cp;
+
+ //_enter(",,%zu,", datalen);
+
+ ASSERT(cache);
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags))
+ return -EIO;
+
+ if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ return -EOPNOTSUPP;
+
+ /* drag the command string into the kernel so we can parse it */
+ data = kmalloc(datalen + 1, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(data, _data, datalen) != 0)
+ goto error;
+
+ data[datalen] = '\0';
+
+ ret = -EINVAL;
+ if (memchr(data, '\0', datalen))
+ goto error;
+
+ /* strip any newline */
+ cp = memchr(data, '\n', datalen);
+ if (cp) {
+ if (cp == data)
+ goto error;
+
+ *cp = '\0';
+ }
+
+ /* parse the command */
+ ret = -EOPNOTSUPP;
+
+ for (args = data; *args; args++)
+ if (isspace(*args))
+ break;
+ if (*args) {
+ if (args == data)
+ goto error;
+ *args = '\0';
+ for (args++; isspace(*args); args++)
+ continue;
+ }
+
+ /* run the appropriate command handler */
+ for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++)
+ if (strcmp(cmd->name, data) == 0)
+ goto found_command;
+
+error:
+ kfree(data);
+ //_leave(" = %zd", ret);
+ return ret;
+
+found_command:
+ mutex_lock(&cache->daemon_mutex);
+
+ ret = -EIO;
+ if (!test_bit(CACHEFILES_DEAD, &cache->flags))
+ ret = cmd->handler(cache, args);
+
+ mutex_unlock(&cache->daemon_mutex);
+
+ if (ret == 0)
+ ret = datalen;
+ goto error;
+}
+
+/*
+ * poll for culling state
+ * - use POLLOUT to indicate culling state
+ */
+static unsigned int cachefiles_daemon_poll(struct file *file,
+ struct poll_table_struct *poll)
+{
+ struct cachefiles_cache *cache = file->private_data;
+ unsigned int mask;
+
+ poll_wait(file, &cache->daemon_pollwq, poll);
+ mask = 0;
+
+ if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
+ mask |= POLLIN;
+
+ if (test_bit(CACHEFILES_CULLING, &cache->flags))
+ mask |= POLLOUT;
+
+ return mask;
+}
+
+/*
+ * give a range error for cache space constraints
+ * - can be tail-called
+ */
+static int cachefiles_daemon_range_error(struct cachefiles_cache *cache,
+ char *args)
+{
+ kerror("Free space limits must be in range"
+ " 0%%<=stop<cull<run<100%%");
+
+ return -EINVAL;
+}
+
+/*
+ * set the percentage of files at which to stop culling
+ * - command: "frun <N>%"
+ */
+static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long frun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ frun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (frun <= cache->fcull_percent || frun >= 100)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->frun_percent = frun;
+ return 0;
+}
+
+/*
+ * set the percentage of files at which to start culling
+ * - command: "fcull <N>%"
+ */
+static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long fcull;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ fcull = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->fcull_percent = fcull;
+ return 0;
+}
+
+/*
+ * set the percentage of files at which to stop allocating
+ * - command: "fstop <N>%"
+ */
+static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long fstop;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ fstop = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (fstop < 0 || fstop >= cache->fcull_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->fstop_percent = fstop;
+ return 0;
+}
+
+/*
+ * set the percentage of blocks at which to stop culling
+ * - command: "brun <N>%"
+ */
+static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long brun;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ brun = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (brun <= cache->bcull_percent || brun >= 100)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->brun_percent = brun;
+ return 0;
+}
+
+/*
+ * set the percentage of blocks at which to start culling
+ * - command: "bcull <N>%"
+ */
+static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long bcull;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ bcull = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->bcull_percent = bcull;
+ return 0;
+}
+
+/*
+ * set the percentage of blocks at which to stop allocating
+ * - command: "bstop <N>%"
+ */
+static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long bstop;
+
+ _enter(",%s", args);
+
+ if (!*args)
+ return -EINVAL;
+
+ bstop = simple_strtoul(args, &args, 10);
+ if (args[0] != '%' || args[1] != '\0')
+ return -EINVAL;
+
+ if (bstop < 0 || bstop >= cache->bcull_percent)
+ return cachefiles_daemon_range_error(cache, args);
+
+ cache->bstop_percent = bstop;
+ return 0;
+}
+
+/*
+ * set the cache directory
+ * - command: "dir <name>"
+ */
+static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
+{
+ char *dir;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ kerror("Empty directory specified");
+ return -EINVAL;
+ }
+
+ if (cache->rootdirname) {
+ kerror("Second cache directory specified");
+ return -EEXIST;
+ }
+
+ dir = kstrdup(args, GFP_KERNEL);
+ if (!dir)
+ return -ENOMEM;
+
+ cache->rootdirname = dir;
+ return 0;
+}
+
+/*
+ * set the cache security context
+ * - command: "secctx <ctx>"
+ */
+static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
+{
+ char *secctx;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ kerror("Empty security context specified");
+ return -EINVAL;
+ }
+
+ if (cache->secctx) {
+ kerror("Second security context specified");
+ return -EINVAL;
+ }
+
+ secctx = kstrdup(args, GFP_KERNEL);
+ if (!secctx)
+ return -ENOMEM;
+
+ cache->secctx = secctx;
+ return 0;
+}
+
+/*
+ * set the cache tag
+ * - command: "tag <name>"
+ */
+static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
+{
+ char *tag;
+
+ _enter(",%s", args);
+
+ if (!*args) {
+ kerror("Empty tag specified");
+ return -EINVAL;
+ }
+
+ if (cache->tag)
+ return -EEXIST;
+
+ tag = kstrdup(args, GFP_KERNEL);
+ if (!tag)
+ return -ENOMEM;
+
+ cache->tag = tag;
+ return 0;
+}
+
+/*
+ * request a node in the cache be culled from the current working directory
+ * - command: "cull <name>"
+ */
+static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
+{
+ struct fs_struct *fs;
+ struct dentry *dir;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter(",%s", args);
+
+ if (strchr(args, '/'))
+ goto inval;
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+ kerror("cull applied to unready cache");
+ return -EIO;
+ }
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ kerror("cull applied to dead cache");
+ return -EIO;
+ }
+
+ /* extract the directory dentry from the cwd */
+ fs = current->fs;
+ read_lock(&fs->lock);
+ dir = dget(fs->pwd.dentry);
+ read_unlock(&fs->lock);
+
+ if (!S_ISDIR(dir->d_inode->i_mode))
+ goto notdir;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = cachefiles_cull(cache, dir, args);
+ cachefiles_end_secure(cache, saved_cred);
+
+ dput(dir);
+ _leave(" = %d", ret);
+ return ret;
+
+notdir:
+ dput(dir);
+ kerror("cull command requires dirfd to be a directory");
+ return -ENOTDIR;
+
+inval:
+ kerror("cull command requires dirfd and filename");
+ return -EINVAL;
+}
+
+/*
+ * set debugging mode
+ * - command: "debug <mask>"
+ */
+static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args)
+{
+ unsigned long mask;
+
+ _enter(",%s", args);
+
+ mask = simple_strtoul(args, &args, 0);
+ if (args[0] != '\0')
+ goto inval;
+
+ cachefiles_debug = mask;
+ _leave(" = 0");
+ return 0;
+
+inval:
+ kerror("debug command requires mask");
+ return -EINVAL;
+}
+
+/*
+ * find out whether an object in the current working directory is in use or not
+ * - command: "inuse <name>"
+ */
+static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
+{
+ struct fs_struct *fs;
+ struct dentry *dir;
+ const struct cred *saved_cred;
+ int ret;
+
+ //_enter(",%s", args);
+
+ if (strchr(args, '/'))
+ goto inval;
+
+ if (!test_bit(CACHEFILES_READY, &cache->flags)) {
+ kerror("inuse applied to unready cache");
+ return -EIO;
+ }
+
+ if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ kerror("inuse applied to dead cache");
+ return -EIO;
+ }
+
+ /* extract the directory dentry from the cwd */
+ fs = current->fs;
+ read_lock(&fs->lock);
+ dir = dget(fs->pwd.dentry);
+ read_unlock(&fs->lock);
+
+ if (!S_ISDIR(dir->d_inode->i_mode))
+ goto notdir;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = cachefiles_check_in_use(cache, dir, args);
+ cachefiles_end_secure(cache, saved_cred);
+
+ dput(dir);
+ //_leave(" = %d", ret);
+ return ret;
+
+notdir:
+ dput(dir);
+ kerror("inuse command requires dirfd to be a directory");
+ return -ENOTDIR;
+
+inval:
+ kerror("inuse command requires dirfd and filename");
+ return -EINVAL;
+}
+
+/*
+ * see if we have space for a number of pages and/or a number of files in the
+ * cache
+ */
+int cachefiles_has_space(struct cachefiles_cache *cache,
+ unsigned fnr, unsigned bnr)
+{
+ struct kstatfs stats;
+ int ret;
+
+ //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
+ // (unsigned long long) cache->frun,
+ // (unsigned long long) cache->fcull,
+ // (unsigned long long) cache->fstop,
+ // (unsigned long long) cache->brun,
+ // (unsigned long long) cache->bcull,
+ // (unsigned long long) cache->bstop,
+ // fnr, bnr);
+
+ /* find out how many pages of blockdev are available */
+ memset(&stats, 0, sizeof(stats));
+
+ ret = vfs_statfs(cache->mnt->mnt_root, &stats);
+ if (ret < 0) {
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "statfs failed");
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ stats.f_bavail >>= cache->bshift;
+
+ //_debug("avail %llu,%llu",
+ // (unsigned long long) stats.f_ffree,
+ // (unsigned long long) stats.f_bavail);
+
+ /* see if there is sufficient space */
+ if (stats.f_ffree > fnr)
+ stats.f_ffree -= fnr;
+ else
+ stats.f_ffree = 0;
+
+ if (stats.f_bavail > bnr)
+ stats.f_bavail -= bnr;
+ else
+ stats.f_bavail = 0;
+
+ ret = -ENOBUFS;
+ if (stats.f_ffree < cache->fstop ||
+ stats.f_bavail < cache->bstop)
+ goto begin_cull;
+
+ ret = 0;
+ if (stats.f_ffree < cache->fcull ||
+ stats.f_bavail < cache->bcull)
+ goto begin_cull;
+
+ if (test_bit(CACHEFILES_CULLING, &cache->flags) &&
+ stats.f_ffree >= cache->frun &&
+ stats.f_bavail >= cache->brun &&
+ test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)
+ ) {
+ _debug("cease culling");
+ cachefiles_state_changed(cache);
+ }
+
+ //_leave(" = 0");
+ return 0;
+
+begin_cull:
+ if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) {
+ _debug("### CULL CACHE ###");
+ cachefiles_state_changed(cache);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
new file mode 100644
index 000000000000..1e962348d111
--- /dev/null
+++ b/fs/cachefiles/interface.c
@@ -0,0 +1,449 @@
+/* FS-Cache interface to CacheFiles
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/mount.h>
+#include <linux/buffer_head.h>
+#include "internal.h"
+
+#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
+
+struct cachefiles_lookup_data {
+ struct cachefiles_xattr *auxdata; /* auxiliary data */
+ char *key; /* key path */
+};
+
+static int cachefiles_attr_changed(struct fscache_object *_object);
+
+/*
+ * allocate an object record for a cookie lookup and prepare the lookup data
+ */
+static struct fscache_object *cachefiles_alloc_object(
+ struct fscache_cache *_cache,
+ struct fscache_cookie *cookie)
+{
+ struct cachefiles_lookup_data *lookup_data;
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct cachefiles_xattr *auxdata;
+ unsigned keylen, auxlen;
+ void *buffer;
+ char *key;
+
+ cache = container_of(_cache, struct cachefiles_cache, cache);
+
+ _enter("{%s},%p,", cache->cache.identifier, cookie);
+
+ lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL);
+ if (!lookup_data)
+ goto nomem_lookup_data;
+
+ /* create a new object record and a temporary leaf image */
+ object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
+ if (!object)
+ goto nomem_object;
+
+ ASSERTCMP(object->backer, ==, NULL);
+
+ BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
+ atomic_set(&object->usage, 1);
+
+ fscache_object_init(&object->fscache, cookie, &cache->cache);
+
+ object->type = cookie->def->type;
+
+ /* get hold of the raw key
+ * - stick the length on the front and leave space on the back for the
+ * encoder
+ */
+ buffer = kmalloc((2 + 512) + 3, GFP_KERNEL);
+ if (!buffer)
+ goto nomem_buffer;
+
+ keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
+ ASSERTCMP(keylen, <, 512);
+
+ *(uint16_t *)buffer = keylen;
+ ((char *)buffer)[keylen + 2] = 0;
+ ((char *)buffer)[keylen + 3] = 0;
+ ((char *)buffer)[keylen + 4] = 0;
+
+ /* turn the raw key into something that can work with as a filename */
+ key = cachefiles_cook_key(buffer, keylen + 2, object->type);
+ if (!key)
+ goto nomem_key;
+
+ /* get hold of the auxiliary data and prepend the object type */
+ auxdata = buffer;
+ auxlen = 0;
+ if (cookie->def->get_aux) {
+ auxlen = cookie->def->get_aux(cookie->netfs_data,
+ auxdata->data, 511);
+ ASSERTCMP(auxlen, <, 511);
+ }
+
+ auxdata->len = auxlen + 1;
+ auxdata->type = cookie->def->type;
+
+ lookup_data->auxdata = auxdata;
+ lookup_data->key = key;
+ object->lookup_data = lookup_data;
+
+ _leave(" = %p [%p]", &object->fscache, lookup_data);
+ return &object->fscache;
+
+nomem_key:
+ kfree(buffer);
+nomem_buffer:
+ BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
+ kmem_cache_free(cachefiles_object_jar, object);
+ fscache_object_destroyed(&cache->cache);
+nomem_object:
+ kfree(lookup_data);
+nomem_lookup_data:
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * attempt to look up the nominated node in this cache
+ */
+static void cachefiles_lookup_object(struct fscache_object *_object)
+{
+ struct cachefiles_lookup_data *lookup_data;
+ struct cachefiles_object *parent, *object;
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter("{OBJ%x}", _object->debug_id);
+
+ cache = container_of(_object->cache, struct cachefiles_cache, cache);
+ parent = container_of(_object->parent,
+ struct cachefiles_object, fscache);
+ object = container_of(_object, struct cachefiles_object, fscache);
+ lookup_data = object->lookup_data;
+
+ ASSERTCMP(lookup_data, !=, NULL);
+
+ /* look up the key, creating any missing bits */
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = cachefiles_walk_to_object(parent, object,
+ lookup_data->key,
+ lookup_data->auxdata);
+ cachefiles_end_secure(cache, saved_cred);
+
+ /* polish off by setting the attributes of non-index files */
+ if (ret == 0 &&
+ object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+ cachefiles_attr_changed(&object->fscache);
+
+ if (ret < 0) {
+ printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n",
+ ret);
+ fscache_object_lookup_error(&object->fscache);
+ }
+
+ _leave(" [%d]", ret);
+}
+
+/*
+ * indication of lookup completion
+ */
+static void cachefiles_lookup_complete(struct fscache_object *_object)
+{
+ struct cachefiles_object *object;
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+
+ _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data);
+
+ if (object->lookup_data) {
+ kfree(object->lookup_data->key);
+ kfree(object->lookup_data->auxdata);
+ kfree(object->lookup_data);
+ object->lookup_data = NULL;
+ }
+}
+
+/*
+ * increment the usage count on an inode object (may fail if unmounting)
+ */
+static
+struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
+{
+ struct cachefiles_object *object =
+ container_of(_object, struct cachefiles_object, fscache);
+
+ _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage));
+
+#ifdef CACHEFILES_DEBUG_SLAB
+ ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+ atomic_inc(&object->usage);
+ return &object->fscache;
+}
+
+/*
+ * update the auxilliary data for an object object on disk
+ */
+static void cachefiles_update_object(struct fscache_object *_object)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_xattr *auxdata;
+ struct cachefiles_cache *cache;
+ struct fscache_cookie *cookie;
+ const struct cred *saved_cred;
+ unsigned auxlen;
+
+ _enter("{OBJ%x}", _object->debug_id);
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache, struct cachefiles_cache,
+ cache);
+ cookie = object->fscache.cookie;
+
+ if (!cookie->def->get_aux) {
+ _leave(" [no aux]");
+ return;
+ }
+
+ auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL);
+ if (!auxdata) {
+ _leave(" [nomem]");
+ return;
+ }
+
+ auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511);
+ ASSERTCMP(auxlen, <, 511);
+
+ auxdata->len = auxlen + 1;
+ auxdata->type = cookie->def->type;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_update_object_xattr(object, auxdata);
+ cachefiles_end_secure(cache, saved_cred);
+ kfree(auxdata);
+ _leave("");
+}
+
+/*
+ * discard the resources pinned by an object and effect retirement if
+ * requested
+ */
+static void cachefiles_drop_object(struct fscache_object *_object)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
+
+ ASSERT(_object);
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+
+ _enter("{OBJ%x,%d}",
+ object->fscache.debug_id, atomic_read(&object->usage));
+
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+#ifdef CACHEFILES_DEBUG_SLAB
+ ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+ /* delete retired objects */
+ if (object->fscache.state == FSCACHE_OBJECT_RECYCLING &&
+ _object != cache->cache.fsdef
+ ) {
+ _debug("- retire object OBJ%x", object->fscache.debug_id);
+ cachefiles_begin_secure(cache, &saved_cred);
+ cachefiles_delete_object(cache, object);
+ cachefiles_end_secure(cache, saved_cred);
+ }
+
+ /* close the filesystem stuff attached to the object */
+ if (object->backer != object->dentry)
+ dput(object->backer);
+ object->backer = NULL;
+
+ /* note that the object is now inactive */
+ if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
+ write_lock(&cache->active_lock);
+ if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE,
+ &object->flags))
+ BUG();
+ rb_erase(&object->active_node, &cache->active_nodes);
+ wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
+ write_unlock(&cache->active_lock);
+ }
+
+ dput(object->dentry);
+ object->dentry = NULL;
+
+ _leave("");
+}
+
+/*
+ * dispose of a reference to an object
+ */
+static void cachefiles_put_object(struct fscache_object *_object)
+{
+ struct cachefiles_object *object;
+ struct fscache_cache *cache;
+
+ ASSERT(_object);
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+
+ _enter("{OBJ%x,%d}",
+ object->fscache.debug_id, atomic_read(&object->usage));
+
+#ifdef CACHEFILES_DEBUG_SLAB
+ ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
+#endif
+
+ ASSERTIFCMP(object->fscache.parent,
+ object->fscache.parent->n_children, >, 0);
+
+ if (atomic_dec_and_test(&object->usage)) {
+ _debug("- kill object OBJ%x", object->fscache.debug_id);
+
+ ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
+ ASSERTCMP(object->fscache.parent, ==, NULL);
+ ASSERTCMP(object->backer, ==, NULL);
+ ASSERTCMP(object->dentry, ==, NULL);
+ ASSERTCMP(object->fscache.n_ops, ==, 0);
+ ASSERTCMP(object->fscache.n_children, ==, 0);
+
+ if (object->lookup_data) {
+ kfree(object->lookup_data->key);
+ kfree(object->lookup_data->auxdata);
+ kfree(object->lookup_data);
+ object->lookup_data = NULL;
+ }
+
+ cache = object->fscache.cache;
+ kmem_cache_free(cachefiles_object_jar, object);
+ fscache_object_destroyed(cache);
+ }
+
+ _leave("");
+}
+
+/*
+ * sync a cache
+ */
+static void cachefiles_sync_cache(struct fscache_cache *_cache)
+{
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
+ int ret;
+
+ _enter("%p", _cache);
+
+ cache = container_of(_cache, struct cachefiles_cache, cache);
+
+ /* make sure all pages pinned by operations on behalf of the netfs are
+ * written to disc */
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = fsync_super(cache->mnt->mnt_sb);
+ cachefiles_end_secure(cache, saved_cred);
+
+ if (ret == -EIO)
+ cachefiles_io_error(cache,
+ "Attempt to sync backing fs superblock"
+ " returned error %d",
+ ret);
+}
+
+/*
+ * notification the attributes on an object have changed
+ * - called with reads/writes excluded by FS-Cache
+ */
+static int cachefiles_attr_changed(struct fscache_object *_object)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
+ struct iattr newattrs;
+ uint64_t ni_size;
+ loff_t oi_size;
+ int ret;
+
+ _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size);
+
+ _enter("{OBJ%x},[%llu]",
+ _object->debug_id, (unsigned long long) ni_size);
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ if (ni_size == object->i_size)
+ return 0;
+
+ if (!object->backer)
+ return -ENOBUFS;
+
+ ASSERT(S_ISREG(object->backer->d_inode->i_mode));
+
+ fscache_set_store_limit(&object->fscache, ni_size);
+
+ oi_size = i_size_read(object->backer->d_inode);
+ if (oi_size == ni_size)
+ return 0;
+
+ newattrs.ia_size = ni_size;
+ newattrs.ia_valid = ATTR_SIZE;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ mutex_lock(&object->backer->d_inode->i_mutex);
+ ret = notify_change(object->backer, &newattrs);
+ mutex_unlock(&object->backer->d_inode->i_mutex);
+ cachefiles_end_secure(cache, saved_cred);
+
+ if (ret == -EIO) {
+ fscache_set_store_limit(&object->fscache, 0);
+ cachefiles_io_error_obj(object, "Size set failed");
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * dissociate a cache from all the pages it was backing
+ */
+static void cachefiles_dissociate_pages(struct fscache_cache *cache)
+{
+ _enter("");
+}
+
+const struct fscache_cache_ops cachefiles_cache_ops = {
+ .name = "cachefiles",
+ .alloc_object = cachefiles_alloc_object,
+ .lookup_object = cachefiles_lookup_object,
+ .lookup_complete = cachefiles_lookup_complete,
+ .grab_object = cachefiles_grab_object,
+ .update_object = cachefiles_update_object,
+ .drop_object = cachefiles_drop_object,
+ .put_object = cachefiles_put_object,
+ .sync_cache = cachefiles_sync_cache,
+ .attr_changed = cachefiles_attr_changed,
+ .read_or_alloc_page = cachefiles_read_or_alloc_page,
+ .read_or_alloc_pages = cachefiles_read_or_alloc_pages,
+ .allocate_page = cachefiles_allocate_page,
+ .allocate_pages = cachefiles_allocate_pages,
+ .write_page = cachefiles_write_page,
+ .uncache_page = cachefiles_uncache_page,
+ .dissociate_pages = cachefiles_dissociate_pages,
+};
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
new file mode 100644
index 000000000000..19218e1463d6
--- /dev/null
+++ b/fs/cachefiles/internal.h
@@ -0,0 +1,360 @@
+/* General netfs cache on cache files internal defs
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/fscache-cache.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+
+struct cachefiles_cache;
+struct cachefiles_object;
+
+extern unsigned cachefiles_debug;
+#define CACHEFILES_DEBUG_KENTER 1
+#define CACHEFILES_DEBUG_KLEAVE 2
+#define CACHEFILES_DEBUG_KDEBUG 4
+
+/*
+ * node records
+ */
+struct cachefiles_object {
+ struct fscache_object fscache; /* fscache handle */
+ struct cachefiles_lookup_data *lookup_data; /* cached lookup data */
+ struct dentry *dentry; /* the file/dir representing this object */
+ struct dentry *backer; /* backing file */
+ loff_t i_size; /* object size */
+ unsigned long flags;
+#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
+ atomic_t usage; /* object usage count */
+ uint8_t type; /* object type */
+ uint8_t new; /* T if object new */
+ spinlock_t work_lock;
+ struct rb_node active_node; /* link in active tree (dentry is key) */
+};
+
+extern struct kmem_cache *cachefiles_object_jar;
+
+/*
+ * Cache files cache definition
+ */
+struct cachefiles_cache {
+ struct fscache_cache cache; /* FS-Cache record */
+ struct vfsmount *mnt; /* mountpoint holding the cache */
+ struct dentry *graveyard; /* directory into which dead objects go */
+ struct file *cachefilesd; /* manager daemon handle */
+ const struct cred *cache_cred; /* security override for accessing cache */
+ struct mutex daemon_mutex; /* command serialisation mutex */
+ wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */
+ struct rb_root active_nodes; /* active nodes (can't be culled) */
+ rwlock_t active_lock; /* lock for active_nodes */
+ atomic_t gravecounter; /* graveyard uniquifier */
+ unsigned frun_percent; /* when to stop culling (% files) */
+ unsigned fcull_percent; /* when to start culling (% files) */
+ unsigned fstop_percent; /* when to stop allocating (% files) */
+ unsigned brun_percent; /* when to stop culling (% blocks) */
+ unsigned bcull_percent; /* when to start culling (% blocks) */
+ unsigned bstop_percent; /* when to stop allocating (% blocks) */
+ unsigned bsize; /* cache's block size */
+ unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
+ uint64_t frun; /* when to stop culling */
+ uint64_t fcull; /* when to start culling */
+ uint64_t fstop; /* when to stop allocating */
+ sector_t brun; /* when to stop culling */
+ sector_t bcull; /* when to start culling */
+ sector_t bstop; /* when to stop allocating */
+ unsigned long flags;
+#define CACHEFILES_READY 0 /* T if cache prepared */
+#define CACHEFILES_DEAD 1 /* T if cache dead */
+#define CACHEFILES_CULLING 2 /* T if cull engaged */
+#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */
+ char *rootdirname; /* name of cache root directory */
+ char *secctx; /* LSM security context */
+ char *tag; /* cache binding tag */
+};
+
+/*
+ * backing file read tracking
+ */
+struct cachefiles_one_read {
+ wait_queue_t monitor; /* link into monitored waitqueue */
+ struct page *back_page; /* backing file page we're waiting for */
+ struct page *netfs_page; /* netfs page we're going to fill */
+ struct fscache_retrieval *op; /* retrieval op covering this */
+ struct list_head op_link; /* link in op's todo list */
+};
+
+/*
+ * backing file write tracking
+ */
+struct cachefiles_one_write {
+ struct page *netfs_page; /* netfs page to copy */
+ struct cachefiles_object *object;
+ struct list_head obj_link; /* link in object's lists */
+ fscache_rw_complete_t end_io_func;
+ void *context;
+};
+
+/*
+ * auxiliary data xattr buffer
+ */
+struct cachefiles_xattr {
+ uint16_t len;
+ uint8_t type;
+ uint8_t data[];
+};
+
+/*
+ * note change of state for daemon
+ */
+static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
+{
+ set_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
+ wake_up_all(&cache->daemon_pollwq);
+}
+
+/*
+ * cf-bind.c
+ */
+extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args);
+extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache);
+
+/*
+ * cf-daemon.c
+ */
+extern const struct file_operations cachefiles_daemon_fops;
+
+extern int cachefiles_has_space(struct cachefiles_cache *cache,
+ unsigned fnr, unsigned bnr);
+
+/*
+ * cf-interface.c
+ */
+extern const struct fscache_cache_ops cachefiles_cache_ops;
+
+/*
+ * cf-key.c
+ */
+extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
+
+/*
+ * cf-namei.c
+ */
+extern int cachefiles_delete_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object);
+extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
+ struct cachefiles_object *object,
+ const char *key,
+ struct cachefiles_xattr *auxdata);
+extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ const char *name);
+
+extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename);
+
+extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
+ struct dentry *dir, char *filename);
+
+/*
+ * cf-proc.c
+ */
+#ifdef CONFIG_CACHEFILES_HISTOGRAM
+extern atomic_t cachefiles_lookup_histogram[HZ];
+extern atomic_t cachefiles_mkdir_histogram[HZ];
+extern atomic_t cachefiles_create_histogram[HZ];
+
+extern int __init cachefiles_proc_init(void);
+extern void cachefiles_proc_cleanup(void);
+static inline
+void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
+{
+ unsigned long jif = jiffies - start_jif;
+ if (jif >= HZ)
+ jif = HZ - 1;
+ atomic_inc(&histogram[jif]);
+}
+
+#else
+#define cachefiles_proc_init() (0)
+#define cachefiles_proc_cleanup() do {} while (0)
+#define cachefiles_hist(hist, start_jif) do {} while (0)
+#endif
+
+/*
+ * cf-rdwr.c
+ */
+extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
+ struct page *, gfp_t);
+extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *,
+ struct list_head *, unsigned *,
+ gfp_t);
+extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *,
+ gfp_t);
+extern int cachefiles_allocate_pages(struct fscache_retrieval *,
+ struct list_head *, unsigned *, gfp_t);
+extern int cachefiles_write_page(struct fscache_storage *, struct page *);
+extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
+
+/*
+ * cf-security.c
+ */
+extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
+extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
+ struct dentry *root,
+ const struct cred **_saved_cred);
+
+static inline void cachefiles_begin_secure(struct cachefiles_cache *cache,
+ const struct cred **_saved_cred)
+{
+ *_saved_cred = override_creds(cache->cache_cred);
+}
+
+static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
+ const struct cred *saved_cred)
+{
+ revert_creds(saved_cred);
+}
+
+/*
+ * cf-xattr.c
+ */
+extern int cachefiles_check_object_type(struct cachefiles_object *object);
+extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata);
+extern int cachefiles_update_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata);
+extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata);
+extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+ struct dentry *dentry);
+
+
+/*
+ * error handling
+ */
+#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__)
+
+#define cachefiles_io_error(___cache, FMT, ...) \
+do { \
+ kerror("I/O Error: " FMT, ##__VA_ARGS__); \
+ fscache_io_error(&(___cache)->cache); \
+ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \
+} while (0)
+
+#define cachefiles_io_error_obj(object, FMT, ...) \
+do { \
+ struct cachefiles_cache *___cache; \
+ \
+ ___cache = container_of((object)->fscache.cache, \
+ struct cachefiles_cache, cache); \
+ cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \
+} while (0)
+
+
+/*
+ * debug tracing
+ */
+#define dbgprintk(FMT, ...) \
+ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+
+/* make sure we maintain the format strings, even when debugging is disabled */
+static inline void _dbprintk(const char *fmt, ...)
+ __attribute__((format(printf, 1, 2)));
+static inline void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
+#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
+#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
+
+#elif defined(CONFIG_CACHEFILES_DEBUG)
+#define _enter(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \
+ kenter(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \
+ kleave(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT, ...) \
+do { \
+ if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \
+ kdebug(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
+#endif
+
+#if 1 /* defined(__KDEBUGALL) */
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#else
+
+#define ASSERT(X) do {} while (0)
+#define ASSERTCMP(X, OP, Y) do {} while (0)
+#define ASSERTIF(C, X) do {} while (0)
+#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
+
+#endif
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
new file mode 100644
index 000000000000..81b8b2b3a674
--- /dev/null
+++ b/fs/cachefiles/key.c
@@ -0,0 +1,159 @@
+/* Key to pathname encoder
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include "internal.h"
+
+static const char cachefiles_charmap[64] =
+ "0123456789" /* 0 - 9 */
+ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */
+ "_-" /* 62 - 63 */
+ ;
+
+static const char cachefiles_filecharmap[256] = {
+ /* we skip space and tab and control chars */
+ [33 ... 46] = 1, /* '!' -> '.' */
+ /* we skip '/' as it's significant to pathwalk */
+ [48 ... 127] = 1, /* '0' -> '~' */
+};
+
+/*
+ * turn the raw key into something cooked
+ * - the raw key should include the length in the two bytes at the front
+ * - the key may be up to 514 bytes in length (including the length word)
+ * - "base64" encode the strange keys, mapping 3 bytes of raw to four of
+ * cooked
+ * - need to cut the cooked key into 252 char lengths (189 raw bytes)
+ */
+char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)
+{
+ unsigned char csum, ch;
+ unsigned int acc;
+ char *key;
+ int loop, len, max, seg, mark, print;
+
+ _enter(",%d", keylen);
+
+ BUG_ON(keylen < 2 || keylen > 514);
+
+ csum = raw[0] + raw[1];
+ print = 1;
+ for (loop = 2; loop < keylen; loop++) {
+ ch = raw[loop];
+ csum += ch;
+ print &= cachefiles_filecharmap[ch];
+ }
+
+ if (print) {
+ /* if the path is usable ASCII, then we render it directly */
+ max = keylen - 2;
+ max += 2; /* two base64'd length chars on the front */
+ max += 5; /* @checksum/M */
+ max += 3 * 2; /* maximum number of segment dividers (".../M")
+ * is ((514 + 251) / 252) = 3
+ */
+ max += 1; /* NUL on end */
+ } else {
+ /* calculate the maximum length of the cooked key */
+ keylen = (keylen + 2) / 3;
+
+ max = keylen * 4;
+ max += 5; /* @checksum/M */
+ max += 3 * 2; /* maximum number of segment dividers (".../M")
+ * is ((514 + 188) / 189) = 3
+ */
+ max += 1; /* NUL on end */
+ }
+
+ max += 1; /* 2nd NUL on end */
+
+ _debug("max: %d", max);
+
+ key = kmalloc(max, GFP_KERNEL);
+ if (!key)
+ return NULL;
+
+ len = 0;
+
+ /* build the cooked key */
+ sprintf(key, "@%02x%c+", (unsigned) csum, 0);
+ len = 5;
+ mark = len - 1;
+
+ if (print) {
+ acc = *(uint16_t *) raw;
+ raw += 2;
+
+ key[len + 1] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ key[len] = cachefiles_charmap[acc & 63];
+ len += 2;
+
+ seg = 250;
+ for (loop = keylen; loop > 0; loop--) {
+ if (seg <= 0) {
+ key[len++] = '\0';
+ mark = len;
+ key[len++] = '+';
+ seg = 252;
+ }
+
+ key[len++] = *raw++;
+ ASSERT(len < max);
+ }
+
+ switch (type) {
+ case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break;
+ case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break;
+ default: type = 'S'; break;
+ }
+ } else {
+ seg = 252;
+ for (loop = keylen; loop > 0; loop--) {
+ if (seg <= 0) {
+ key[len++] = '\0';
+ mark = len;
+ key[len++] = '+';
+ seg = 252;
+ }
+
+ acc = *raw++;
+ acc |= *raw++ << 8;
+ acc |= *raw++ << 16;
+
+ _debug("acc: %06x", acc);
+
+ key[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ key[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ key[len++] = cachefiles_charmap[acc & 63];
+ acc >>= 6;
+ key[len++] = cachefiles_charmap[acc & 63];
+
+ ASSERT(len < max);
+ }
+
+ switch (type) {
+ case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break;
+ case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break;
+ default: type = 'T'; break;
+ }
+ }
+
+ key[mark] = type;
+ key[len++] = 0;
+ key[len] = 0;
+
+ _leave(" = %p %d", key, len);
+ return key;
+}
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
new file mode 100644
index 000000000000..4bfa8cf43bf5
--- /dev/null
+++ b/fs/cachefiles/main.c
@@ -0,0 +1,106 @@
+/* Network filesystem caching backend to use cache files on a premounted
+ * filesystem
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/sysctl.h>
+#include <linux/miscdevice.h>
+#include "internal.h"
+
+unsigned cachefiles_debug;
+module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask");
+
+MODULE_DESCRIPTION("Mounted-filesystem based cache");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+struct kmem_cache *cachefiles_object_jar;
+
+static struct miscdevice cachefiles_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cachefiles",
+ .fops = &cachefiles_daemon_fops,
+};
+
+static void cachefiles_object_init_once(void *_object)
+{
+ struct cachefiles_object *object = _object;
+
+ memset(object, 0, sizeof(*object));
+ spin_lock_init(&object->work_lock);
+}
+
+/*
+ * initialise the fs caching module
+ */
+static int __init cachefiles_init(void)
+{
+ int ret;
+
+ ret = misc_register(&cachefiles_dev);
+ if (ret < 0)
+ goto error_dev;
+
+ /* create an object jar */
+ ret = -ENOMEM;
+ cachefiles_object_jar =
+ kmem_cache_create("cachefiles_object_jar",
+ sizeof(struct cachefiles_object),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ cachefiles_object_init_once);
+ if (!cachefiles_object_jar) {
+ printk(KERN_NOTICE
+ "CacheFiles: Failed to allocate an object jar\n");
+ goto error_object_jar;
+ }
+
+ ret = cachefiles_proc_init();
+ if (ret < 0)
+ goto error_proc;
+
+ printk(KERN_INFO "CacheFiles: Loaded\n");
+ return 0;
+
+error_proc:
+ kmem_cache_destroy(cachefiles_object_jar);
+error_object_jar:
+ misc_deregister(&cachefiles_dev);
+error_dev:
+ kerror("failed to register: %d", ret);
+ return ret;
+}
+
+fs_initcall(cachefiles_init);
+
+/*
+ * clean up on module removal
+ */
+static void __exit cachefiles_exit(void)
+{
+ printk(KERN_INFO "CacheFiles: Unloading\n");
+
+ cachefiles_proc_cleanup();
+ kmem_cache_destroy(cachefiles_object_jar);
+ misc_deregister(&cachefiles_dev);
+}
+
+module_exit(cachefiles_exit);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
new file mode 100644
index 000000000000..4ce818ae39ea
--- /dev/null
+++ b/fs/cachefiles/namei.c
@@ -0,0 +1,771 @@
+/* CacheFiles path walking and related routines
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include "internal.h"
+
+static int cachefiles_wait_bit(void *flags)
+{
+ schedule();
+ return 0;
+}
+
+/*
+ * record the fact that an object is now active
+ */
+static void cachefiles_mark_object_active(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+{
+ struct cachefiles_object *xobject;
+ struct rb_node **_p, *_parent = NULL;
+ struct dentry *dentry;
+
+ _enter(",%p", object);
+
+try_again:
+ write_lock(&cache->active_lock);
+
+ if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags))
+ BUG();
+
+ dentry = object->dentry;
+ _p = &cache->active_nodes.rb_node;
+ while (*_p) {
+ _parent = *_p;
+ xobject = rb_entry(_parent,
+ struct cachefiles_object, active_node);
+
+ ASSERT(xobject != object);
+
+ if (xobject->dentry > dentry)
+ _p = &(*_p)->rb_left;
+ else if (xobject->dentry < dentry)
+ _p = &(*_p)->rb_right;
+ else
+ goto wait_for_old_object;
+ }
+
+ rb_link_node(&object->active_node, _parent, _p);
+ rb_insert_color(&object->active_node, &cache->active_nodes);
+
+ write_unlock(&cache->active_lock);
+ _leave("");
+ return;
+
+ /* an old object from a previous incarnation is hogging the slot - we
+ * need to wait for it to be destroyed */
+wait_for_old_object:
+ if (xobject->fscache.state < FSCACHE_OBJECT_DYING) {
+ printk(KERN_ERR "\n");
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Unexpected object collision\n");
+ printk(KERN_ERR "xobject: OBJ%x\n",
+ xobject->fscache.debug_id);
+ printk(KERN_ERR "xobjstate=%s\n",
+ fscache_object_states[xobject->fscache.state]);
+ printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags);
+ printk(KERN_ERR "xobjevent=%lx [%lx]\n",
+ xobject->fscache.events, xobject->fscache.event_mask);
+ printk(KERN_ERR "xops=%u inp=%u exc=%u\n",
+ xobject->fscache.n_ops, xobject->fscache.n_in_progress,
+ xobject->fscache.n_exclusive);
+ printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n",
+ xobject->fscache.cookie,
+ xobject->fscache.cookie->parent,
+ xobject->fscache.cookie->netfs_data,
+ xobject->fscache.cookie->flags);
+ printk(KERN_ERR "xparent=%p\n",
+ xobject->fscache.parent);
+ printk(KERN_ERR "object: OBJ%x\n",
+ object->fscache.debug_id);
+ printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n",
+ object->fscache.cookie,
+ object->fscache.cookie->parent,
+ object->fscache.cookie->netfs_data,
+ object->fscache.cookie->flags);
+ printk(KERN_ERR "parent=%p\n",
+ object->fscache.parent);
+ BUG();
+ }
+ atomic_inc(&xobject->usage);
+ write_unlock(&cache->active_lock);
+
+ _debug(">>> wait");
+ wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE,
+ cachefiles_wait_bit, TASK_UNINTERRUPTIBLE);
+ _debug("<<< waited");
+
+ cache->cache.ops->put_object(&xobject->fscache);
+ goto try_again;
+}
+
+/*
+ * delete an object representation from the cache
+ * - file backed objects are unlinked
+ * - directory backed objects are stuffed into the graveyard for userspace to
+ * delete
+ * - unlocks the directory mutex
+ */
+static int cachefiles_bury_object(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ struct dentry *rep)
+{
+ struct dentry *grave, *trap;
+ char nbuffer[8 + 8 + 1];
+ int ret;
+
+ _enter(",'%*.*s','%*.*s'",
+ dir->d_name.len, dir->d_name.len, dir->d_name.name,
+ rep->d_name.len, rep->d_name.len, rep->d_name.name);
+
+ /* non-directories can just be unlinked */
+ if (!S_ISDIR(rep->d_inode->i_mode)) {
+ _debug("unlink stale object");
+ ret = vfs_unlink(dir->d_inode, rep);
+
+ mutex_unlock(&dir->d_inode->i_mutex);
+
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "Unlink failed");
+
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ /* directories have to be moved to the graveyard */
+ _debug("move stale object to graveyard");
+ mutex_unlock(&dir->d_inode->i_mutex);
+
+try_again:
+ /* first step is to make up a grave dentry in the graveyard */
+ sprintf(nbuffer, "%08x%08x",
+ (uint32_t) get_seconds(),
+ (uint32_t) atomic_inc_return(&cache->gravecounter));
+
+ /* do the multiway lock magic */
+ trap = lock_rename(cache->graveyard, dir);
+
+ /* do some checks before getting the grave dentry */
+ if (rep->d_parent != dir) {
+ /* the entry was probably culled when we dropped the parent dir
+ * lock */
+ unlock_rename(cache->graveyard, dir);
+ _leave(" = 0 [culled?]");
+ return 0;
+ }
+
+ if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Graveyard no longer a directory");
+ return -EIO;
+ }
+
+ if (trap == rep) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ if (d_mountpoint(rep)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Mountpoint in cache");
+ return -EIO;
+ }
+
+ grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
+ if (IS_ERR(grave)) {
+ unlock_rename(cache->graveyard, dir);
+
+ if (PTR_ERR(grave) == -ENOMEM) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ cachefiles_io_error(cache, "Lookup error %ld",
+ PTR_ERR(grave));
+ return -EIO;
+ }
+
+ if (grave->d_inode) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ grave = NULL;
+ cond_resched();
+ goto try_again;
+ }
+
+ if (d_mountpoint(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "Mountpoint in graveyard");
+ return -EIO;
+ }
+
+ /* target should not be an ancestor of source */
+ if (trap == grave) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ /* attempt the rename */
+ ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
+ if (ret != 0 && ret != -ENOMEM)
+ cachefiles_io_error(cache, "Rename failed with error %d", ret);
+
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * delete an object representation from the cache
+ */
+int cachefiles_delete_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+{
+ struct dentry *dir;
+ int ret;
+
+ _enter(",{%p}", object->dentry);
+
+ ASSERT(object->dentry);
+ ASSERT(object->dentry->d_inode);
+ ASSERT(object->dentry->d_parent);
+
+ dir = dget_parent(object->dentry);
+
+ mutex_lock(&dir->d_inode->i_mutex);
+ ret = cachefiles_bury_object(cache, dir, object->dentry);
+
+ dput(dir);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * walk from the parent object to the child object through the backing
+ * filesystem, creating directories as we go
+ */
+int cachefiles_walk_to_object(struct cachefiles_object *parent,
+ struct cachefiles_object *object,
+ const char *key,
+ struct cachefiles_xattr *auxdata)
+{
+ struct cachefiles_cache *cache;
+ struct dentry *dir, *next = NULL;
+ unsigned long start;
+ const char *name;
+ int ret, nlen;
+
+ _enter("{%p},,%s,", parent->dentry, key);
+
+ cache = container_of(parent->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ ASSERT(parent->dentry);
+ ASSERT(parent->dentry->d_inode);
+
+ if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) {
+ // TODO: convert file to dir
+ _leave("looking up in none directory");
+ return -ENOBUFS;
+ }
+
+ dir = dget(parent->dentry);
+
+advance:
+ /* attempt to transit the first directory component */
+ name = key;
+ nlen = strlen(key);
+
+ /* key ends in a double NUL */
+ key = key + nlen + 1;
+ if (!*key)
+ key = NULL;
+
+lookup_again:
+ /* search the current directory for the element name */
+ _debug("lookup '%s'", name);
+
+ mutex_lock(&dir->d_inode->i_mutex);
+
+ start = jiffies;
+ next = lookup_one_len(name, dir, nlen);
+ cachefiles_hist(cachefiles_lookup_histogram, start);
+ if (IS_ERR(next))
+ goto lookup_error;
+
+ _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
+
+ if (!key)
+ object->new = !next->d_inode;
+
+ /* if this element of the path doesn't exist, then the lookup phase
+ * failed, and we can release any readers in the certain knowledge that
+ * there's nothing for them to actually read */
+ if (!next->d_inode)
+ fscache_object_lookup_negative(&object->fscache);
+
+ /* we need to create the object if it's negative */
+ if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
+ /* index objects and intervening tree levels must be subdirs */
+ if (!next->d_inode) {
+ ret = cachefiles_has_space(cache, 1, 0);
+ if (ret < 0)
+ goto create_error;
+
+ start = jiffies;
+ ret = vfs_mkdir(dir->d_inode, next, 0);
+ cachefiles_hist(cachefiles_mkdir_histogram, start);
+ if (ret < 0)
+ goto create_error;
+
+ ASSERT(next->d_inode);
+
+ _debug("mkdir -> %p{%p{ino=%lu}}",
+ next, next->d_inode, next->d_inode->i_ino);
+
+ } else if (!S_ISDIR(next->d_inode->i_mode)) {
+ kerror("inode %lu is not a directory",
+ next->d_inode->i_ino);
+ ret = -ENOBUFS;
+ goto error;
+ }
+
+ } else {
+ /* non-index objects start out life as files */
+ if (!next->d_inode) {
+ ret = cachefiles_has_space(cache, 1, 0);
+ if (ret < 0)
+ goto create_error;
+
+ start = jiffies;
+ ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
+ cachefiles_hist(cachefiles_create_histogram, start);
+ if (ret < 0)
+ goto create_error;
+
+ ASSERT(next->d_inode);
+
+ _debug("create -> %p{%p{ino=%lu}}",
+ next, next->d_inode, next->d_inode->i_ino);
+
+ } else if (!S_ISDIR(next->d_inode->i_mode) &&
+ !S_ISREG(next->d_inode->i_mode)
+ ) {
+ kerror("inode %lu is not a file or directory",
+ next->d_inode->i_ino);
+ ret = -ENOBUFS;
+ goto error;
+ }
+ }
+
+ /* process the next component */
+ if (key) {
+ _debug("advance");
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(dir);
+ dir = next;
+ next = NULL;
+ goto advance;
+ }
+
+ /* we've found the object we were looking for */
+ object->dentry = next;
+
+ /* if we've found that the terminal object exists, then we need to
+ * check its attributes and delete it if it's out of date */
+ if (!object->new) {
+ _debug("validate '%*.*s'",
+ next->d_name.len, next->d_name.len, next->d_name.name);
+
+ ret = cachefiles_check_object_xattr(object, auxdata);
+ if (ret == -ESTALE) {
+ /* delete the object (the deleter drops the directory
+ * mutex) */
+ object->dentry = NULL;
+
+ ret = cachefiles_bury_object(cache, dir, next);
+ dput(next);
+ next = NULL;
+
+ if (ret < 0)
+ goto delete_error;
+
+ _debug("redo lookup");
+ goto lookup_again;
+ }
+ }
+
+ /* note that we're now using this object */
+ cachefiles_mark_object_active(cache, object);
+
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(dir);
+ dir = NULL;
+
+ _debug("=== OBTAINED_OBJECT ===");
+
+ if (object->new) {
+ /* attach data to a newly constructed terminal object */
+ ret = cachefiles_set_object_xattr(object, auxdata);
+ if (ret < 0)
+ goto check_error;
+ } else {
+ /* always update the atime on an object we've just looked up
+ * (this is used to keep track of culling, and atimes are only
+ * updated by read, write and readdir but not lookup or
+ * open) */
+ touch_atime(cache->mnt, next);
+ }
+
+ /* open a file interface onto a data file */
+ if (object->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ if (S_ISREG(object->dentry->d_inode->i_mode)) {
+ const struct address_space_operations *aops;
+
+ ret = -EPERM;
+ aops = object->dentry->d_inode->i_mapping->a_ops;
+ if (!aops->bmap)
+ goto check_error;
+
+ object->backer = object->dentry;
+ } else {
+ BUG(); // TODO: open file in data-class subdir
+ }
+ }
+
+ object->new = 0;
+ fscache_obtained_object(&object->fscache);
+
+ _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
+ return 0;
+
+create_error:
+ _debug("create error %d", ret);
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "Create/mkdir failed");
+ goto error;
+
+check_error:
+ _debug("check error %d", ret);
+ write_lock(&cache->active_lock);
+ rb_erase(&object->active_node, &cache->active_nodes);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
+ wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
+ write_unlock(&cache->active_lock);
+
+ dput(object->dentry);
+ object->dentry = NULL;
+ goto error_out;
+
+delete_error:
+ _debug("delete error %d", ret);
+ goto error_out2;
+
+lookup_error:
+ _debug("lookup error %ld", PTR_ERR(next));
+ ret = PTR_ERR(next);
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "Lookup failed");
+ next = NULL;
+error:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(next);
+error_out2:
+ dput(dir);
+error_out:
+ if (ret == -ENOSPC)
+ ret = -ENOBUFS;
+
+ _leave(" = error %d", -ret);
+ return ret;
+}
+
+/*
+ * get a subdirectory
+ */
+struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ const char *dirname)
+{
+ struct dentry *subdir;
+ unsigned long start;
+ int ret;
+
+ _enter(",,%s", dirname);
+
+ /* search the current directory for the element name */
+ mutex_lock(&dir->d_inode->i_mutex);
+
+ start = jiffies;
+ subdir = lookup_one_len(dirname, dir, strlen(dirname));
+ cachefiles_hist(cachefiles_lookup_histogram, start);
+ if (IS_ERR(subdir)) {
+ if (PTR_ERR(subdir) == -ENOMEM)
+ goto nomem_d_alloc;
+ goto lookup_error;
+ }
+
+ _debug("subdir -> %p %s",
+ subdir, subdir->d_inode ? "positive" : "negative");
+
+ /* we need to create the subdir if it doesn't exist yet */
+ if (!subdir->d_inode) {
+ ret = cachefiles_has_space(cache, 1, 0);
+ if (ret < 0)
+ goto mkdir_error;
+
+ _debug("attempt mkdir");
+
+ ret = vfs_mkdir(dir->d_inode, subdir, 0700);
+ if (ret < 0)
+ goto mkdir_error;
+
+ ASSERT(subdir->d_inode);
+
+ _debug("mkdir -> %p{%p{ino=%lu}}",
+ subdir,
+ subdir->d_inode,
+ subdir->d_inode->i_ino);
+ }
+
+ mutex_unlock(&dir->d_inode->i_mutex);
+
+ /* we need to make sure the subdir is a directory */
+ ASSERT(subdir->d_inode);
+
+ if (!S_ISDIR(subdir->d_inode->i_mode)) {
+ kerror("%s is not a directory", dirname);
+ ret = -EIO;
+ goto check_error;
+ }
+
+ ret = -EPERM;
+ if (!subdir->d_inode->i_op ||
+ !subdir->d_inode->i_op->setxattr ||
+ !subdir->d_inode->i_op->getxattr ||
+ !subdir->d_inode->i_op->lookup ||
+ !subdir->d_inode->i_op->mkdir ||
+ !subdir->d_inode->i_op->create ||
+ !subdir->d_inode->i_op->rename ||
+ !subdir->d_inode->i_op->rmdir ||
+ !subdir->d_inode->i_op->unlink)
+ goto check_error;
+
+ _leave(" = [%lu]", subdir->d_inode->i_ino);
+ return subdir;
+
+check_error:
+ dput(subdir);
+ _leave(" = %d [check]", ret);
+ return ERR_PTR(ret);
+
+mkdir_error:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(subdir);
+ kerror("mkdir %s failed with error %d", dirname, ret);
+ return ERR_PTR(ret);
+
+lookup_error:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = PTR_ERR(subdir);
+ kerror("Lookup %s failed with error %d", dirname, ret);
+ return ERR_PTR(ret);
+
+nomem_d_alloc:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * find out if an object is in use or not
+ * - if finds object and it's not in use:
+ * - returns a pointer to the object and a reference on it
+ * - returns with the directory locked
+ */
+static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ char *filename)
+{
+ struct cachefiles_object *object;
+ struct rb_node *_n;
+ struct dentry *victim;
+ unsigned long start;
+ int ret;
+
+ //_enter(",%*.*s/,%s",
+ // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+
+ /* look up the victim */
+ mutex_lock_nested(&dir->d_inode->i_mutex, 1);
+
+ start = jiffies;
+ victim = lookup_one_len(filename, dir, strlen(filename));
+ cachefiles_hist(cachefiles_lookup_histogram, start);
+ if (IS_ERR(victim))
+ goto lookup_error;
+
+ //_debug("victim -> %p %s",
+ // victim, victim->d_inode ? "positive" : "negative");
+
+ /* if the object is no longer there then we probably retired the object
+ * at the netfs's request whilst the cull was in progress
+ */
+ if (!victim->d_inode) {
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(victim);
+ _leave(" = -ENOENT [absent]");
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* check to see if we're using this object */
+ read_lock(&cache->active_lock);
+
+ _n = cache->active_nodes.rb_node;
+
+ while (_n) {
+ object = rb_entry(_n, struct cachefiles_object, active_node);
+
+ if (object->dentry > victim)
+ _n = _n->rb_left;
+ else if (object->dentry < victim)
+ _n = _n->rb_right;
+ else
+ goto object_in_use;
+ }
+
+ read_unlock(&cache->active_lock);
+
+ //_leave(" = %p", victim);
+ return victim;
+
+object_in_use:
+ read_unlock(&cache->active_lock);
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(victim);
+ //_leave(" = -EBUSY [in use]");
+ return ERR_PTR(-EBUSY);
+
+lookup_error:
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = PTR_ERR(victim);
+ if (ret == -ENOENT) {
+ /* file or dir now absent - probably retired by netfs */
+ _leave(" = -ESTALE [absent]");
+ return ERR_PTR(-ESTALE);
+ }
+
+ if (ret == -EIO) {
+ cachefiles_io_error(cache, "Lookup failed");
+ } else if (ret != -ENOMEM) {
+ kerror("Internal error: %d", ret);
+ ret = -EIO;
+ }
+
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * cull an object if it's not in use
+ * - called only by cache manager daemon
+ */
+int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ int ret;
+
+ _enter(",%*.*s/,%s",
+ dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+
+ victim = cachefiles_check_active(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ _debug("victim -> %p %s",
+ victim, victim->d_inode ? "positive" : "negative");
+
+ /* okay... the victim is not being used so we can cull it
+ * - start by marking it as stale
+ */
+ _debug("victim is cullable");
+
+ ret = cachefiles_remove_object_xattr(cache, victim);
+ if (ret < 0)
+ goto error_unlock;
+
+ /* actually remove the victim (drops the dir mutex) */
+ _debug("bury");
+
+ ret = cachefiles_bury_object(cache, dir, victim);
+ if (ret < 0)
+ goto error;
+
+ dput(victim);
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
+error:
+ dput(victim);
+ if (ret == -ENOENT) {
+ /* file or dir now absent - probably retired by netfs */
+ _leave(" = -ESTALE [absent]");
+ return -ESTALE;
+ }
+
+ if (ret != -ENOMEM) {
+ kerror("Internal error: %d", ret);
+ ret = -EIO;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * find out if an object is in use or not
+ * - called only by cache manager daemon
+ * - returns -EBUSY or 0 to indicate whether an object is in use or not
+ */
+int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+
+ //_enter(",%*.*s/,%s",
+ // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+
+ victim = cachefiles_check_active(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(victim);
+ //_leave(" = 0");
+ return 0;
+}
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
new file mode 100644
index 000000000000..eccd33941199
--- /dev/null
+++ b/fs/cachefiles/proc.c
@@ -0,0 +1,134 @@
+/* CacheFiles statistics
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+atomic_t cachefiles_lookup_histogram[HZ];
+atomic_t cachefiles_mkdir_histogram[HZ];
+atomic_t cachefiles_create_histogram[HZ];
+
+/*
+ * display the latency histogram
+ */
+static int cachefiles_histogram_show(struct seq_file *m, void *v)
+{
+ unsigned long index;
+ unsigned x, y, z, t;
+
+ switch ((unsigned long) v) {
+ case 1:
+ seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n");
+ return 0;
+ case 2:
+ seq_puts(m, "===== ===== ========= ========= =========\n");
+ return 0;
+ default:
+ index = (unsigned long) v - 3;
+ x = atomic_read(&cachefiles_lookup_histogram[index]);
+ y = atomic_read(&cachefiles_mkdir_histogram[index]);
+ z = atomic_read(&cachefiles_create_histogram[index]);
+ if (x == 0 && y == 0 && z == 0)
+ return 0;
+
+ t = (index * 1000) / HZ;
+
+ seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z);
+ return 0;
+ }
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos)
+{
+ if ((unsigned long long)*_pos >= HZ + 2)
+ return NULL;
+ if (*_pos == 0)
+ *_pos = 1;
+ return (void *)(unsigned long) *_pos;
+}
+
+/*
+ * move to the next line
+ */
+static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return (unsigned long long)*pos > HZ + 2 ?
+ NULL : (void *)(unsigned long) *pos;
+}
+
+/*
+ * clean up after reading
+ */
+static void cachefiles_histogram_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations cachefiles_histogram_ops = {
+ .start = cachefiles_histogram_start,
+ .stop = cachefiles_histogram_stop,
+ .next = cachefiles_histogram_next,
+ .show = cachefiles_histogram_show,
+};
+
+/*
+ * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
+ */
+static int cachefiles_histogram_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &cachefiles_histogram_ops);
+}
+
+static const struct file_operations cachefiles_histogram_fops = {
+ .owner = THIS_MODULE,
+ .open = cachefiles_histogram_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * initialise the /proc/fs/cachefiles/ directory
+ */
+int __init cachefiles_proc_init(void)
+{
+ _enter("");
+
+ if (!proc_mkdir("fs/cachefiles", NULL))
+ goto error_dir;
+
+ if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
+ &cachefiles_histogram_fops))
+ goto error_histogram;
+
+ _leave(" = 0");
+ return 0;
+
+error_histogram:
+ remove_proc_entry("fs/cachefiles", NULL);
+error_dir:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/cachefiles/ directory
+ */
+void cachefiles_proc_cleanup(void)
+{
+ remove_proc_entry("fs/cachefiles/histogram", NULL);
+ remove_proc_entry("fs/cachefiles", NULL);
+}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
new file mode 100644
index 000000000000..a69787e7dd96
--- /dev/null
+++ b/fs/cachefiles/rdwr.c
@@ -0,0 +1,879 @@
+/* Storage object read/write
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/mount.h>
+#include <linux/file.h>
+#include "internal.h"
+
+/*
+ * detect wake up events generated by the unlocking of pages in which we're
+ * interested
+ * - we use this to detect read completion of backing pages
+ * - the caller holds the waitqueue lock
+ */
+static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+ int sync, void *_key)
+{
+ struct cachefiles_one_read *monitor =
+ container_of(wait, struct cachefiles_one_read, monitor);
+ struct cachefiles_object *object;
+ struct wait_bit_key *key = _key;
+ struct page *page = wait->private;
+
+ ASSERT(key);
+
+ _enter("{%lu},%u,%d,{%p,%u}",
+ monitor->netfs_page->index, mode, sync,
+ key->flags, key->bit_nr);
+
+ if (key->flags != &page->flags ||
+ key->bit_nr != PG_locked)
+ return 0;
+
+ _debug("--- monitor %p %lx ---", page, page->flags);
+
+ if (!PageUptodate(page) && !PageError(page))
+ dump_stack();
+
+ /* remove from the waitqueue */
+ list_del(&wait->task_list);
+
+ /* move onto the action list and queue for FS-Cache thread pool */
+ ASSERT(monitor->op);
+
+ object = container_of(monitor->op->op.object,
+ struct cachefiles_object, fscache);
+
+ spin_lock(&object->work_lock);
+ list_add_tail(&monitor->op_link, &monitor->op->to_do);
+ spin_unlock(&object->work_lock);
+
+ fscache_enqueue_retrieval(monitor->op);
+ return 0;
+}
+
+/*
+ * copy data from backing pages to netfs pages to complete a read operation
+ * - driven by FS-Cache's thread pool
+ */
+static void cachefiles_read_copier(struct fscache_operation *_op)
+{
+ struct cachefiles_one_read *monitor;
+ struct cachefiles_object *object;
+ struct fscache_retrieval *op;
+ struct pagevec pagevec;
+ int error, max;
+
+ op = container_of(_op, struct fscache_retrieval, op);
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+
+ _enter("{ino=%lu}", object->backer->d_inode->i_ino);
+
+ pagevec_init(&pagevec, 0);
+
+ max = 8;
+ spin_lock_irq(&object->work_lock);
+
+ while (!list_empty(&op->to_do)) {
+ monitor = list_entry(op->to_do.next,
+ struct cachefiles_one_read, op_link);
+ list_del(&monitor->op_link);
+
+ spin_unlock_irq(&object->work_lock);
+
+ _debug("- copy {%lu}", monitor->back_page->index);
+
+ error = -EIO;
+ if (PageUptodate(monitor->back_page)) {
+ copy_highpage(monitor->netfs_page, monitor->back_page);
+
+ pagevec_add(&pagevec, monitor->netfs_page);
+ fscache_mark_pages_cached(monitor->op, &pagevec);
+ error = 0;
+ }
+
+ if (error)
+ cachefiles_io_error_obj(
+ object,
+ "Readpage failed on backing file %lx",
+ (unsigned long) monitor->back_page->flags);
+
+ page_cache_release(monitor->back_page);
+
+ fscache_end_io(op, monitor->netfs_page, error);
+ page_cache_release(monitor->netfs_page);
+ fscache_put_retrieval(op);
+ kfree(monitor);
+
+ /* let the thread pool have some air occasionally */
+ max--;
+ if (max < 0 || need_resched()) {
+ if (!list_empty(&op->to_do))
+ fscache_enqueue_retrieval(op);
+ _leave(" [maxed out]");
+ return;
+ }
+
+ spin_lock_irq(&object->work_lock);
+ }
+
+ spin_unlock_irq(&object->work_lock);
+ _leave("");
+}
+
+/*
+ * read the corresponding page to the given set from the backing file
+ * - an uncertain page is simply discarded, to be tried again another time
+ */
+static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
+ struct fscache_retrieval *op,
+ struct page *netpage,
+ struct pagevec *pagevec)
+{
+ struct cachefiles_one_read *monitor;
+ struct address_space *bmapping;
+ struct page *newpage, *backpage;
+ int ret;
+
+ _enter("");
+
+ pagevec_reinit(pagevec);
+
+ _debug("read back %p{%lu,%d}",
+ netpage, netpage->index, page_count(netpage));
+
+ monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+ if (!monitor)
+ goto nomem;
+
+ monitor->netfs_page = netpage;
+ monitor->op = fscache_get_retrieval(op);
+
+ init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
+
+ /* attempt to get hold of the backing page */
+ bmapping = object->backer->d_inode->i_mapping;
+ newpage = NULL;
+
+ for (;;) {
+ backpage = find_get_page(bmapping, netpage->index);
+ if (backpage)
+ goto backing_page_already_present;
+
+ if (!newpage) {
+ newpage = page_cache_alloc_cold(bmapping);
+ if (!newpage)
+ goto nomem_monitor;
+ }
+
+ ret = add_to_page_cache(newpage, bmapping,
+ netpage->index, GFP_KERNEL);
+ if (ret == 0)
+ goto installed_new_backing_page;
+ if (ret != -EEXIST)
+ goto nomem_page;
+ }
+
+ /* we've installed a new backing page, so now we need to add it
+ * to the LRU list and start it reading */
+installed_new_backing_page:
+ _debug("- new %p", newpage);
+
+ backpage = newpage;
+ newpage = NULL;
+
+ page_cache_get(backpage);
+ pagevec_add(pagevec, backpage);
+ __pagevec_lru_add_file(pagevec);
+
+read_backing_page:
+ ret = bmapping->a_ops->readpage(NULL, backpage);
+ if (ret < 0)
+ goto read_error;
+
+ /* set the monitor to transfer the data across */
+monitor_backing_page:
+ _debug("- monitor add");
+
+ /* install the monitor */
+ page_cache_get(monitor->netfs_page);
+ page_cache_get(backpage);
+ monitor->back_page = backpage;
+ monitor->monitor.private = backpage;
+ add_page_wait_queue(backpage, &monitor->monitor);
+ monitor = NULL;
+
+ /* but the page may have been read before the monitor was installed, so
+ * the monitor may miss the event - so we have to ensure that we do get
+ * one in such a case */
+ if (trylock_page(backpage)) {
+ _debug("jumpstart %p {%lx}", backpage, backpage->flags);
+ unlock_page(backpage);
+ }
+ goto success;
+
+ /* if the backing page is already present, it can be in one of
+ * three states: read in progress, read failed or read okay */
+backing_page_already_present:
+ _debug("- present");
+
+ if (newpage) {
+ page_cache_release(newpage);
+ newpage = NULL;
+ }
+
+ if (PageError(backpage))
+ goto io_error;
+
+ if (PageUptodate(backpage))
+ goto backing_page_already_uptodate;
+
+ if (!trylock_page(backpage))
+ goto monitor_backing_page;
+ _debug("read %p {%lx}", backpage, backpage->flags);
+ goto read_backing_page;
+
+ /* the backing page is already up to date, attach the netfs
+ * page to the pagecache and LRU and copy the data across */
+backing_page_already_uptodate:
+ _debug("- uptodate");
+
+ pagevec_add(pagevec, netpage);
+ fscache_mark_pages_cached(op, pagevec);
+
+ copy_highpage(netpage, backpage);
+ fscache_end_io(op, netpage, 0);
+
+success:
+ _debug("success");
+ ret = 0;
+
+out:
+ if (backpage)
+ page_cache_release(backpage);
+ if (monitor) {
+ fscache_put_retrieval(monitor->op);
+ kfree(monitor);
+ }
+ _leave(" = %d", ret);
+ return ret;
+
+read_error:
+ _debug("read error %d", ret);
+ if (ret == -ENOMEM)
+ goto out;
+io_error:
+ cachefiles_io_error_obj(object, "Page read error on backing file");
+ ret = -ENOBUFS;
+ goto out;
+
+nomem_page:
+ page_cache_release(newpage);
+nomem_monitor:
+ fscache_put_retrieval(monitor->op);
+ kfree(monitor);
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * read a page from the cache or allocate a block in which to store it
+ * - cache withdrawal is prevented by the caller
+ * - returns -EINTR if interrupted
+ * - returns -ENOMEM if ran out of memory
+ * - returns -ENOBUFS if no buffers can be made available
+ * - returns -ENOBUFS if page is beyond EOF
+ * - if the page is backed by a block in the cache:
+ * - a read will be started which will call the callback on completion
+ * - 0 will be returned
+ * - else if the page is unbacked:
+ * - the metadata will be retained
+ * - -ENODATA will be returned
+ */
+int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
+ struct page *page,
+ gfp_t gfp)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct pagevec pagevec;
+ struct inode *inode;
+ sector_t block0, block;
+ unsigned shift;
+ int ret;
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ _enter("{%p},{%lx},,,", object, page->index);
+
+ if (!object->backer)
+ return -ENOBUFS;
+
+ inode = object->backer->d_inode;
+ ASSERT(S_ISREG(inode->i_mode));
+ ASSERT(inode->i_mapping->a_ops->bmap);
+ ASSERT(inode->i_mapping->a_ops->readpages);
+
+ /* calculate the shift required to use bmap */
+ if (inode->i_sb->s_blocksize > PAGE_SIZE)
+ return -ENOBUFS;
+
+ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
+
+ op->op.flags = FSCACHE_OP_FAST;
+ op->op.processor = cachefiles_read_copier;
+
+ pagevec_init(&pagevec, 0);
+
+ /* we assume the absence or presence of the first block is a good
+ * enough indication for the page as a whole
+ * - TODO: don't use bmap() for this as it is _not_ actually good
+ * enough for this as it doesn't indicate errors, but it's all we've
+ * got for the moment
+ */
+ block0 = page->index;
+ block0 <<= shift;
+
+ block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
+ _debug("%llx -> %llx",
+ (unsigned long long) block0,
+ (unsigned long long) block);
+
+ if (block) {
+ /* submit the apparently valid page to the backing fs to be
+ * read from disk */
+ ret = cachefiles_read_backing_file_one(object, op, page,
+ &pagevec);
+ } else if (cachefiles_has_space(cache, 0, 1) == 0) {
+ /* there's space in the cache we can use */
+ pagevec_add(&pagevec, page);
+ fscache_mark_pages_cached(op, &pagevec);
+ ret = -ENODATA;
+ } else {
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * read the corresponding pages to the given set from the backing file
+ * - any uncertain pages are simply discarded, to be tried again another time
+ */
+static int cachefiles_read_backing_file(struct cachefiles_object *object,
+ struct fscache_retrieval *op,
+ struct list_head *list,
+ struct pagevec *mark_pvec)
+{
+ struct cachefiles_one_read *monitor = NULL;
+ struct address_space *bmapping = object->backer->d_inode->i_mapping;
+ struct pagevec lru_pvec;
+ struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
+ int ret = 0;
+
+ _enter("");
+
+ pagevec_init(&lru_pvec, 0);
+
+ list_for_each_entry_safe(netpage, _n, list, lru) {
+ list_del(&netpage->lru);
+
+ _debug("read back %p{%lu,%d}",
+ netpage, netpage->index, page_count(netpage));
+
+ if (!monitor) {
+ monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
+ if (!monitor)
+ goto nomem;
+
+ monitor->op = fscache_get_retrieval(op);
+ init_waitqueue_func_entry(&monitor->monitor,
+ cachefiles_read_waiter);
+ }
+
+ for (;;) {
+ backpage = find_get_page(bmapping, netpage->index);
+ if (backpage)
+ goto backing_page_already_present;
+
+ if (!newpage) {
+ newpage = page_cache_alloc_cold(bmapping);
+ if (!newpage)
+ goto nomem;
+ }
+
+ ret = add_to_page_cache(newpage, bmapping,
+ netpage->index, GFP_KERNEL);
+ if (ret == 0)
+ goto installed_new_backing_page;
+ if (ret != -EEXIST)
+ goto nomem;
+ }
+
+ /* we've installed a new backing page, so now we need to add it
+ * to the LRU list and start it reading */
+ installed_new_backing_page:
+ _debug("- new %p", newpage);
+
+ backpage = newpage;
+ newpage = NULL;
+
+ page_cache_get(backpage);
+ if (!pagevec_add(&lru_pvec, backpage))
+ __pagevec_lru_add_file(&lru_pvec);
+
+ reread_backing_page:
+ ret = bmapping->a_ops->readpage(NULL, backpage);
+ if (ret < 0)
+ goto read_error;
+
+ /* add the netfs page to the pagecache and LRU, and set the
+ * monitor to transfer the data across */
+ monitor_backing_page:
+ _debug("- monitor add");
+
+ ret = add_to_page_cache(netpage, op->mapping, netpage->index,
+ GFP_KERNEL);
+ if (ret < 0) {
+ if (ret == -EEXIST) {
+ page_cache_release(netpage);
+ continue;
+ }
+ goto nomem;
+ }
+
+ page_cache_get(netpage);
+ if (!pagevec_add(&lru_pvec, netpage))
+ __pagevec_lru_add_file(&lru_pvec);
+
+ /* install a monitor */
+ page_cache_get(netpage);
+ monitor->netfs_page = netpage;
+
+ page_cache_get(backpage);
+ monitor->back_page = backpage;
+ monitor->monitor.private = backpage;
+ add_page_wait_queue(backpage, &monitor->monitor);
+ monitor = NULL;
+
+ /* but the page may have been read before the monitor was
+ * installed, so the monitor may miss the event - so we have to
+ * ensure that we do get one in such a case */
+ if (trylock_page(backpage)) {
+ _debug("2unlock %p {%lx}", backpage, backpage->flags);
+ unlock_page(backpage);
+ }
+
+ page_cache_release(backpage);
+ backpage = NULL;
+
+ page_cache_release(netpage);
+ netpage = NULL;
+ continue;
+
+ /* if the backing page is already present, it can be in one of
+ * three states: read in progress, read failed or read okay */
+ backing_page_already_present:
+ _debug("- present %p", backpage);
+
+ if (PageError(backpage))
+ goto io_error;
+
+ if (PageUptodate(backpage))
+ goto backing_page_already_uptodate;
+
+ _debug("- not ready %p{%lx}", backpage, backpage->flags);
+
+ if (!trylock_page(backpage))
+ goto monitor_backing_page;
+
+ if (PageError(backpage)) {
+ _debug("error %lx", backpage->flags);
+ unlock_page(backpage);
+ goto io_error;
+ }
+
+ if (PageUptodate(backpage))
+ goto backing_page_already_uptodate_unlock;
+
+ /* we've locked a page that's neither up to date nor erroneous,
+ * so we need to attempt to read it again */
+ goto reread_backing_page;
+
+ /* the backing page is already up to date, attach the netfs
+ * page to the pagecache and LRU and copy the data across */
+ backing_page_already_uptodate_unlock:
+ _debug("uptodate %lx", backpage->flags);
+ unlock_page(backpage);
+ backing_page_already_uptodate:
+ _debug("- uptodate");
+
+ ret = add_to_page_cache(netpage, op->mapping, netpage->index,
+ GFP_KERNEL);
+ if (ret < 0) {
+ if (ret == -EEXIST) {
+ page_cache_release(netpage);
+ continue;
+ }
+ goto nomem;
+ }
+
+ copy_highpage(netpage, backpage);
+
+ page_cache_release(backpage);
+ backpage = NULL;
+
+ if (!pagevec_add(mark_pvec, netpage))
+ fscache_mark_pages_cached(op, mark_pvec);
+
+ page_cache_get(netpage);
+ if (!pagevec_add(&lru_pvec, netpage))
+ __pagevec_lru_add_file(&lru_pvec);
+
+ fscache_end_io(op, netpage, 0);
+ page_cache_release(netpage);
+ netpage = NULL;
+ continue;
+ }
+
+ netpage = NULL;
+
+ _debug("out");
+
+out:
+ /* tidy up */
+ pagevec_lru_add_file(&lru_pvec);
+
+ if (newpage)
+ page_cache_release(newpage);
+ if (netpage)
+ page_cache_release(netpage);
+ if (backpage)
+ page_cache_release(backpage);
+ if (monitor) {
+ fscache_put_retrieval(op);
+ kfree(monitor);
+ }
+
+ list_for_each_entry_safe(netpage, _n, list, lru) {
+ list_del(&netpage->lru);
+ page_cache_release(netpage);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+
+nomem:
+ _debug("nomem");
+ ret = -ENOMEM;
+ goto out;
+
+read_error:
+ _debug("read error %d", ret);
+ if (ret == -ENOMEM)
+ goto out;
+io_error:
+ cachefiles_io_error_obj(object, "Page read error on backing file");
+ ret = -ENOBUFS;
+ goto out;
+}
+
+/*
+ * read a list of pages from the cache or allocate blocks in which to store
+ * them
+ */
+int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
+ struct list_head *pages,
+ unsigned *nr_pages,
+ gfp_t gfp)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct list_head backpages;
+ struct pagevec pagevec;
+ struct inode *inode;
+ struct page *page, *_n;
+ unsigned shift, nrbackpages;
+ int ret, ret2, space;
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ _enter("{OBJ%x,%d},,%d,,",
+ object->fscache.debug_id, atomic_read(&op->op.usage),
+ *nr_pages);
+
+ if (!object->backer)
+ return -ENOBUFS;
+
+ space = 1;
+ if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
+ space = 0;
+
+ inode = object->backer->d_inode;
+ ASSERT(S_ISREG(inode->i_mode));
+ ASSERT(inode->i_mapping->a_ops->bmap);
+ ASSERT(inode->i_mapping->a_ops->readpages);
+
+ /* calculate the shift required to use bmap */
+ if (inode->i_sb->s_blocksize > PAGE_SIZE)
+ return -ENOBUFS;
+
+ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
+
+ pagevec_init(&pagevec, 0);
+
+ op->op.flags = FSCACHE_OP_FAST;
+ op->op.processor = cachefiles_read_copier;
+
+ INIT_LIST_HEAD(&backpages);
+ nrbackpages = 0;
+
+ ret = space ? -ENODATA : -ENOBUFS;
+ list_for_each_entry_safe(page, _n, pages, lru) {
+ sector_t block0, block;
+
+ /* we assume the absence or presence of the first block is a
+ * good enough indication for the page as a whole
+ * - TODO: don't use bmap() for this as it is _not_ actually
+ * good enough for this as it doesn't indicate errors, but
+ * it's all we've got for the moment
+ */
+ block0 = page->index;
+ block0 <<= shift;
+
+ block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
+ block0);
+ _debug("%llx -> %llx",
+ (unsigned long long) block0,
+ (unsigned long long) block);
+
+ if (block) {
+ /* we have data - add it to the list to give to the
+ * backing fs */
+ list_move(&page->lru, &backpages);
+ (*nr_pages)--;
+ nrbackpages++;
+ } else if (space && pagevec_add(&pagevec, page) == 0) {
+ fscache_mark_pages_cached(op, &pagevec);
+ ret = -ENODATA;
+ }
+ }
+
+ if (pagevec_count(&pagevec) > 0)
+ fscache_mark_pages_cached(op, &pagevec);
+
+ if (list_empty(pages))
+ ret = 0;
+
+ /* submit the apparently valid pages to the backing fs to be read from
+ * disk */
+ if (nrbackpages > 0) {
+ ret2 = cachefiles_read_backing_file(object, op, &backpages,
+ &pagevec);
+ if (ret2 == -ENOMEM || ret2 == -EINTR)
+ ret = ret2;
+ }
+
+ if (pagevec_count(&pagevec) > 0)
+ fscache_mark_pages_cached(op, &pagevec);
+
+ _leave(" = %d [nr=%u%s]",
+ ret, *nr_pages, list_empty(pages) ? " empty" : "");
+ return ret;
+}
+
+/*
+ * allocate a block in the cache in which to store a page
+ * - cache withdrawal is prevented by the caller
+ * - returns -EINTR if interrupted
+ * - returns -ENOMEM if ran out of memory
+ * - returns -ENOBUFS if no buffers can be made available
+ * - returns -ENOBUFS if page is beyond EOF
+ * - otherwise:
+ * - the metadata will be retained
+ * - 0 will be returned
+ */
+int cachefiles_allocate_page(struct fscache_retrieval *op,
+ struct page *page,
+ gfp_t gfp)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct pagevec pagevec;
+ int ret;
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ _enter("%p,{%lx},", object, page->index);
+
+ ret = cachefiles_has_space(cache, 0, 1);
+ if (ret == 0) {
+ pagevec_init(&pagevec, 0);
+ pagevec_add(&pagevec, page);
+ fscache_mark_pages_cached(op, &pagevec);
+ } else {
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * allocate blocks in the cache in which to store a set of pages
+ * - cache withdrawal is prevented by the caller
+ * - returns -EINTR if interrupted
+ * - returns -ENOMEM if ran out of memory
+ * - returns -ENOBUFS if some buffers couldn't be made available
+ * - returns -ENOBUFS if some pages are beyond EOF
+ * - otherwise:
+ * - -ENODATA will be returned
+ * - metadata will be retained for any page marked
+ */
+int cachefiles_allocate_pages(struct fscache_retrieval *op,
+ struct list_head *pages,
+ unsigned *nr_pages,
+ gfp_t gfp)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct pagevec pagevec;
+ struct page *page;
+ int ret;
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ _enter("%p,,,%d,", object, *nr_pages);
+
+ ret = cachefiles_has_space(cache, 0, *nr_pages);
+ if (ret == 0) {
+ pagevec_init(&pagevec, 0);
+
+ list_for_each_entry(page, pages, lru) {
+ if (pagevec_add(&pagevec, page) == 0)
+ fscache_mark_pages_cached(op, &pagevec);
+ }
+
+ if (pagevec_count(&pagevec) > 0)
+ fscache_mark_pages_cached(op, &pagevec);
+ ret = -ENODATA;
+ } else {
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * request a page be stored in the cache
+ * - cache withdrawal is prevented by the caller
+ * - this request may be ignored if there's no cache block available, in which
+ * case -ENOBUFS will be returned
+ * - if the op is in progress, 0 will be returned
+ */
+int cachefiles_write_page(struct fscache_storage *op, struct page *page)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ mm_segment_t old_fs;
+ struct file *file;
+ loff_t pos;
+ void *data;
+ int ret;
+
+ ASSERT(op != NULL);
+ ASSERT(page != NULL);
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+
+ _enter("%p,%p{%lx},,,", object, page, page->index);
+
+ if (!object->backer) {
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+ }
+
+ ASSERT(S_ISREG(object->backer->d_inode->i_mode));
+
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ /* write the page to the backing filesystem and let it store it in its
+ * own time */
+ dget(object->backer);
+ mntget(cache->mnt);
+ file = dentry_open(object->backer, cache->mnt, O_RDWR,
+ cache->cache_cred);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ } else {
+ ret = -EIO;
+ if (file->f_op->write) {
+ pos = (loff_t) page->index << PAGE_SHIFT;
+ data = kmap(page);
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = file->f_op->write(
+ file, (const void __user *) data, PAGE_SIZE,
+ &pos);
+ set_fs(old_fs);
+ kunmap(page);
+ if (ret != PAGE_SIZE)
+ ret = -EIO;
+ }
+ fput(file);
+ }
+
+ if (ret < 0) {
+ if (ret == -EIO)
+ cachefiles_io_error_obj(
+ object, "Write page to backing file failed");
+ ret = -ENOBUFS;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * detach a backing block from a page
+ * - cache withdrawal is prevented by the caller
+ */
+void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+
+ object = container_of(_object, struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ _enter("%p,{%lu}", object, page->index);
+
+ spin_unlock(&object->fscache.cookie->lock);
+}
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
new file mode 100644
index 000000000000..b5808cdb2232
--- /dev/null
+++ b/fs/cachefiles/security.c
@@ -0,0 +1,116 @@
+/* CacheFiles security management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include "internal.h"
+
+/*
+ * determine the security context within which we access the cache from within
+ * the kernel
+ */
+int cachefiles_get_security_ID(struct cachefiles_cache *cache)
+{
+ struct cred *new;
+ int ret;
+
+ _enter("{%s}", cache->secctx);
+
+ new = prepare_kernel_cred(current);
+ if (!new) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (cache->secctx) {
+ ret = set_security_override_from_ctx(new, cache->secctx);
+ if (ret < 0) {
+ put_cred(new);
+ printk(KERN_ERR "CacheFiles:"
+ " Security denies permission to nominate"
+ " security context: error %d\n",
+ ret);
+ goto error;
+ }
+ }
+
+ cache->cache_cred = new;
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * see if mkdir and create can be performed in the root directory
+ */
+static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
+ struct dentry *root)
+{
+ int ret;
+
+ ret = security_inode_mkdir(root->d_inode, root, 0);
+ if (ret < 0) {
+ printk(KERN_ERR "CacheFiles:"
+ " Security denies permission to make dirs: error %d",
+ ret);
+ return ret;
+ }
+
+ ret = security_inode_create(root->d_inode, root, 0);
+ if (ret < 0)
+ printk(KERN_ERR "CacheFiles:"
+ " Security denies permission to create files: error %d",
+ ret);
+
+ return ret;
+}
+
+/*
+ * check the security details of the on-disk cache
+ * - must be called with security override in force
+ */
+int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
+ struct dentry *root,
+ const struct cred **_saved_cred)
+{
+ struct cred *new;
+ int ret;
+
+ _enter("");
+
+ /* duplicate the cache creds for COW (the override is currently in
+ * force, so we can use prepare_creds() to do this) */
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ cachefiles_end_secure(cache, *_saved_cred);
+
+ /* use the cache root dir's security context as the basis with
+ * which create files */
+ ret = set_create_files_as(new, root->d_inode);
+ if (ret < 0) {
+ _leave(" = %d [cfa]", ret);
+ return ret;
+ }
+
+ put_cred(cache->cache_cred);
+ cache->cache_cred = new;
+
+ cachefiles_begin_secure(cache, _saved_cred);
+ ret = cachefiles_check_cache_dir(cache, root);
+
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
new file mode 100644
index 000000000000..f3e7a0bf068b
--- /dev/null
+++ b/fs/cachefiles/xattr.c
@@ -0,0 +1,291 @@
+/* CacheFiles extended attribute management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/quotaops.h>
+#include <linux/xattr.h>
+#include "internal.h"
+
+static const char cachefiles_xattr_cache[] =
+ XATTR_USER_PREFIX "CacheFiles.cache";
+
+/*
+ * check the type label on an object
+ * - done using xattrs
+ */
+int cachefiles_check_object_type(struct cachefiles_object *object)
+{
+ struct dentry *dentry = object->dentry;
+ char type[3], xtype[3];
+ int ret;
+
+ ASSERT(dentry);
+ ASSERT(dentry->d_inode);
+
+ if (!object->fscache.cookie)
+ strcpy(type, "C3");
+ else
+ snprintf(type, 3, "%02x", object->fscache.cookie->def->type);
+
+ _enter("%p{%s}", object, type);
+
+ /* attempt to install a type label directly */
+ ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2,
+ XATTR_CREATE);
+ if (ret == 0) {
+ _debug("SET"); /* we succeeded */
+ goto error;
+ }
+
+ if (ret != -EEXIST) {
+ kerror("Can't set xattr on %*.*s [%lu] (err %d)",
+ dentry->d_name.len, dentry->d_name.len,
+ dentry->d_name.name, dentry->d_inode->i_ino,
+ -ret);
+ goto error;
+ }
+
+ /* read the current type label */
+ ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3);
+ if (ret < 0) {
+ if (ret == -ERANGE)
+ goto bad_type_length;
+
+ kerror("Can't read xattr on %*.*s [%lu] (err %d)",
+ dentry->d_name.len, dentry->d_name.len,
+ dentry->d_name.name, dentry->d_inode->i_ino,
+ -ret);
+ goto error;
+ }
+
+ /* check the type is what we're expecting */
+ if (ret != 2)
+ goto bad_type_length;
+
+ if (xtype[0] != type[0] || xtype[1] != type[1])
+ goto bad_type;
+
+ ret = 0;
+
+error:
+ _leave(" = %d", ret);
+ return ret;
+
+bad_type_length:
+ kerror("Cache object %lu type xattr length incorrect",
+ dentry->d_inode->i_ino);
+ ret = -EIO;
+ goto error;
+
+bad_type:
+ xtype[2] = 0;
+ kerror("Cache object %*.*s [%lu] type %s not %s",
+ dentry->d_name.len, dentry->d_name.len,
+ dentry->d_name.name, dentry->d_inode->i_ino,
+ xtype, type);
+ ret = -EIO;
+ goto error;
+}
+
+/*
+ * set the state xattr on a cache file
+ */
+int cachefiles_set_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata)
+{
+ struct dentry *dentry = object->dentry;
+ int ret;
+
+ ASSERT(object->fscache.cookie);
+ ASSERT(dentry);
+
+ _enter("%p,#%d", object, auxdata->len);
+
+ /* attempt to install the cache metadata directly */
+ _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
+
+ ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+ &auxdata->type, auxdata->len,
+ XATTR_CREATE);
+ if (ret < 0 && ret != -ENOMEM)
+ cachefiles_io_error_obj(
+ object,
+ "Failed to set xattr with error %d", ret);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * update the state xattr on a cache file
+ */
+int cachefiles_update_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata)
+{
+ struct dentry *dentry = object->dentry;
+ int ret;
+
+ ASSERT(object->fscache.cookie);
+ ASSERT(dentry);
+
+ _enter("%p,#%d", object, auxdata->len);
+
+ /* attempt to install the cache metadata directly */
+ _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
+
+ ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+ &auxdata->type, auxdata->len,
+ XATTR_REPLACE);
+ if (ret < 0 && ret != -ENOMEM)
+ cachefiles_io_error_obj(
+ object,
+ "Failed to update xattr with error %d", ret);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * check the state xattr on a cache file
+ * - return -ESTALE if the object should be deleted
+ */
+int cachefiles_check_object_xattr(struct cachefiles_object *object,
+ struct cachefiles_xattr *auxdata)
+{
+ struct cachefiles_xattr *auxbuf;
+ struct dentry *dentry = object->dentry;
+ int ret;
+
+ _enter("%p,#%d", object, auxdata->len);
+
+ ASSERT(dentry);
+ ASSERT(dentry->d_inode);
+
+ auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
+ if (!auxbuf) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ /* read the current type label */
+ ret = vfs_getxattr(dentry, cachefiles_xattr_cache,
+ &auxbuf->type, 512 + 1);
+ if (ret < 0) {
+ if (ret == -ENODATA)
+ goto stale; /* no attribute - power went off
+ * mid-cull? */
+
+ if (ret == -ERANGE)
+ goto bad_type_length;
+
+ cachefiles_io_error_obj(object,
+ "Can't read xattr on %lu (err %d)",
+ dentry->d_inode->i_ino, -ret);
+ goto error;
+ }
+
+ /* check the on-disk object */
+ if (ret < 1)
+ goto bad_type_length;
+
+ if (auxbuf->type != auxdata->type)
+ goto stale;
+
+ auxbuf->len = ret;
+
+ /* consult the netfs */
+ if (object->fscache.cookie->def->check_aux) {
+ enum fscache_checkaux result;
+ unsigned int dlen;
+
+ dlen = auxbuf->len - 1;
+
+ _debug("checkaux %s #%u",
+ object->fscache.cookie->def->name, dlen);
+
+ result = fscache_check_aux(&object->fscache,
+ &auxbuf->data, dlen);
+
+ switch (result) {
+ /* entry okay as is */
+ case FSCACHE_CHECKAUX_OKAY:
+ goto okay;
+
+ /* entry requires update */
+ case FSCACHE_CHECKAUX_NEEDS_UPDATE:
+ break;
+
+ /* entry requires deletion */
+ case FSCACHE_CHECKAUX_OBSOLETE:
+ goto stale;
+
+ default:
+ BUG();
+ }
+
+ /* update the current label */
+ ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+ &auxdata->type, auxdata->len,
+ XATTR_REPLACE);
+ if (ret < 0) {
+ cachefiles_io_error_obj(object,
+ "Can't update xattr on %lu"
+ " (error %d)",
+ dentry->d_inode->i_ino, -ret);
+ goto error;
+ }
+ }
+
+okay:
+ ret = 0;
+
+error:
+ kfree(auxbuf);
+ _leave(" = %d", ret);
+ return ret;
+
+bad_type_length:
+ kerror("Cache object %lu xattr length incorrect",
+ dentry->d_inode->i_ino);
+ ret = -EIO;
+ goto error;
+
+stale:
+ ret = -ESTALE;
+ goto error;
+}
+
+/*
+ * remove the object's xattr to mark it stale
+ */
+int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
+ struct dentry *dentry)
+{
+ int ret;
+
+ ret = vfs_removexattr(dentry, cachefiles_xattr_cache);
+ if (ret < 0) {
+ if (ret == -ENOENT || ret == -ENODATA)
+ ret = 0;
+ else if (ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Can't remove xattr from %lu"
+ " (error %d)",
+ dentry->d_inode->i_ino, -ret);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
new file mode 100644
index 000000000000..9bbb8ce7bea0
--- /dev/null
+++ b/fs/fscache/Kconfig
@@ -0,0 +1,56 @@
+
+config FSCACHE
+ tristate "General filesystem local caching manager"
+ depends on EXPERIMENTAL
+ select SLOW_WORK
+ help
+ This option enables a generic filesystem caching manager that can be
+ used by various network and other filesystems to cache data locally.
+ Different sorts of caches can be plugged in, depending on the
+ resources available.
+
+ See Documentation/filesystems/caching/fscache.txt for more information.
+
+config FSCACHE_STATS
+ bool "Gather statistical information on local caching"
+ depends on FSCACHE && PROC_FS
+ help
+ This option causes statistical information to be gathered on local
+ caching and exported through file:
+
+ /proc/fs/fscache/stats
+
+ The gathering of statistics adds a certain amount of overhead to
+ execution as there are a quite a few stats gathered, and on a
+ multi-CPU system these may be on cachelines that keep bouncing
+ between CPUs. On the other hand, the stats are very useful for
+ debugging purposes. Saying 'Y' here is recommended.
+
+ See Documentation/filesystems/caching/fscache.txt for more information.
+
+config FSCACHE_HISTOGRAM
+ bool "Gather latency information on local caching"
+ depends on FSCACHE && PROC_FS
+ help
+ This option causes latency information to be gathered on local
+ caching and exported through file:
+
+ /proc/fs/fscache/histogram
+
+ The generation of this histogram adds a certain amount of overhead to
+ execution as there are a number of points at which data is gathered,
+ and on a multi-CPU system these may be on cachelines that keep
+ bouncing between CPUs. On the other hand, the histogram may be
+ useful for debugging purposes. Saying 'N' here is recommended.
+
+ See Documentation/filesystems/caching/fscache.txt for more information.
+
+config FSCACHE_DEBUG
+ bool "Debug FS-Cache"
+ depends on FSCACHE
+ help
+ This permits debugging to be dynamically enabled in the local caching
+ management module. If this is set, the debugging output may be
+ enabled by setting bits in /sys/modules/fscache/parameter/debug.
+
+ See Documentation/filesystems/caching/fscache.txt for more information.
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
new file mode 100644
index 000000000000..91571b95aacc
--- /dev/null
+++ b/fs/fscache/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for general filesystem caching code
+#
+
+fscache-y := \
+ cache.o \
+ cookie.o \
+ fsdef.o \
+ main.o \
+ netfs.o \
+ object.o \
+ operation.o \
+ page.o
+
+fscache-$(CONFIG_PROC_FS) += proc.o
+fscache-$(CONFIG_FSCACHE_STATS) += stats.o
+fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o
+
+obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
new file mode 100644
index 000000000000..e21985bbb1fb
--- /dev/null
+++ b/fs/fscache/cache.c
@@ -0,0 +1,415 @@
+/* FS-Cache cache handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL CACHE
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+LIST_HEAD(fscache_cache_list);
+DECLARE_RWSEM(fscache_addremove_sem);
+DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq);
+EXPORT_SYMBOL(fscache_cache_cleared_wq);
+
+static LIST_HEAD(fscache_cache_tag_list);
+
+/*
+ * look up a cache tag
+ */
+struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name)
+{
+ struct fscache_cache_tag *tag, *xtag;
+
+ /* firstly check for the existence of the tag under read lock */
+ down_read(&fscache_addremove_sem);
+
+ list_for_each_entry(tag, &fscache_cache_tag_list, link) {
+ if (strcmp(tag->name, name) == 0) {
+ atomic_inc(&tag->usage);
+ up_read(&fscache_addremove_sem);
+ return tag;
+ }
+ }
+
+ up_read(&fscache_addremove_sem);
+
+ /* the tag does not exist - create a candidate */
+ xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL);
+ if (!xtag)
+ /* return a dummy tag if out of memory */
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&xtag->usage, 1);
+ strcpy(xtag->name, name);
+
+ /* write lock, search again and add if still not present */
+ down_write(&fscache_addremove_sem);
+
+ list_for_each_entry(tag, &fscache_cache_tag_list, link) {
+ if (strcmp(tag->name, name) == 0) {
+ atomic_inc(&tag->usage);
+ up_write(&fscache_addremove_sem);
+ kfree(xtag);
+ return tag;
+ }
+ }
+
+ list_add_tail(&xtag->link, &fscache_cache_tag_list);
+ up_write(&fscache_addremove_sem);
+ return xtag;
+}
+
+/*
+ * release a reference to a cache tag
+ */
+void __fscache_release_cache_tag(struct fscache_cache_tag *tag)
+{
+ if (tag != ERR_PTR(-ENOMEM)) {
+ down_write(&fscache_addremove_sem);
+
+ if (atomic_dec_and_test(&tag->usage))
+ list_del_init(&tag->link);
+ else
+ tag = NULL;
+
+ up_write(&fscache_addremove_sem);
+
+ kfree(tag);
+ }
+}
+
+/*
+ * select a cache in which to store an object
+ * - the cache addremove semaphore must be at least read-locked by the caller
+ * - the object will never be an index
+ */
+struct fscache_cache *fscache_select_cache_for_object(
+ struct fscache_cookie *cookie)
+{
+ struct fscache_cache_tag *tag;
+ struct fscache_object *object;
+ struct fscache_cache *cache;
+
+ _enter("");
+
+ if (list_empty(&fscache_cache_list)) {
+ _leave(" = NULL [no cache]");
+ return NULL;
+ }
+
+ /* we check the parent to determine the cache to use */
+ spin_lock(&cookie->lock);
+
+ /* the first in the parent's backing list should be the preferred
+ * cache */
+ if (!hlist_empty(&cookie->backing_objects)) {
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ cache = object->cache;
+ if (object->state >= FSCACHE_OBJECT_DYING ||
+ test_bit(FSCACHE_IOERROR, &cache->flags))
+ cache = NULL;
+
+ spin_unlock(&cookie->lock);
+ _leave(" = %p [parent]", cache);
+ return cache;
+ }
+
+ /* the parent is unbacked */
+ if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ /* cookie not an index and is unbacked */
+ spin_unlock(&cookie->lock);
+ _leave(" = NULL [cookie ub,ni]");
+ return NULL;
+ }
+
+ spin_unlock(&cookie->lock);
+
+ if (!cookie->def->select_cache)
+ goto no_preference;
+
+ /* ask the netfs for its preference */
+ tag = cookie->def->select_cache(cookie->parent->netfs_data,
+ cookie->netfs_data);
+ if (!tag)
+ goto no_preference;
+
+ if (tag == ERR_PTR(-ENOMEM)) {
+ _leave(" = NULL [nomem tag]");
+ return NULL;
+ }
+
+ if (!tag->cache) {
+ _leave(" = NULL [unbacked tag]");
+ return NULL;
+ }
+
+ if (test_bit(FSCACHE_IOERROR, &tag->cache->flags))
+ return NULL;
+
+ _leave(" = %p [specific]", tag->cache);
+ return tag->cache;
+
+no_preference:
+ /* netfs has no preference - just select first cache */
+ cache = list_entry(fscache_cache_list.next,
+ struct fscache_cache, link);
+ _leave(" = %p [first]", cache);
+ return cache;
+}
+
+/**
+ * fscache_init_cache - Initialise a cache record
+ * @cache: The cache record to be initialised
+ * @ops: The cache operations to be installed in that record
+ * @idfmt: Format string to define identifier
+ * @...: sprintf-style arguments
+ *
+ * Initialise a record of a cache and fill in the name.
+ *
+ * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * description.
+ */
+void fscache_init_cache(struct fscache_cache *cache,
+ const struct fscache_cache_ops *ops,
+ const char *idfmt,
+ ...)
+{
+ va_list va;
+
+ memset(cache, 0, sizeof(*cache));
+
+ cache->ops = ops;
+
+ va_start(va, idfmt);
+ vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va);
+ va_end(va);
+
+ INIT_WORK(&cache->op_gc, fscache_operation_gc);
+ INIT_LIST_HEAD(&cache->link);
+ INIT_LIST_HEAD(&cache->object_list);
+ INIT_LIST_HEAD(&cache->op_gc_list);
+ spin_lock_init(&cache->object_list_lock);
+ spin_lock_init(&cache->op_gc_list_lock);
+}
+EXPORT_SYMBOL(fscache_init_cache);
+
+/**
+ * fscache_add_cache - Declare a cache as being open for business
+ * @cache: The record describing the cache
+ * @ifsdef: The record of the cache object describing the top-level index
+ * @tagname: The tag describing this cache
+ *
+ * Add a cache to the system, making it available for netfs's to use.
+ *
+ * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * description.
+ */
+int fscache_add_cache(struct fscache_cache *cache,
+ struct fscache_object *ifsdef,
+ const char *tagname)
+{
+ struct fscache_cache_tag *tag;
+
+ BUG_ON(!cache->ops);
+ BUG_ON(!ifsdef);
+
+ cache->flags = 0;
+ ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
+ ifsdef->state = FSCACHE_OBJECT_ACTIVE;
+
+ if (!tagname)
+ tagname = cache->identifier;
+
+ BUG_ON(!tagname[0]);
+
+ _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname);
+
+ /* we use the cache tag to uniquely identify caches */
+ tag = __fscache_lookup_cache_tag(tagname);
+ if (IS_ERR(tag))
+ goto nomem;
+
+ if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags))
+ goto tag_in_use;
+
+ cache->kobj = kobject_create_and_add(tagname, fscache_root);
+ if (!cache->kobj)
+ goto error;
+
+ ifsdef->cookie = &fscache_fsdef_index;
+ ifsdef->cache = cache;
+ cache->fsdef = ifsdef;
+
+ down_write(&fscache_addremove_sem);
+
+ tag->cache = cache;
+ cache->tag = tag;
+
+ /* add the cache to the list */
+ list_add(&cache->link, &fscache_cache_list);
+
+ /* add the cache's netfs definition index object to the cache's
+ * list */
+ spin_lock(&cache->object_list_lock);
+ list_add_tail(&ifsdef->cache_link, &cache->object_list);
+ spin_unlock(&cache->object_list_lock);
+
+ /* add the cache's netfs definition index object to the top level index
+ * cookie as a known backing object */
+ spin_lock(&fscache_fsdef_index.lock);
+
+ hlist_add_head(&ifsdef->cookie_link,
+ &fscache_fsdef_index.backing_objects);
+
+ atomic_inc(&fscache_fsdef_index.usage);
+
+ /* done */
+ spin_unlock(&fscache_fsdef_index.lock);
+ up_write(&fscache_addremove_sem);
+
+ printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n",
+ cache->tag->name, cache->ops->name);
+ kobject_uevent(cache->kobj, KOBJ_ADD);
+
+ _leave(" = 0 [%s]", cache->identifier);
+ return 0;
+
+tag_in_use:
+ printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname);
+ __fscache_release_cache_tag(tag);
+ _leave(" = -EXIST");
+ return -EEXIST;
+
+error:
+ __fscache_release_cache_tag(tag);
+ _leave(" = -EINVAL");
+ return -EINVAL;
+
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(fscache_add_cache);
+
+/**
+ * fscache_io_error - Note a cache I/O error
+ * @cache: The record describing the cache
+ *
+ * Note that an I/O error occurred in a cache and that it should no longer be
+ * used for anything. This also reports the error into the kernel log.
+ *
+ * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * description.
+ */
+void fscache_io_error(struct fscache_cache *cache)
+{
+ set_bit(FSCACHE_IOERROR, &cache->flags);
+
+ printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n",
+ cache->ops->name);
+}
+EXPORT_SYMBOL(fscache_io_error);
+
+/*
+ * request withdrawal of all the objects in a cache
+ * - all the objects being withdrawn are moved onto the supplied list
+ */
+static void fscache_withdraw_all_objects(struct fscache_cache *cache,
+ struct list_head *dying_objects)
+{
+ struct fscache_object *object;
+
+ spin_lock(&cache->object_list_lock);
+
+ while (!list_empty(&cache->object_list)) {
+ object = list_entry(cache->object_list.next,
+ struct fscache_object, cache_link);
+ list_move_tail(&object->cache_link, dying_objects);
+
+ _debug("withdraw %p", object->cookie);
+
+ spin_lock(&object->lock);
+ spin_unlock(&cache->object_list_lock);
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW);
+ spin_unlock(&object->lock);
+
+ cond_resched();
+ spin_lock(&cache->object_list_lock);
+ }
+
+ spin_unlock(&cache->object_list_lock);
+}
+
+/**
+ * fscache_withdraw_cache - Withdraw a cache from the active service
+ * @cache: The record describing the cache
+ *
+ * Withdraw a cache from service, unbinding all its cache objects from the
+ * netfs cookies they're currently representing.
+ *
+ * See Documentation/filesystems/caching/backend-api.txt for a complete
+ * description.
+ */
+void fscache_withdraw_cache(struct fscache_cache *cache)
+{
+ LIST_HEAD(dying_objects);
+
+ _enter("");
+
+ printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n",
+ cache->tag->name);
+
+ /* make the cache unavailable for cookie acquisition */
+ if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags))
+ BUG();
+
+ down_write(&fscache_addremove_sem);
+ list_del_init(&cache->link);
+ cache->tag->cache = NULL;
+ up_write(&fscache_addremove_sem);
+
+ /* make sure all pages pinned by operations on behalf of the netfs are
+ * written to disk */
+ cache->ops->sync_cache(cache);
+
+ /* dissociate all the netfs pages backed by this cache from the block
+ * mappings in the cache */
+ cache->ops->dissociate_pages(cache);
+
+ /* we now have to destroy all the active objects pertaining to this
+ * cache - which we do by passing them off to thread pool to be
+ * disposed of */
+ _debug("destroy");
+
+ fscache_withdraw_all_objects(cache, &dying_objects);
+
+ /* wait for all extant objects to finish their outstanding operations
+ * and go away */
+ _debug("wait for finish");
+ wait_event(fscache_cache_cleared_wq,
+ atomic_read(&cache->object_count) == 0);
+ _debug("wait for clearance");
+ wait_event(fscache_cache_cleared_wq,
+ list_empty(&cache->object_list));
+ _debug("cleared");
+ ASSERT(list_empty(&dying_objects));
+
+ kobject_put(cache->kobj);
+
+ clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags);
+ fscache_release_cache_tag(cache->tag);
+ cache->tag = NULL;
+
+ _leave("");
+}
+EXPORT_SYMBOL(fscache_withdraw_cache);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
new file mode 100644
index 000000000000..72fd18f6c71f
--- /dev/null
+++ b/fs/fscache/cookie.c
@@ -0,0 +1,500 @@
+/* netfs cookie management
+ *
+ * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for more information on
+ * the netfs API.
+ */
+
+#define FSCACHE_DEBUG_LEVEL COOKIE
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct kmem_cache *fscache_cookie_jar;
+
+static atomic_t fscache_object_debug_id = ATOMIC_INIT(0);
+
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie);
+static int fscache_alloc_object(struct fscache_cache *cache,
+ struct fscache_cookie *cookie);
+static int fscache_attach_object(struct fscache_cookie *cookie,
+ struct fscache_object *object);
+
+/*
+ * initialise an cookie jar slab element prior to any use
+ */
+void fscache_cookie_init_once(void *_cookie)
+{
+ struct fscache_cookie *cookie = _cookie;
+
+ memset(cookie, 0, sizeof(*cookie));
+ spin_lock_init(&cookie->lock);
+ INIT_HLIST_HEAD(&cookie->backing_objects);
+}
+
+/*
+ * request a cookie to represent an object (index, datafile, xattr, etc)
+ * - parent specifies the parent object
+ * - the top level index cookie for each netfs is stored in the fscache_netfs
+ * struct upon registration
+ * - def points to the definition
+ * - the netfs_data will be passed to the functions pointed to in *def
+ * - all attached caches will be searched to see if they contain this object
+ * - index objects aren't stored on disk until there's a dependent file that
+ * needs storing
+ * - other objects are stored in a selected cache immediately, and all the
+ * indices forming the path to it are instantiated if necessary
+ * - we never let on to the netfs about errors
+ * - we may set a negative cookie pointer, but that's okay
+ */
+struct fscache_cookie *__fscache_acquire_cookie(
+ struct fscache_cookie *parent,
+ const struct fscache_cookie_def *def,
+ void *netfs_data)
+{
+ struct fscache_cookie *cookie;
+
+ BUG_ON(!def);
+
+ _enter("{%s},{%s},%p",
+ parent ? (char *) parent->def->name : "<no-parent>",
+ def->name, netfs_data);
+
+ fscache_stat(&fscache_n_acquires);
+
+ /* if there's no parent cookie, then we don't create one here either */
+ if (!parent) {
+ fscache_stat(&fscache_n_acquires_null);
+ _leave(" [no parent]");
+ return NULL;
+ }
+
+ /* validate the definition */
+ BUG_ON(!def->get_key);
+ BUG_ON(!def->name[0]);
+
+ BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX &&
+ parent->def->type != FSCACHE_COOKIE_TYPE_INDEX);
+
+ /* allocate and initialise a cookie */
+ cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+ if (!cookie) {
+ fscache_stat(&fscache_n_acquires_oom);
+ _leave(" [ENOMEM]");
+ return NULL;
+ }
+
+ atomic_set(&cookie->usage, 1);
+ atomic_set(&cookie->n_children, 0);
+
+ atomic_inc(&parent->usage);
+ atomic_inc(&parent->n_children);
+
+ cookie->def = def;
+ cookie->parent = parent;
+ cookie->netfs_data = netfs_data;
+ cookie->flags = 0;
+
+ INIT_RADIX_TREE(&cookie->stores, GFP_NOFS);
+
+ switch (cookie->def->type) {
+ case FSCACHE_COOKIE_TYPE_INDEX:
+ fscache_stat(&fscache_n_cookie_index);
+ break;
+ case FSCACHE_COOKIE_TYPE_DATAFILE:
+ fscache_stat(&fscache_n_cookie_data);
+ break;
+ default:
+ fscache_stat(&fscache_n_cookie_special);
+ break;
+ }
+
+ /* if the object is an index then we need do nothing more here - we
+ * create indices on disk when we need them as an index may exist in
+ * multiple caches */
+ if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+ if (fscache_acquire_non_index_cookie(cookie) < 0) {
+ atomic_dec(&parent->n_children);
+ __fscache_cookie_put(cookie);
+ fscache_stat(&fscache_n_acquires_nobufs);
+ _leave(" = NULL");
+ return NULL;
+ }
+ }
+
+ fscache_stat(&fscache_n_acquires_ok);
+ _leave(" = %p", cookie);
+ return cookie;
+}
+EXPORT_SYMBOL(__fscache_acquire_cookie);
+
+/*
+ * acquire a non-index cookie
+ * - this must make sure the index chain is instantiated and instantiate the
+ * object representation too
+ */
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
+{
+ struct fscache_object *object;
+ struct fscache_cache *cache;
+ uint64_t i_size;
+ int ret;
+
+ _enter("");
+
+ cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE;
+
+ /* now we need to see whether the backing objects for this cookie yet
+ * exist, if not there'll be nothing to search */
+ down_read(&fscache_addremove_sem);
+
+ if (list_empty(&fscache_cache_list)) {
+ up_read(&fscache_addremove_sem);
+ _leave(" = 0 [no caches]");
+ return 0;
+ }
+
+ /* select a cache in which to store the object */
+ cache = fscache_select_cache_for_object(cookie->parent);
+ if (!cache) {
+ up_read(&fscache_addremove_sem);
+ fscache_stat(&fscache_n_acquires_no_cache);
+ _leave(" = -ENOMEDIUM [no cache]");
+ return -ENOMEDIUM;
+ }
+
+ _debug("cache %s", cache->tag->name);
+
+ cookie->flags =
+ (1 << FSCACHE_COOKIE_LOOKING_UP) |
+ (1 << FSCACHE_COOKIE_CREATING) |
+ (1 << FSCACHE_COOKIE_NO_DATA_YET);
+
+ /* ask the cache to allocate objects for this cookie and its parent
+ * chain */
+ ret = fscache_alloc_object(cache, cookie);
+ if (ret < 0) {
+ up_read(&fscache_addremove_sem);
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ /* pass on how big the object we're caching is supposed to be */
+ cookie->def->get_attr(cookie->netfs_data, &i_size);
+
+ spin_lock(&cookie->lock);
+ if (hlist_empty(&cookie->backing_objects)) {
+ spin_unlock(&cookie->lock);
+ goto unavailable;
+ }
+
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ fscache_set_store_limit(object, i_size);
+
+ /* initiate the process of looking up all the objects in the chain
+ * (done by fscache_initialise_object()) */
+ fscache_enqueue_object(object);
+
+ spin_unlock(&cookie->lock);
+
+ /* we may be required to wait for lookup to complete at this point */
+ if (!fscache_defer_lookup) {
+ _debug("non-deferred lookup %p", &cookie->flags);
+ wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
+ fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ _debug("complete");
+ if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
+ goto unavailable;
+ }
+
+ up_read(&fscache_addremove_sem);
+ _leave(" = 0 [deferred]");
+ return 0;
+
+unavailable:
+ up_read(&fscache_addremove_sem);
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+}
+
+/*
+ * recursively allocate cache object records for a cookie/cache combination
+ * - caller must be holding the addremove sem
+ */
+static int fscache_alloc_object(struct fscache_cache *cache,
+ struct fscache_cookie *cookie)
+{
+ struct fscache_object *object;
+ struct hlist_node *_n;
+ int ret;
+
+ _enter("%p,%p{%s}", cache, cookie, cookie->def->name);
+
+ spin_lock(&cookie->lock);
+ hlist_for_each_entry(object, _n, &cookie->backing_objects,
+ cookie_link) {
+ if (object->cache == cache)
+ goto object_already_extant;
+ }
+ spin_unlock(&cookie->lock);
+
+ /* ask the cache to allocate an object (we may end up with duplicate
+ * objects at this stage, but we sort that out later) */
+ object = cache->ops->alloc_object(cache, cookie);
+ if (IS_ERR(object)) {
+ fscache_stat(&fscache_n_object_no_alloc);
+ ret = PTR_ERR(object);
+ goto error;
+ }
+
+ fscache_stat(&fscache_n_object_alloc);
+
+ object->debug_id = atomic_inc_return(&fscache_object_debug_id);
+
+ _debug("ALLOC OBJ%x: %s {%lx}",
+ object->debug_id, cookie->def->name, object->events);
+
+ ret = fscache_alloc_object(cache, cookie->parent);
+ if (ret < 0)
+ goto error_put;
+
+ /* only attach if we managed to allocate all we needed, otherwise
+ * discard the object we just allocated and instead use the one
+ * attached to the cookie */
+ if (fscache_attach_object(cookie, object) < 0)
+ cache->ops->put_object(object);
+
+ _leave(" = 0");
+ return 0;
+
+object_already_extant:
+ ret = -ENOBUFS;
+ if (object->state >= FSCACHE_OBJECT_DYING) {
+ spin_unlock(&cookie->lock);
+ goto error;
+ }
+ spin_unlock(&cookie->lock);
+ _leave(" = 0 [found]");
+ return 0;
+
+error_put:
+ cache->ops->put_object(object);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * attach a cache object to a cookie
+ */
+static int fscache_attach_object(struct fscache_cookie *cookie,
+ struct fscache_object *object)
+{
+ struct fscache_object *p;
+ struct fscache_cache *cache = object->cache;
+ struct hlist_node *_n;
+ int ret;
+
+ _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
+
+ spin_lock(&cookie->lock);
+
+ /* there may be multiple initial creations of this object, but we only
+ * want one */
+ ret = -EEXIST;
+ hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) {
+ if (p->cache == object->cache) {
+ if (p->state >= FSCACHE_OBJECT_DYING)
+ ret = -ENOBUFS;
+ goto cant_attach_object;
+ }
+ }
+
+ /* pin the parent object */
+ spin_lock_nested(&cookie->parent->lock, 1);
+ hlist_for_each_entry(p, _n, &cookie->parent->backing_objects,
+ cookie_link) {
+ if (p->cache == object->cache) {
+ if (p->state >= FSCACHE_OBJECT_DYING) {
+ ret = -ENOBUFS;
+ spin_unlock(&cookie->parent->lock);
+ goto cant_attach_object;
+ }
+ object->parent = p;
+ spin_lock(&p->lock);
+ p->n_children++;
+ spin_unlock(&p->lock);
+ break;
+ }
+ }
+ spin_unlock(&cookie->parent->lock);
+
+ /* attach to the cache's object list */
+ if (list_empty(&object->cache_link)) {
+ spin_lock(&cache->object_list_lock);
+ list_add(&object->cache_link, &cache->object_list);
+ spin_unlock(&cache->object_list_lock);
+ }
+
+ /* attach to the cookie */
+ object->cookie = cookie;
+ atomic_inc(&cookie->usage);
+ hlist_add_head(&object->cookie_link, &cookie->backing_objects);
+ ret = 0;
+
+cant_attach_object:
+ spin_unlock(&cookie->lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * update the index entries backing a cookie
+ */
+void __fscache_update_cookie(struct fscache_cookie *cookie)
+{
+ struct fscache_object *object;
+ struct hlist_node *_p;
+
+ fscache_stat(&fscache_n_updates);
+
+ if (!cookie) {
+ fscache_stat(&fscache_n_updates_null);
+ _leave(" [no cookie]");
+ return;
+ }
+
+ _enter("{%s}", cookie->def->name);
+
+ BUG_ON(!cookie->def->get_aux);
+
+ spin_lock(&cookie->lock);
+
+ /* update the index entry on disk in each cache backing this cookie */
+ hlist_for_each_entry(object, _p,
+ &cookie->backing_objects, cookie_link) {
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+ }
+
+ spin_unlock(&cookie->lock);
+ _leave("");
+}
+EXPORT_SYMBOL(__fscache_update_cookie);
+
+/*
+ * release a cookie back to the cache
+ * - the object will be marked as recyclable on disk if retire is true
+ * - all dependents of this cookie must have already been unregistered
+ * (indices/files/pages)
+ */
+void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
+{
+ struct fscache_cache *cache;
+ struct fscache_object *object;
+ unsigned long event;
+
+ fscache_stat(&fscache_n_relinquishes);
+
+ if (!cookie) {
+ fscache_stat(&fscache_n_relinquishes_null);
+ _leave(" [no cookie]");
+ return;
+ }
+
+ _enter("%p{%s,%p},%d",
+ cookie, cookie->def->name, cookie->netfs_data, retire);
+
+ if (atomic_read(&cookie->n_children) != 0) {
+ printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n",
+ cookie->def->name);
+ BUG();
+ }
+
+ /* wait for the cookie to finish being instantiated (or to fail) */
+ if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) {
+ fscache_stat(&fscache_n_relinquishes_waitcrt);
+ wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING,
+ fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ }
+
+ event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
+
+ /* detach pointers back to the netfs */
+ spin_lock(&cookie->lock);
+
+ cookie->netfs_data = NULL;
+ cookie->def = NULL;
+
+ /* break links with all the active objects */
+ while (!hlist_empty(&cookie->backing_objects)) {
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object,
+ cookie_link);
+
+ _debug("RELEASE OBJ%x", object->debug_id);
+
+ /* detach each cache object from the object cookie */
+ spin_lock(&object->lock);
+ hlist_del_init(&object->cookie_link);
+
+ cache = object->cache;
+ object->cookie = NULL;
+ fscache_raise_event(object, event);
+ spin_unlock(&object->lock);
+
+ if (atomic_dec_and_test(&cookie->usage))
+ /* the cookie refcount shouldn't be reduced to 0 yet */
+ BUG();
+ }
+
+ spin_unlock(&cookie->lock);
+
+ if (cookie->parent) {
+ ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
+ ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0);
+ atomic_dec(&cookie->parent->n_children);
+ }
+
+ /* finally dispose of the cookie */
+ ASSERTCMP(atomic_read(&cookie->usage), >, 0);
+ fscache_cookie_put(cookie);
+
+ _leave("");
+}
+EXPORT_SYMBOL(__fscache_relinquish_cookie);
+
+/*
+ * destroy a cookie
+ */
+void __fscache_cookie_put(struct fscache_cookie *cookie)
+{
+ struct fscache_cookie *parent;
+
+ _enter("%p", cookie);
+
+ for (;;) {
+ _debug("FREE COOKIE %p", cookie);
+ parent = cookie->parent;
+ BUG_ON(!hlist_empty(&cookie->backing_objects));
+ kmem_cache_free(fscache_cookie_jar, cookie);
+
+ if (!parent)
+ break;
+
+ cookie = parent;
+ BUG_ON(atomic_read(&cookie->usage) <= 0);
+ if (!atomic_dec_and_test(&cookie->usage))
+ break;
+ }
+
+ _leave("");
+}
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
new file mode 100644
index 000000000000..f5b4baee7352
--- /dev/null
+++ b/fs/fscache/fsdef.c
@@ -0,0 +1,144 @@
+/* Filesystem index definition
+ *
+ * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL CACHE
+#include <linux/module.h>
+#include "internal.h"
+
+static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax);
+
+static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax);
+
+static
+enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen);
+
+/*
+ * The root index is owned by FS-Cache itself.
+ *
+ * When a netfs requests caching facilities, FS-Cache will, if one doesn't
+ * already exist, create an entry in the root index with the key being the name
+ * of the netfs ("AFS" for example), and the auxiliary data holding the index
+ * structure version supplied by the netfs:
+ *
+ * FSDEF
+ * |
+ * +-----------+
+ * | |
+ * NFS AFS
+ * [v=1] [v=1]
+ *
+ * If an entry with the appropriate name does already exist, the version is
+ * compared. If the version is different, the entire subtree from that entry
+ * will be discarded and a new entry created.
+ *
+ * The new entry will be an index, and a cookie referring to it will be passed
+ * to the netfs. This is then the root handle by which the netfs accesses the
+ * cache. It can create whatever objects it likes in that index, including
+ * further indices.
+ */
+static struct fscache_cookie_def fscache_fsdef_index_def = {
+ .name = ".FS-Cache",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+};
+
+struct fscache_cookie fscache_fsdef_index = {
+ .usage = ATOMIC_INIT(1),
+ .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock),
+ .backing_objects = HLIST_HEAD_INIT,
+ .def = &fscache_fsdef_index_def,
+};
+EXPORT_SYMBOL(fscache_fsdef_index);
+
+/*
+ * Definition of an entry in the root index. Each entry is an index, keyed to
+ * a specific netfs and only applicable to a particular version of the index
+ * structure used by that netfs.
+ */
+struct fscache_cookie_def fscache_fsdef_netfs_def = {
+ .name = "FSDEF.netfs",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = fscache_fsdef_netfs_get_key,
+ .get_aux = fscache_fsdef_netfs_get_aux,
+ .check_aux = fscache_fsdef_netfs_check_aux,
+};
+
+/*
+ * get the key data for an FSDEF index record - this is the name of the netfs
+ * for which this entry is created
+ */
+static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct fscache_netfs *netfs = cookie_netfs_data;
+ unsigned klen;
+
+ _enter("{%s.%u},", netfs->name, netfs->version);
+
+ klen = strlen(netfs->name);
+ if (klen > bufmax)
+ return 0;
+
+ memcpy(buffer, netfs->name, klen);
+ return klen;
+}
+
+/*
+ * get the auxiliary data for an FSDEF index record - this is the index
+ * structure version number of the netfs for which this version is created
+ */
+static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct fscache_netfs *netfs = cookie_netfs_data;
+ unsigned dlen;
+
+ _enter("{%s.%u},", netfs->name, netfs->version);
+
+ dlen = sizeof(uint32_t);
+ if (dlen > bufmax)
+ return 0;
+
+ memcpy(buffer, &netfs->version, dlen);
+ return dlen;
+}
+
+/*
+ * check that the index structure version number stored in the auxiliary data
+ * matches the one the netfs gave us
+ */
+static enum fscache_checkaux fscache_fsdef_netfs_check_aux(
+ void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct fscache_netfs *netfs = cookie_netfs_data;
+ uint32_t version;
+
+ _enter("{%s},,%hu", netfs->name, datalen);
+
+ if (datalen != sizeof(version)) {
+ _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version));
+ return FSCACHE_CHECKAUX_OBSOLETE;
+ }
+
+ memcpy(&version, data, sizeof(version));
+ if (version != netfs->version) {
+ _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version);
+ return FSCACHE_CHECKAUX_OBSOLETE;
+ }
+
+ _leave(" = OKAY");
+ return FSCACHE_CHECKAUX_OKAY;
+}
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
new file mode 100644
index 000000000000..bad496748a59
--- /dev/null
+++ b/fs/fscache/histogram.c
@@ -0,0 +1,109 @@
+/* FS-Cache latency histogram
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL THREAD
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+atomic_t fscache_obj_instantiate_histogram[HZ];
+atomic_t fscache_objs_histogram[HZ];
+atomic_t fscache_ops_histogram[HZ];
+atomic_t fscache_retrieval_delay_histogram[HZ];
+atomic_t fscache_retrieval_histogram[HZ];
+
+/*
+ * display the time-taken histogram
+ */
+static int fscache_histogram_show(struct seq_file *m, void *v)
+{
+ unsigned long index;
+ unsigned n[5], t;
+
+ switch ((unsigned long) v) {
+ case 1:
+ seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS "
+ " RETRV DLY RETRIEVLS\n");
+ return 0;
+ case 2:
+ seq_puts(m, "===== ===== ========= ========= ========="
+ " ========= =========\n");
+ return 0;
+ default:
+ index = (unsigned long) v - 3;
+ n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]);
+ n[1] = atomic_read(&fscache_ops_histogram[index]);
+ n[2] = atomic_read(&fscache_objs_histogram[index]);
+ n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]);
+ n[4] = atomic_read(&fscache_retrieval_histogram[index]);
+ if (!(n[0] | n[1] | n[2] | n[3] | n[4]))
+ return 0;
+
+ t = (index * 1000) / HZ;
+
+ seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n",
+ index, t, n[0], n[1], n[2], n[3], n[4]);
+ return 0;
+ }
+}
+
+/*
+ * set up the iterator to start reading from the first line
+ */
+static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos)
+{
+ if ((unsigned long long)*_pos >= HZ + 2)
+ return NULL;
+ if (*_pos == 0)
+ *_pos = 1;
+ return (void *)(unsigned long) *_pos;
+}
+
+/*
+ * move to the next line
+ */
+static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return (unsigned long long)*pos > HZ + 2 ?
+ NULL : (void *)(unsigned long) *pos;
+}
+
+/*
+ * clean up after reading
+ */
+static void fscache_histogram_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations fscache_histogram_ops = {
+ .start = fscache_histogram_start,
+ .stop = fscache_histogram_stop,
+ .next = fscache_histogram_next,
+ .show = fscache_histogram_show,
+};
+
+/*
+ * open "/proc/fs/fscache/histogram" to provide latency data
+ */
+static int fscache_histogram_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &fscache_histogram_ops);
+}
+
+const struct file_operations fscache_histogram_fops = {
+ .owner = THIS_MODULE,
+ .open = fscache_histogram_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
new file mode 100644
index 000000000000..e0cbd16f6dc9
--- /dev/null
+++ b/fs/fscache/internal.h
@@ -0,0 +1,380 @@
+/* Internal definitions for FS-Cache
+ *
+ * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Lock order, in the order in which multiple locks should be obtained:
+ * - fscache_addremove_sem
+ * - cookie->lock
+ * - cookie->parent->lock
+ * - cache->object_list_lock
+ * - object->lock
+ * - object->parent->lock
+ * - fscache_thread_lock
+ *
+ */
+
+#include <linux/fscache-cache.h>
+#include <linux/sched.h>
+
+#define FSCACHE_MIN_THREADS 4
+#define FSCACHE_MAX_THREADS 32
+
+/*
+ * fsc-cache.c
+ */
+extern struct list_head fscache_cache_list;
+extern struct rw_semaphore fscache_addremove_sem;
+
+extern struct fscache_cache *fscache_select_cache_for_object(
+ struct fscache_cookie *);
+
+/*
+ * fsc-cookie.c
+ */
+extern struct kmem_cache *fscache_cookie_jar;
+
+extern void fscache_cookie_init_once(void *);
+extern void __fscache_cookie_put(struct fscache_cookie *);
+
+/*
+ * fsc-fsdef.c
+ */
+extern struct fscache_cookie fscache_fsdef_index;
+extern struct fscache_cookie_def fscache_fsdef_netfs_def;
+
+/*
+ * fsc-histogram.c
+ */
+#ifdef CONFIG_FSCACHE_HISTOGRAM
+extern atomic_t fscache_obj_instantiate_histogram[HZ];
+extern atomic_t fscache_objs_histogram[HZ];
+extern atomic_t fscache_ops_histogram[HZ];
+extern atomic_t fscache_retrieval_delay_histogram[HZ];
+extern atomic_t fscache_retrieval_histogram[HZ];
+
+static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
+{
+ unsigned long jif = jiffies - start_jif;
+ if (jif >= HZ)
+ jif = HZ - 1;
+ atomic_inc(&histogram[jif]);
+}
+
+extern const struct file_operations fscache_histogram_fops;
+
+#else
+#define fscache_hist(hist, start_jif) do {} while (0)
+#endif
+
+/*
+ * fsc-main.c
+ */
+extern unsigned fscache_defer_lookup;
+extern unsigned fscache_defer_create;
+extern unsigned fscache_debug;
+extern struct kobject *fscache_root;
+
+extern int fscache_wait_bit(void *);
+extern int fscache_wait_bit_interruptible(void *);
+
+/*
+ * fsc-object.c
+ */
+extern void fscache_withdrawing_object(struct fscache_cache *,
+ struct fscache_object *);
+extern void fscache_enqueue_object(struct fscache_object *);
+
+/*
+ * fsc-operation.c
+ */
+extern int fscache_submit_exclusive_op(struct fscache_object *,
+ struct fscache_operation *);
+extern int fscache_submit_op(struct fscache_object *,
+ struct fscache_operation *);
+extern void fscache_abort_object(struct fscache_object *);
+extern void fscache_start_operations(struct fscache_object *);
+extern void fscache_operation_gc(struct work_struct *);
+
+/*
+ * fsc-proc.c
+ */
+#ifdef CONFIG_PROC_FS
+extern int __init fscache_proc_init(void);
+extern void fscache_proc_cleanup(void);
+#else
+#define fscache_proc_init() (0)
+#define fscache_proc_cleanup() do {} while (0)
+#endif
+
+/*
+ * fsc-stats.c
+ */
+#ifdef CONFIG_FSCACHE_STATS
+extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS];
+extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS];
+
+extern atomic_t fscache_n_op_pend;
+extern atomic_t fscache_n_op_run;
+extern atomic_t fscache_n_op_enqueue;
+extern atomic_t fscache_n_op_deferred_release;
+extern atomic_t fscache_n_op_release;
+extern atomic_t fscache_n_op_gc;
+
+extern atomic_t fscache_n_attr_changed;
+extern atomic_t fscache_n_attr_changed_ok;
+extern atomic_t fscache_n_attr_changed_nobufs;
+extern atomic_t fscache_n_attr_changed_nomem;
+extern atomic_t fscache_n_attr_changed_calls;
+
+extern atomic_t fscache_n_allocs;
+extern atomic_t fscache_n_allocs_ok;
+extern atomic_t fscache_n_allocs_wait;
+extern atomic_t fscache_n_allocs_nobufs;
+extern atomic_t fscache_n_alloc_ops;
+extern atomic_t fscache_n_alloc_op_waits;
+
+extern atomic_t fscache_n_retrievals;
+extern atomic_t fscache_n_retrievals_ok;
+extern atomic_t fscache_n_retrievals_wait;
+extern atomic_t fscache_n_retrievals_nodata;
+extern atomic_t fscache_n_retrievals_nobufs;
+extern atomic_t fscache_n_retrievals_intr;
+extern atomic_t fscache_n_retrievals_nomem;
+extern atomic_t fscache_n_retrieval_ops;
+extern atomic_t fscache_n_retrieval_op_waits;
+
+extern atomic_t fscache_n_stores;
+extern atomic_t fscache_n_stores_ok;
+extern atomic_t fscache_n_stores_again;
+extern atomic_t fscache_n_stores_nobufs;
+extern atomic_t fscache_n_stores_oom;
+extern atomic_t fscache_n_store_ops;
+extern atomic_t fscache_n_store_calls;
+
+extern atomic_t fscache_n_marks;
+extern atomic_t fscache_n_uncaches;
+
+extern atomic_t fscache_n_acquires;
+extern atomic_t fscache_n_acquires_null;
+extern atomic_t fscache_n_acquires_no_cache;
+extern atomic_t fscache_n_acquires_ok;
+extern atomic_t fscache_n_acquires_nobufs;
+extern atomic_t fscache_n_acquires_oom;
+
+extern atomic_t fscache_n_updates;
+extern atomic_t fscache_n_updates_null;
+extern atomic_t fscache_n_updates_run;
+
+extern atomic_t fscache_n_relinquishes;
+extern atomic_t fscache_n_relinquishes_null;
+extern atomic_t fscache_n_relinquishes_waitcrt;
+
+extern atomic_t fscache_n_cookie_index;
+extern atomic_t fscache_n_cookie_data;
+extern atomic_t fscache_n_cookie_special;
+
+extern atomic_t fscache_n_object_alloc;
+extern atomic_t fscache_n_object_no_alloc;
+extern atomic_t fscache_n_object_lookups;
+extern atomic_t fscache_n_object_lookups_negative;
+extern atomic_t fscache_n_object_lookups_positive;
+extern atomic_t fscache_n_object_created;
+extern atomic_t fscache_n_object_avail;
+extern atomic_t fscache_n_object_dead;
+
+extern atomic_t fscache_n_checkaux_none;
+extern atomic_t fscache_n_checkaux_okay;
+extern atomic_t fscache_n_checkaux_update;
+extern atomic_t fscache_n_checkaux_obsolete;
+
+static inline void fscache_stat(atomic_t *stat)
+{
+ atomic_inc(stat);
+}
+
+extern const struct file_operations fscache_stats_fops;
+#else
+
+#define fscache_stat(stat) do {} while (0)
+#endif
+
+/*
+ * raise an event on an object
+ * - if the event is not masked for that object, then the object is
+ * queued for attention by the thread pool.
+ */
+static inline void fscache_raise_event(struct fscache_object *object,
+ unsigned event)
+{
+ if (!test_and_set_bit(event, &object->events) &&
+ test_bit(event, &object->event_mask))
+ fscache_enqueue_object(object);
+}
+
+/*
+ * drop a reference to a cookie
+ */
+static inline void fscache_cookie_put(struct fscache_cookie *cookie)
+{
+ BUG_ON(atomic_read(&cookie->usage) <= 0);
+ if (atomic_dec_and_test(&cookie->usage))
+ __fscache_cookie_put(cookie);
+}
+
+/*
+ * get an extra reference to a netfs retrieval context
+ */
+static inline
+void *fscache_get_context(struct fscache_cookie *cookie, void *context)
+{
+ if (cookie->def->get_context)
+ cookie->def->get_context(cookie->netfs_data, context);
+ return context;
+}
+
+/*
+ * release a reference to a netfs retrieval context
+ */
+static inline
+void fscache_put_context(struct fscache_cookie *cookie, void *context)
+{
+ if (cookie->def->put_context)
+ cookie->def->put_context(cookie->netfs_data, context);
+}
+
+/*****************************************************************************/
+/*
+ * debug tracing
+ */
+#define dbgprintk(FMT, ...) \
+ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+
+/* make sure we maintain the format strings, even when debugging is disabled */
+static inline __attribute__((format(printf, 1, 2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
+
+#define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
+
+#ifdef __KDEBUG
+#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
+#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
+#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
+
+#elif defined(CONFIG_FSCACHE_DEBUG)
+#define _enter(FMT, ...) \
+do { \
+ if (__do_kdebug(ENTER)) \
+ kenter(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT, ...) \
+do { \
+ if (__do_kdebug(LEAVE)) \
+ kleave(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT, ...) \
+do { \
+ if (__do_kdebug(DEBUG)) \
+ kdebug(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
+#endif
+
+/*
+ * determine whether a particular optional debugging point should be logged
+ * - we need to go through three steps to persuade cpp to correctly join the
+ * shorthand in FSCACHE_DEBUG_LEVEL with its prefix
+ */
+#define ____do_kdebug(LEVEL, POINT) \
+ unlikely((fscache_debug & \
+ (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
+#define ___do_kdebug(LEVEL, POINT) \
+ ____do_kdebug(LEVEL, POINT)
+#define __do_kdebug(POINT) \
+ ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)
+
+#define FSCACHE_DEBUG_CACHE 0
+#define FSCACHE_DEBUG_COOKIE 1
+#define FSCACHE_DEBUG_PAGE 2
+#define FSCACHE_DEBUG_OPERATION 3
+
+#define FSCACHE_POINT_ENTER 1
+#define FSCACHE_POINT_LEAVE 2
+#define FSCACHE_POINT_DEBUG 4
+
+#ifndef FSCACHE_DEBUG_LEVEL
+#define FSCACHE_DEBUG_LEVEL CACHE
+#endif
+
+/*
+ * assertions
+ */
+#if 1 /* defined(__KDEBUGALL) */
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#else
+
+#define ASSERT(X) do {} while (0)
+#define ASSERTCMP(X, OP, Y) do {} while (0)
+#define ASSERTIF(C, X) do {} while (0)
+#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
+
+#endif /* assert or not */
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
new file mode 100644
index 000000000000..4de41b597499
--- /dev/null
+++ b/fs/fscache/main.c
@@ -0,0 +1,124 @@
+/* General filesystem local caching manager
+ *
+ * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL CACHE
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+MODULE_DESCRIPTION("FS Cache Manager");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+unsigned fscache_defer_lookup = 1;
+module_param_named(defer_lookup, fscache_defer_lookup, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(fscache_defer_lookup,
+ "Defer cookie lookup to background thread");
+
+unsigned fscache_defer_create = 1;
+module_param_named(defer_create, fscache_defer_create, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(fscache_defer_create,
+ "Defer cookie creation to background thread");
+
+unsigned fscache_debug;
+module_param_named(debug, fscache_debug, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(fscache_debug,
+ "FS-Cache debugging mask");
+
+struct kobject *fscache_root;
+
+/*
+ * initialise the fs caching module
+ */
+static int __init fscache_init(void)
+{
+ int ret;
+
+ ret = slow_work_register_user();
+ if (ret < 0)
+ goto error_slow_work;
+
+ ret = fscache_proc_init();
+ if (ret < 0)
+ goto error_proc;
+
+ fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
+ sizeof(struct fscache_cookie),
+ 0,
+ 0,
+ fscache_cookie_init_once);
+ if (!fscache_cookie_jar) {
+ printk(KERN_NOTICE
+ "FS-Cache: Failed to allocate a cookie jar\n");
+ ret = -ENOMEM;
+ goto error_cookie_jar;
+ }
+
+ fscache_root = kobject_create_and_add("fscache", kernel_kobj);
+ if (!fscache_root)
+ goto error_kobj;
+
+ printk(KERN_NOTICE "FS-Cache: Loaded\n");
+ return 0;
+
+error_kobj:
+ kmem_cache_destroy(fscache_cookie_jar);
+error_cookie_jar:
+ fscache_proc_cleanup();
+error_proc:
+ slow_work_unregister_user();
+error_slow_work:
+ return ret;
+}
+
+fs_initcall(fscache_init);
+
+/*
+ * clean up on module removal
+ */
+static void __exit fscache_exit(void)
+{
+ _enter("");
+
+ kobject_put(fscache_root);
+ kmem_cache_destroy(fscache_cookie_jar);
+ fscache_proc_cleanup();
+ slow_work_unregister_user();
+ printk(KERN_NOTICE "FS-Cache: Unloaded\n");
+}
+
+module_exit(fscache_exit);
+
+/*
+ * wait_on_bit() sleep function for uninterruptible waiting
+ */
+int fscache_wait_bit(void *flags)
+{
+ schedule();
+ return 0;
+}
+EXPORT_SYMBOL(fscache_wait_bit);
+
+/*
+ * wait_on_bit() sleep function for interruptible waiting
+ */
+int fscache_wait_bit_interruptible(void *flags)
+{
+ schedule();
+ return signal_pending(current);
+}
+EXPORT_SYMBOL(fscache_wait_bit_interruptible);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
new file mode 100644
index 000000000000..e028b8eb1c40
--- /dev/null
+++ b/fs/fscache/netfs.c
@@ -0,0 +1,103 @@
+/* FS-Cache netfs (client) registration
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL COOKIE
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+static LIST_HEAD(fscache_netfs_list);
+
+/*
+ * register a network filesystem for caching
+ */
+int __fscache_register_netfs(struct fscache_netfs *netfs)
+{
+ struct fscache_netfs *ptr;
+ int ret;
+
+ _enter("{%s}", netfs->name);
+
+ INIT_LIST_HEAD(&netfs->link);
+
+ /* allocate a cookie for the primary index */
+ netfs->primary_index =
+ kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
+
+ if (!netfs->primary_index) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ /* initialise the primary index cookie */
+ atomic_set(&netfs->primary_index->usage, 1);
+ atomic_set(&netfs->primary_index->n_children, 0);
+
+ netfs->primary_index->def = &fscache_fsdef_netfs_def;
+ netfs->primary_index->parent = &fscache_fsdef_index;
+ netfs->primary_index->netfs_data = netfs;
+
+ atomic_inc(&netfs->primary_index->parent->usage);
+ atomic_inc(&netfs->primary_index->parent->n_children);
+
+ spin_lock_init(&netfs->primary_index->lock);
+ INIT_HLIST_HEAD(&netfs->primary_index->backing_objects);
+
+ /* check the netfs type is not already present */
+ down_write(&fscache_addremove_sem);
+
+ ret = -EEXIST;
+ list_for_each_entry(ptr, &fscache_netfs_list, link) {
+ if (strcmp(ptr->name, netfs->name) == 0)
+ goto already_registered;
+ }
+
+ list_add(&netfs->link, &fscache_netfs_list);
+ ret = 0;
+
+ printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n",
+ netfs->name);
+
+already_registered:
+ up_write(&fscache_addremove_sem);
+
+ if (ret < 0) {
+ netfs->primary_index->parent = NULL;
+ __fscache_cookie_put(netfs->primary_index);
+ netfs->primary_index = NULL;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(__fscache_register_netfs);
+
+/*
+ * unregister a network filesystem from the cache
+ * - all cookies must have been released first
+ */
+void __fscache_unregister_netfs(struct fscache_netfs *netfs)
+{
+ _enter("{%s.%u}", netfs->name, netfs->version);
+
+ down_write(&fscache_addremove_sem);
+
+ list_del(&netfs->link);
+ fscache_relinquish_cookie(netfs->primary_index, 0);
+
+ up_write(&fscache_addremove_sem);
+
+ printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n",
+ netfs->name);
+
+ _leave("");
+}
+EXPORT_SYMBOL(__fscache_unregister_netfs);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
new file mode 100644
index 000000000000..392a41b1b79d
--- /dev/null
+++ b/fs/fscache/object.c
@@ -0,0 +1,810 @@
+/* FS-Cache object state machine handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * See Documentation/filesystems/caching/object.txt for a description of the
+ * object state machine and the in-kernel representations.
+ */
+
+#define FSCACHE_DEBUG_LEVEL COOKIE
+#include <linux/module.h>
+#include "internal.h"
+
+const char *fscache_object_states[] = {
+ [FSCACHE_OBJECT_INIT] = "OBJECT_INIT",
+ [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP",
+ [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING",
+ [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE",
+ [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE",
+ [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING",
+ [FSCACHE_OBJECT_DYING] = "OBJECT_DYING",
+ [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING",
+ [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT",
+ [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING",
+ [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING",
+ [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING",
+ [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD",
+};
+EXPORT_SYMBOL(fscache_object_states);
+
+static void fscache_object_slow_work_put_ref(struct slow_work *);
+static int fscache_object_slow_work_get_ref(struct slow_work *);
+static void fscache_object_slow_work_execute(struct slow_work *);
+static void fscache_initialise_object(struct fscache_object *);
+static void fscache_lookup_object(struct fscache_object *);
+static void fscache_object_available(struct fscache_object *);
+static void fscache_release_object(struct fscache_object *);
+static void fscache_withdraw_object(struct fscache_object *);
+static void fscache_enqueue_dependents(struct fscache_object *);
+static void fscache_dequeue_object(struct fscache_object *);
+
+const struct slow_work_ops fscache_object_slow_work_ops = {
+ .get_ref = fscache_object_slow_work_get_ref,
+ .put_ref = fscache_object_slow_work_put_ref,
+ .execute = fscache_object_slow_work_execute,
+};
+EXPORT_SYMBOL(fscache_object_slow_work_ops);
+
+/*
+ * we need to notify the parent when an op completes that we had outstanding
+ * upon it
+ */
+static inline void fscache_done_parent_op(struct fscache_object *object)
+{
+ struct fscache_object *parent = object->parent;
+
+ _enter("OBJ%x {OBJ%x,%x}",
+ object->debug_id, parent->debug_id, parent->n_ops);
+
+ spin_lock_nested(&parent->lock, 1);
+ parent->n_ops--;
+ parent->n_obj_ops--;
+ if (parent->n_ops == 0)
+ fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
+ spin_unlock(&parent->lock);
+}
+
+/*
+ * process events that have been sent to an object's state machine
+ * - initiates parent lookup
+ * - does object lookup
+ * - does object creation
+ * - does object recycling and retirement
+ * - does object withdrawal
+ */
+static void fscache_object_state_machine(struct fscache_object *object)
+{
+ enum fscache_object_state new_state;
+
+ ASSERT(object != NULL);
+
+ _enter("{OBJ%x,%s,%lx}",
+ object->debug_id, fscache_object_states[object->state],
+ object->events);
+
+ switch (object->state) {
+ /* wait for the parent object to become ready */
+ case FSCACHE_OBJECT_INIT:
+ object->event_mask =
+ ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
+ fscache_initialise_object(object);
+ goto done;
+
+ /* look up the object metadata on disk */
+ case FSCACHE_OBJECT_LOOKING_UP:
+ fscache_lookup_object(object);
+ goto lookup_transit;
+
+ /* create the object metadata on disk */
+ case FSCACHE_OBJECT_CREATING:
+ fscache_lookup_object(object);
+ goto lookup_transit;
+
+ /* handle an object becoming available; start pending
+ * operations and queue dependent operations for processing */
+ case FSCACHE_OBJECT_AVAILABLE:
+ fscache_object_available(object);
+ goto active_transit;
+
+ /* normal running state */
+ case FSCACHE_OBJECT_ACTIVE:
+ goto active_transit;
+
+ /* update the object metadata on disk */
+ case FSCACHE_OBJECT_UPDATING:
+ clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
+ fscache_stat(&fscache_n_updates_run);
+ object->cache->ops->update_object(object);
+ goto active_transit;
+
+ /* handle an object dying during lookup or creation */
+ case FSCACHE_OBJECT_LC_DYING:
+ object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
+ object->cache->ops->lookup_complete(object);
+
+ spin_lock(&object->lock);
+ object->state = FSCACHE_OBJECT_DYING;
+ if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
+ &object->cookie->flags))
+ wake_up_bit(&object->cookie->flags,
+ FSCACHE_COOKIE_CREATING);
+ spin_unlock(&object->lock);
+
+ fscache_done_parent_op(object);
+
+ /* wait for completion of all active operations on this object
+ * and the death of all child objects of this object */
+ case FSCACHE_OBJECT_DYING:
+ dying:
+ clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events);
+ spin_lock(&object->lock);
+ _debug("dying OBJ%x {%d,%d}",
+ object->debug_id, object->n_ops, object->n_children);
+ if (object->n_ops == 0 && object->n_children == 0) {
+ object->event_mask &=
+ ~(1 << FSCACHE_OBJECT_EV_CLEARED);
+ object->event_mask |=
+ (1 << FSCACHE_OBJECT_EV_WITHDRAW) |
+ (1 << FSCACHE_OBJECT_EV_RETIRE) |
+ (1 << FSCACHE_OBJECT_EV_RELEASE) |
+ (1 << FSCACHE_OBJECT_EV_ERROR);
+ } else {
+ object->event_mask &=
+ ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
+ (1 << FSCACHE_OBJECT_EV_RETIRE) |
+ (1 << FSCACHE_OBJECT_EV_RELEASE) |
+ (1 << FSCACHE_OBJECT_EV_ERROR));
+ object->event_mask |=
+ 1 << FSCACHE_OBJECT_EV_CLEARED;
+ }
+ spin_unlock(&object->lock);
+ fscache_enqueue_dependents(object);
+ goto terminal_transit;
+
+ /* handle an abort during initialisation */
+ case FSCACHE_OBJECT_ABORT_INIT:
+ _debug("handle abort init %lx", object->events);
+ object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
+
+ spin_lock(&object->lock);
+ fscache_dequeue_object(object);
+
+ object->state = FSCACHE_OBJECT_DYING;
+ if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
+ &object->cookie->flags))
+ wake_up_bit(&object->cookie->flags,
+ FSCACHE_COOKIE_CREATING);
+ spin_unlock(&object->lock);
+ goto dying;
+
+ /* handle the netfs releasing an object and possibly marking it
+ * obsolete too */
+ case FSCACHE_OBJECT_RELEASING:
+ case FSCACHE_OBJECT_RECYCLING:
+ object->event_mask &=
+ ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
+ (1 << FSCACHE_OBJECT_EV_RETIRE) |
+ (1 << FSCACHE_OBJECT_EV_RELEASE) |
+ (1 << FSCACHE_OBJECT_EV_ERROR));
+ fscache_release_object(object);
+ spin_lock(&object->lock);
+ object->state = FSCACHE_OBJECT_DEAD;
+ spin_unlock(&object->lock);
+ fscache_stat(&fscache_n_object_dead);
+ goto terminal_transit;
+
+ /* handle the parent cache of this object being withdrawn from
+ * active service */
+ case FSCACHE_OBJECT_WITHDRAWING:
+ object->event_mask &=
+ ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
+ (1 << FSCACHE_OBJECT_EV_RETIRE) |
+ (1 << FSCACHE_OBJECT_EV_RELEASE) |
+ (1 << FSCACHE_OBJECT_EV_ERROR));
+ fscache_withdraw_object(object);
+ spin_lock(&object->lock);
+ object->state = FSCACHE_OBJECT_DEAD;
+ spin_unlock(&object->lock);
+ fscache_stat(&fscache_n_object_dead);
+ goto terminal_transit;
+
+ /* complain about the object being woken up once it is
+ * deceased */
+ case FSCACHE_OBJECT_DEAD:
+ printk(KERN_ERR "FS-Cache:"
+ " Unexpected event in dead state %lx\n",
+ object->events & object->event_mask);
+ BUG();
+
+ default:
+ printk(KERN_ERR "FS-Cache: Unknown object state %u\n",
+ object->state);
+ BUG();
+ }
+
+ /* determine the transition from a lookup state */
+lookup_transit:
+ switch (fls(object->events & object->event_mask) - 1) {
+ case FSCACHE_OBJECT_EV_WITHDRAW:
+ case FSCACHE_OBJECT_EV_RETIRE:
+ case FSCACHE_OBJECT_EV_RELEASE:
+ case FSCACHE_OBJECT_EV_ERROR:
+ new_state = FSCACHE_OBJECT_LC_DYING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_REQUEUE:
+ goto done;
+ case -1:
+ goto done; /* sleep until event */
+ default:
+ goto unsupported_event;
+ }
+
+ /* determine the transition from an active state */
+active_transit:
+ switch (fls(object->events & object->event_mask) - 1) {
+ case FSCACHE_OBJECT_EV_WITHDRAW:
+ case FSCACHE_OBJECT_EV_RETIRE:
+ case FSCACHE_OBJECT_EV_RELEASE:
+ case FSCACHE_OBJECT_EV_ERROR:
+ new_state = FSCACHE_OBJECT_DYING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_UPDATE:
+ new_state = FSCACHE_OBJECT_UPDATING;
+ goto change_state;
+ case -1:
+ new_state = FSCACHE_OBJECT_ACTIVE;
+ goto change_state; /* sleep until event */
+ default:
+ goto unsupported_event;
+ }
+
+ /* determine the transition from a terminal state */
+terminal_transit:
+ switch (fls(object->events & object->event_mask) - 1) {
+ case FSCACHE_OBJECT_EV_WITHDRAW:
+ new_state = FSCACHE_OBJECT_WITHDRAWING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_RETIRE:
+ new_state = FSCACHE_OBJECT_RECYCLING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_RELEASE:
+ new_state = FSCACHE_OBJECT_RELEASING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_ERROR:
+ new_state = FSCACHE_OBJECT_WITHDRAWING;
+ goto change_state;
+ case FSCACHE_OBJECT_EV_CLEARED:
+ new_state = FSCACHE_OBJECT_DYING;
+ goto change_state;
+ case -1:
+ goto done; /* sleep until event */
+ default:
+ goto unsupported_event;
+ }
+
+change_state:
+ spin_lock(&object->lock);
+ object->state = new_state;
+ spin_unlock(&object->lock);
+
+done:
+ _leave(" [->%s]", fscache_object_states[object->state]);
+ return;
+
+unsupported_event:
+ printk(KERN_ERR "FS-Cache:"
+ " Unsupported event %lx [mask %lx] in state %s\n",
+ object->events, object->event_mask,
+ fscache_object_states[object->state]);
+ BUG();
+}
+
+/*
+ * execute an object
+ */
+static void fscache_object_slow_work_execute(struct slow_work *work)
+{
+ struct fscache_object *object =
+ container_of(work, struct fscache_object, work);
+ unsigned long start;
+
+ _enter("{OBJ%x}", object->debug_id);
+
+ clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+
+ start = jiffies;
+ fscache_object_state_machine(object);
+ fscache_hist(fscache_objs_histogram, start);
+ if (object->events & object->event_mask)
+ fscache_enqueue_object(object);
+}
+
+/*
+ * initialise an object
+ * - check the specified object's parent to see if we can make use of it
+ * immediately to do a creation
+ * - we may need to start the process of creating a parent and we need to wait
+ * for the parent's lookup and creation to complete if it's not there yet
+ * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
+ * leaf-most cookies of the object and all its children
+ */
+static void fscache_initialise_object(struct fscache_object *object)
+{
+ struct fscache_object *parent;
+
+ _enter("");
+ ASSERT(object->cookie != NULL);
+ ASSERT(object->cookie->parent != NULL);
+ ASSERT(list_empty(&object->work.link));
+
+ if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
+ (1 << FSCACHE_OBJECT_EV_RELEASE) |
+ (1 << FSCACHE_OBJECT_EV_RETIRE) |
+ (1 << FSCACHE_OBJECT_EV_WITHDRAW))) {
+ _debug("abort init %lx", object->events);
+ spin_lock(&object->lock);
+ object->state = FSCACHE_OBJECT_ABORT_INIT;
+ spin_unlock(&object->lock);
+ return;
+ }
+
+ spin_lock(&object->cookie->lock);
+ spin_lock_nested(&object->cookie->parent->lock, 1);
+
+ parent = object->parent;
+ if (!parent) {
+ _debug("no parent");
+ set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
+ } else {
+ spin_lock(&object->lock);
+ spin_lock_nested(&parent->lock, 1);
+ _debug("parent %s", fscache_object_states[parent->state]);
+
+ if (parent->state >= FSCACHE_OBJECT_DYING) {
+ _debug("bad parent");
+ set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
+ } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) {
+ _debug("wait");
+
+ /* we may get woken up in this state by child objects
+ * binding on to us, so we need to make sure we don't
+ * add ourself to the list multiple times */
+ if (list_empty(&object->dep_link)) {
+ object->cache->ops->grab_object(object);
+ list_add(&object->dep_link,
+ &parent->dependents);
+
+ /* fscache_acquire_non_index_cookie() uses this
+ * to wake the chain up */
+ if (parent->state == FSCACHE_OBJECT_INIT)
+ fscache_enqueue_object(parent);
+ }
+ } else {
+ _debug("go");
+ parent->n_ops++;
+ parent->n_obj_ops++;
+ object->lookup_jif = jiffies;
+ object->state = FSCACHE_OBJECT_LOOKING_UP;
+ set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+ }
+
+ spin_unlock(&parent->lock);
+ spin_unlock(&object->lock);
+ }
+
+ spin_unlock(&object->cookie->parent->lock);
+ spin_unlock(&object->cookie->lock);
+ _leave("");
+}
+
+/*
+ * look an object up in the cache from which it was allocated
+ * - we hold an "access lock" on the parent object, so the parent object cannot
+ * be withdrawn by either party till we've finished
+ * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
+ * leaf-most cookies of the object and all its children
+ */
+static void fscache_lookup_object(struct fscache_object *object)
+{
+ struct fscache_cookie *cookie = object->cookie;
+ struct fscache_object *parent;
+
+ _enter("");
+
+ parent = object->parent;
+ ASSERT(parent != NULL);
+ ASSERTCMP(parent->n_ops, >, 0);
+ ASSERTCMP(parent->n_obj_ops, >, 0);
+
+ /* make sure the parent is still available */
+ ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE);
+
+ if (parent->state >= FSCACHE_OBJECT_DYING ||
+ test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
+ _debug("unavailable");
+ set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
+ _leave("");
+ return;
+ }
+
+ _debug("LOOKUP \"%s/%s\" in \"%s\"",
+ parent->cookie->def->name, cookie->def->name,
+ object->cache->tag->name);
+
+ fscache_stat(&fscache_n_object_lookups);
+ object->cache->ops->lookup_object(object);
+
+ if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events))
+ set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
+
+ _leave("");
+}
+
+/**
+ * fscache_object_lookup_negative - Note negative cookie lookup
+ * @object: Object pointing to cookie to mark
+ *
+ * Note negative lookup, permitting those waiting to read data from an already
+ * existing backing object to continue as there's no data for them to read.
+ */
+void fscache_object_lookup_negative(struct fscache_object *object)
+{
+ struct fscache_cookie *cookie = object->cookie;
+
+ _enter("{OBJ%x,%s}",
+ object->debug_id, fscache_object_states[object->state]);
+
+ spin_lock(&object->lock);
+ if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
+ fscache_stat(&fscache_n_object_lookups_negative);
+
+ /* transit here to allow write requests to begin stacking up
+ * and read requests to begin returning ENODATA */
+ object->state = FSCACHE_OBJECT_CREATING;
+ spin_unlock(&object->lock);
+
+ set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags);
+ set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+ _debug("wake up lookup %p", &cookie->flags);
+ smp_mb__before_clear_bit();
+ clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+ set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+ } else {
+ ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
+ spin_unlock(&object->lock);
+ }
+
+ _leave("");
+}
+EXPORT_SYMBOL(fscache_object_lookup_negative);
+
+/**
+ * fscache_obtained_object - Note successful object lookup or creation
+ * @object: Object pointing to cookie to mark
+ *
+ * Note successful lookup and/or creation, permitting those waiting to write
+ * data to a backing object to continue.
+ *
+ * Note that after calling this, an object's cookie may be relinquished by the
+ * netfs, and so must be accessed with object lock held.
+ */
+void fscache_obtained_object(struct fscache_object *object)
+{
+ struct fscache_cookie *cookie = object->cookie;
+
+ _enter("{OBJ%x,%s}",
+ object->debug_id, fscache_object_states[object->state]);
+
+ /* if we were still looking up, then we must have a positive lookup
+ * result, in which case there may be data available */
+ spin_lock(&object->lock);
+ if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
+ fscache_stat(&fscache_n_object_lookups_positive);
+
+ clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+ object->state = FSCACHE_OBJECT_AVAILABLE;
+ spin_unlock(&object->lock);
+
+ smp_mb__before_clear_bit();
+ clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+ set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+ } else {
+ ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
+ fscache_stat(&fscache_n_object_created);
+
+ object->state = FSCACHE_OBJECT_AVAILABLE;
+ spin_unlock(&object->lock);
+ set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
+ smp_wmb();
+ }
+
+ if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags))
+ wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING);
+
+ _leave("");
+}
+EXPORT_SYMBOL(fscache_obtained_object);
+
+/*
+ * handle an object that has just become available
+ */
+static void fscache_object_available(struct fscache_object *object)
+{
+ _enter("{OBJ%x}", object->debug_id);
+
+ spin_lock(&object->lock);
+
+ if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags))
+ wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING);
+
+ fscache_done_parent_op(object);
+ if (object->n_in_progress == 0) {
+ if (object->n_ops > 0) {
+ ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
+ ASSERTIF(object->n_ops > object->n_obj_ops,
+ !list_empty(&object->pending_ops));
+ fscache_start_operations(object);
+ } else {
+ ASSERT(list_empty(&object->pending_ops));
+ }
+ }
+ spin_unlock(&object->lock);
+
+ object->cache->ops->lookup_complete(object);
+ fscache_enqueue_dependents(object);
+
+ fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif);
+ fscache_stat(&fscache_n_object_avail);
+
+ _leave("");
+}
+
+/*
+ * drop an object's attachments
+ */
+static void fscache_drop_object(struct fscache_object *object)
+{
+ struct fscache_object *parent = object->parent;
+ struct fscache_cache *cache = object->cache;
+
+ _enter("{OBJ%x,%d}", object->debug_id, object->n_children);
+
+ spin_lock(&cache->object_list_lock);
+ list_del_init(&object->cache_link);
+ spin_unlock(&cache->object_list_lock);
+
+ cache->ops->drop_object(object);
+
+ if (parent) {
+ _debug("release parent OBJ%x {%d}",
+ parent->debug_id, parent->n_children);
+
+ spin_lock(&parent->lock);
+ parent->n_children--;
+ if (parent->n_children == 0)
+ fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
+ spin_unlock(&parent->lock);
+ object->parent = NULL;
+ }
+
+ /* this just shifts the object release to the slow work processor */
+ object->cache->ops->put_object(object);
+
+ _leave("");
+}
+
+/*
+ * release or recycle an object that the netfs has discarded
+ */
+static void fscache_release_object(struct fscache_object *object)
+{
+ _enter("");
+
+ fscache_drop_object(object);
+}
+
+/*
+ * withdraw an object from active service
+ */
+static void fscache_withdraw_object(struct fscache_object *object)
+{
+ struct fscache_cookie *cookie;
+ bool detached;
+
+ _enter("");
+
+ spin_lock(&object->lock);
+ cookie = object->cookie;
+ if (cookie) {
+ /* need to get the cookie lock before the object lock, starting
+ * from the object pointer */
+ atomic_inc(&cookie->usage);
+ spin_unlock(&object->lock);
+
+ detached = false;
+ spin_lock(&cookie->lock);
+ spin_lock(&object->lock);
+
+ if (object->cookie == cookie) {
+ hlist_del_init(&object->cookie_link);
+ object->cookie = NULL;
+ detached = true;
+ }
+ spin_unlock(&cookie->lock);
+ fscache_cookie_put(cookie);
+ if (detached)
+ fscache_cookie_put(cookie);
+ }
+
+ spin_unlock(&object->lock);
+
+ fscache_drop_object(object);
+}
+
+/*
+ * withdraw an object from active service at the behest of the cache
+ * - need break the links to a cached object cookie
+ * - called under two situations:
+ * (1) recycler decides to reclaim an in-use object
+ * (2) a cache is unmounted
+ * - have to take care as the cookie can be being relinquished by the netfs
+ * simultaneously
+ * - the object is pinned by the caller holding a refcount on it
+ */
+void fscache_withdrawing_object(struct fscache_cache *cache,
+ struct fscache_object *object)
+{
+ bool enqueue = false;
+
+ _enter(",OBJ%x", object->debug_id);
+
+ spin_lock(&object->lock);
+ if (object->state < FSCACHE_OBJECT_WITHDRAWING) {
+ object->state = FSCACHE_OBJECT_WITHDRAWING;
+ enqueue = true;
+ }
+ spin_unlock(&object->lock);
+
+ if (enqueue)
+ fscache_enqueue_object(object);
+
+ _leave("");
+}
+
+/*
+ * allow the slow work item processor to get a ref on an object
+ */
+static int fscache_object_slow_work_get_ref(struct slow_work *work)
+{
+ struct fscache_object *object =
+ container_of(work, struct fscache_object, work);
+
+ return object->cache->ops->grab_object(object) ? 0 : -EAGAIN;
+}
+
+/*
+ * allow the slow work item processor to discard a ref on a work item
+ */
+static void fscache_object_slow_work_put_ref(struct slow_work *work)
+{
+ struct fscache_object *object =
+ container_of(work, struct fscache_object, work);
+
+ return object->cache->ops->put_object(object);
+}
+
+/*
+ * enqueue an object for metadata-type processing
+ */
+void fscache_enqueue_object(struct fscache_object *object)
+{
+ _enter("{OBJ%x}", object->debug_id);
+
+ slow_work_enqueue(&object->work);
+}
+
+/*
+ * enqueue the dependents of an object for metadata-type processing
+ * - the caller must hold the object's lock
+ * - this may cause an already locked object to wind up being processed again
+ */
+static void fscache_enqueue_dependents(struct fscache_object *object)
+{
+ struct fscache_object *dep;
+
+ _enter("{OBJ%x}", object->debug_id);
+
+ if (list_empty(&object->dependents))
+ return;
+
+ spin_lock(&object->lock);
+
+ while (!list_empty(&object->dependents)) {
+ dep = list_entry(object->dependents.next,
+ struct fscache_object, dep_link);
+ list_del_init(&dep->dep_link);
+
+
+ /* sort onto appropriate lists */
+ fscache_enqueue_object(dep);
+ dep->cache->ops->put_object(dep);
+
+ if (!list_empty(&object->dependents))
+ cond_resched_lock(&object->lock);
+ }
+
+ spin_unlock(&object->lock);
+}
+
+/*
+ * remove an object from whatever queue it's waiting on
+ * - the caller must hold object->lock
+ */
+void fscache_dequeue_object(struct fscache_object *object)
+{
+ _enter("{OBJ%x}", object->debug_id);
+
+ if (!list_empty(&object->dep_link)) {
+ spin_lock(&object->parent->lock);
+ list_del_init(&object->dep_link);
+ spin_unlock(&object->parent->lock);
+ }
+
+ _leave("");
+}
+
+/**
+ * fscache_check_aux - Ask the netfs whether an object on disk is still valid
+ * @object: The object to ask about
+ * @data: The auxiliary data for the object
+ * @datalen: The size of the auxiliary data
+ *
+ * This function consults the netfs about the coherency state of an object
+ */
+enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
+ const void *data, uint16_t datalen)
+{
+ enum fscache_checkaux result;
+
+ if (!object->cookie->def->check_aux) {
+ fscache_stat(&fscache_n_checkaux_none);
+ return FSCACHE_CHECKAUX_OKAY;
+ }
+
+ result = object->cookie->def->check_aux(object->cookie->netfs_data,
+ data, datalen);
+ switch (result) {
+ /* entry okay as is */
+ case FSCACHE_CHECKAUX_OKAY:
+ fscache_stat(&fscache_n_checkaux_okay);
+ break;
+
+ /* entry requires update */
+ case FSCACHE_CHECKAUX_NEEDS_UPDATE:
+ fscache_stat(&fscache_n_checkaux_update);
+ break;
+
+ /* entry requires deletion */
+ case FSCACHE_CHECKAUX_OBSOLETE:
+ fscache_stat(&fscache_n_checkaux_obsolete);
+ break;
+
+ default:
+ BUG();
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(fscache_check_aux);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
new file mode 100644
index 000000000000..e7f8d53b8b6b
--- /dev/null
+++ b/fs/fscache/operation.c
@@ -0,0 +1,459 @@
+/* FS-Cache worker operation management routines
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * See Documentation/filesystems/caching/operations.txt
+ */
+
+#define FSCACHE_DEBUG_LEVEL OPERATION
+#include <linux/module.h>
+#include "internal.h"
+
+atomic_t fscache_op_debug_id;
+EXPORT_SYMBOL(fscache_op_debug_id);
+
+/**
+ * fscache_enqueue_operation - Enqueue an operation for processing
+ * @op: The operation to enqueue
+ *
+ * Enqueue an operation for processing by the FS-Cache thread pool.
+ *
+ * This will get its own ref on the object.
+ */
+void fscache_enqueue_operation(struct fscache_operation *op)
+{
+ _enter("{OBJ%x OP%x,%u}",
+ op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+
+ ASSERT(op->processor != NULL);
+ ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
+ ASSERTCMP(atomic_read(&op->usage), >, 0);
+
+ if (list_empty(&op->pend_link)) {
+ switch (op->flags & FSCACHE_OP_TYPE) {
+ case FSCACHE_OP_FAST:
+ _debug("queue fast");
+ atomic_inc(&op->usage);
+ if (!schedule_work(&op->fast_work))
+ fscache_put_operation(op);
+ break;
+ case FSCACHE_OP_SLOW:
+ _debug("queue slow");
+ slow_work_enqueue(&op->slow_work);
+ break;
+ case FSCACHE_OP_MYTHREAD:
+ _debug("queue for caller's attention");
+ break;
+ default:
+ printk(KERN_ERR "FS-Cache: Unexpected op type %lx",
+ op->flags);
+ BUG();
+ break;
+ }
+ fscache_stat(&fscache_n_op_enqueue);
+ }
+}
+EXPORT_SYMBOL(fscache_enqueue_operation);
+
+/*
+ * start an op running
+ */
+static void fscache_run_op(struct fscache_object *object,
+ struct fscache_operation *op)
+{
+ object->n_in_progress++;
+ if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
+ wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
+ if (op->processor)
+ fscache_enqueue_operation(op);
+ fscache_stat(&fscache_n_op_run);
+}
+
+/*
+ * submit an exclusive operation for an object
+ * - other ops are excluded from running simultaneously with this one
+ * - this gets any extra refs it needs on an op
+ */
+int fscache_submit_exclusive_op(struct fscache_object *object,
+ struct fscache_operation *op)
+{
+ int ret;
+
+ _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
+
+ spin_lock(&object->lock);
+ ASSERTCMP(object->n_ops, >=, object->n_in_progress);
+ ASSERTCMP(object->n_ops, >=, object->n_exclusive);
+
+ ret = -ENOBUFS;
+ if (fscache_object_is_active(object)) {
+ op->object = object;
+ object->n_ops++;
+ object->n_exclusive++; /* reads and writes must wait */
+
+ if (object->n_ops > 0) {
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ } else if (!list_empty(&object->pending_ops)) {
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ fscache_start_operations(object);
+ } else {
+ ASSERTCMP(object->n_in_progress, ==, 0);
+ fscache_run_op(object, op);
+ }
+
+ /* need to issue a new write op after this */
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+ ret = 0;
+ } else if (object->state == FSCACHE_OBJECT_CREATING) {
+ op->object = object;
+ object->n_ops++;
+ object->n_exclusive++; /* reads and writes must wait */
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ ret = 0;
+ } else {
+ /* not allowed to submit ops in any other state */
+ BUG();
+ }
+
+ spin_unlock(&object->lock);
+ return ret;
+}
+
+/*
+ * report an unexpected submission
+ */
+static void fscache_report_unexpected_submission(struct fscache_object *object,
+ struct fscache_operation *op,
+ unsigned long ostate)
+{
+ static bool once_only;
+ struct fscache_operation *p;
+ unsigned n;
+
+ if (once_only)
+ return;
+ once_only = true;
+
+ kdebug("unexpected submission OP%x [OBJ%x %s]",
+ op->debug_id, object->debug_id,
+ fscache_object_states[object->state]);
+ kdebug("objstate=%s [%s]",
+ fscache_object_states[object->state],
+ fscache_object_states[ostate]);
+ kdebug("objflags=%lx", object->flags);
+ kdebug("objevent=%lx [%lx]", object->events, object->event_mask);
+ kdebug("ops=%u inp=%u exc=%u",
+ object->n_ops, object->n_in_progress, object->n_exclusive);
+
+ if (!list_empty(&object->pending_ops)) {
+ n = 0;
+ list_for_each_entry(p, &object->pending_ops, pend_link) {
+ ASSERTCMP(p->object, ==, object);
+ kdebug("%p %p", op->processor, op->release);
+ n++;
+ }
+
+ kdebug("n=%u", n);
+ }
+
+ dump_stack();
+}
+
+/*
+ * submit an operation for an object
+ * - objects may be submitted only in the following states:
+ * - during object creation (write ops may be submitted)
+ * - whilst the object is active
+ * - after an I/O error incurred in one of the two above states (op rejected)
+ * - this gets any extra refs it needs on an op
+ */
+int fscache_submit_op(struct fscache_object *object,
+ struct fscache_operation *op)
+{
+ unsigned long ostate;
+ int ret;
+
+ _enter("{OBJ%x OP%x},{%u}",
+ object->debug_id, op->debug_id, atomic_read(&op->usage));
+
+ ASSERTCMP(atomic_read(&op->usage), >, 0);
+
+ spin_lock(&object->lock);
+ ASSERTCMP(object->n_ops, >=, object->n_in_progress);
+ ASSERTCMP(object->n_ops, >=, object->n_exclusive);
+
+ ostate = object->state;
+ smp_rmb();
+
+ if (fscache_object_is_active(object)) {
+ op->object = object;
+ object->n_ops++;
+
+ if (object->n_exclusive > 0) {
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ } else if (!list_empty(&object->pending_ops)) {
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ fscache_start_operations(object);
+ } else {
+ ASSERTCMP(object->n_exclusive, ==, 0);
+ fscache_run_op(object, op);
+ }
+ ret = 0;
+ } else if (object->state == FSCACHE_OBJECT_CREATING) {
+ op->object = object;
+ object->n_ops++;
+ atomic_inc(&op->usage);
+ list_add_tail(&op->pend_link, &object->pending_ops);
+ fscache_stat(&fscache_n_op_pend);
+ ret = 0;
+ } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
+ fscache_report_unexpected_submission(object, op, ostate);
+ ASSERT(!fscache_object_is_active(object));
+ ret = -ENOBUFS;
+ } else {
+ ret = -ENOBUFS;
+ }
+
+ spin_unlock(&object->lock);
+ return ret;
+}
+
+/*
+ * queue an object for withdrawal on error, aborting all following asynchronous
+ * operations
+ */
+void fscache_abort_object(struct fscache_object *object)
+{
+ _enter("{OBJ%x}", object->debug_id);
+
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
+}
+
+/*
+ * jump start the operation processing on an object
+ * - caller must hold object->lock
+ */
+void fscache_start_operations(struct fscache_object *object)
+{
+ struct fscache_operation *op;
+ bool stop = false;
+
+ while (!list_empty(&object->pending_ops) && !stop) {
+ op = list_entry(object->pending_ops.next,
+ struct fscache_operation, pend_link);
+
+ if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
+ if (object->n_in_progress > 0)
+ break;
+ stop = true;
+ }
+ list_del_init(&op->pend_link);
+ object->n_in_progress++;
+
+ if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
+ wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
+ if (op->processor)
+ fscache_enqueue_operation(op);
+
+ /* the pending queue was holding a ref on the object */
+ fscache_put_operation(op);
+ }
+
+ ASSERTCMP(object->n_in_progress, <=, object->n_ops);
+
+ _debug("woke %d ops on OBJ%x",
+ object->n_in_progress, object->debug_id);
+}
+
+/*
+ * release an operation
+ * - queues pending ops if this is the last in-progress op
+ */
+void fscache_put_operation(struct fscache_operation *op)
+{
+ struct fscache_object *object;
+ struct fscache_cache *cache;
+
+ _enter("{OBJ%x OP%x,%d}",
+ op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+
+ ASSERTCMP(atomic_read(&op->usage), >, 0);
+
+ if (!atomic_dec_and_test(&op->usage))
+ return;
+
+ _debug("PUT OP");
+ if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
+ BUG();
+
+ fscache_stat(&fscache_n_op_release);
+
+ if (op->release) {
+ op->release(op);
+ op->release = NULL;
+ }
+
+ object = op->object;
+
+ /* now... we may get called with the object spinlock held, so we
+ * complete the cleanup here only if we can immediately acquire the
+ * lock, and defer it otherwise */
+ if (!spin_trylock(&object->lock)) {
+ _debug("defer put");
+ fscache_stat(&fscache_n_op_deferred_release);
+
+ cache = object->cache;
+ spin_lock(&cache->op_gc_list_lock);
+ list_add_tail(&op->pend_link, &cache->op_gc_list);
+ spin_unlock(&cache->op_gc_list_lock);
+ schedule_work(&cache->op_gc);
+ _leave(" [defer]");
+ return;
+ }
+
+ if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
+ ASSERTCMP(object->n_exclusive, >, 0);
+ object->n_exclusive--;
+ }
+
+ ASSERTCMP(object->n_in_progress, >, 0);
+ object->n_in_progress--;
+ if (object->n_in_progress == 0)
+ fscache_start_operations(object);
+
+ ASSERTCMP(object->n_ops, >, 0);
+ object->n_ops--;
+ if (object->n_ops == 0)
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
+
+ spin_unlock(&object->lock);
+
+ kfree(op);
+ _leave(" [done]");
+}
+EXPORT_SYMBOL(fscache_put_operation);
+
+/*
+ * garbage collect operations that have had their release deferred
+ */
+void fscache_operation_gc(struct work_struct *work)
+{
+ struct fscache_operation *op;
+ struct fscache_object *object;
+ struct fscache_cache *cache =
+ container_of(work, struct fscache_cache, op_gc);
+ int count = 0;
+
+ _enter("");
+
+ do {
+ spin_lock(&cache->op_gc_list_lock);
+ if (list_empty(&cache->op_gc_list)) {
+ spin_unlock(&cache->op_gc_list_lock);
+ break;
+ }
+
+ op = list_entry(cache->op_gc_list.next,
+ struct fscache_operation, pend_link);
+ list_del(&op->pend_link);
+ spin_unlock(&cache->op_gc_list_lock);
+
+ object = op->object;
+
+ _debug("GC DEFERRED REL OBJ%x OP%x",
+ object->debug_id, op->debug_id);
+ fscache_stat(&fscache_n_op_gc);
+
+ ASSERTCMP(atomic_read(&op->usage), ==, 0);
+
+ spin_lock(&object->lock);
+ if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
+ ASSERTCMP(object->n_exclusive, >, 0);
+ object->n_exclusive--;
+ }
+
+ ASSERTCMP(object->n_in_progress, >, 0);
+ object->n_in_progress--;
+ if (object->n_in_progress == 0)
+ fscache_start_operations(object);
+
+ ASSERTCMP(object->n_ops, >, 0);
+ object->n_ops--;
+ if (object->n_ops == 0)
+ fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
+
+ spin_unlock(&object->lock);
+
+ } while (count++ < 20);
+
+ if (!list_empty(&cache->op_gc_list))
+ schedule_work(&cache->op_gc);
+
+ _leave("");
+}
+
+/*
+ * allow the slow work item processor to get a ref on an operation
+ */
+static int fscache_op_get_ref(struct slow_work *work)
+{
+ struct fscache_operation *op =
+ container_of(work, struct fscache_operation, slow_work);
+
+ atomic_inc(&op->usage);
+ return 0;
+}
+
+/*
+ * allow the slow work item processor to discard a ref on an operation
+ */
+static void fscache_op_put_ref(struct slow_work *work)
+{
+ struct fscache_operation *op =
+ container_of(work, struct fscache_operation, slow_work);
+
+ fscache_put_operation(op);
+}
+
+/*
+ * execute an operation using the slow thread pool to provide processing context
+ * - the caller holds a ref to this object, so we don't need to hold one
+ */
+static void fscache_op_execute(struct slow_work *work)
+{
+ struct fscache_operation *op =
+ container_of(work, struct fscache_operation, slow_work);
+ unsigned long start;
+
+ _enter("{OBJ%x OP%x,%d}",
+ op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+
+ ASSERT(op->processor != NULL);
+ start = jiffies;
+ op->processor(op);
+ fscache_hist(fscache_ops_histogram, start);
+
+ _leave("");
+}
+
+const struct slow_work_ops fscache_op_slow_work_ops = {
+ .get_ref = fscache_op_get_ref,
+ .put_ref = fscache_op_put_ref,
+ .execute = fscache_op_execute,
+};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
new file mode 100644
index 000000000000..2568e0eb644f
--- /dev/null
+++ b/fs/fscache/page.c
@@ -0,0 +1,816 @@
+/* Cache page management and data I/O routines
+ *
+ * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL PAGE
+#include <linux/module.h>
+#include <linux/fscache-cache.h>
+#include <linux/buffer_head.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/*
+ * check to see if a page is being written to the cache
+ */
+bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page)
+{
+ void *val;
+
+ rcu_read_lock();
+ val = radix_tree_lookup(&cookie->stores, page->index);
+ rcu_read_unlock();
+
+ return val != NULL;
+}
+EXPORT_SYMBOL(__fscache_check_page_write);
+
+/*
+ * wait for a page to finish being written to the cache
+ */
+void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0);
+
+ wait_event(*wq, !__fscache_check_page_write(cookie, page));
+}
+EXPORT_SYMBOL(__fscache_wait_on_page_write);
+
+/*
+ * note that a page has finished being written to the cache
+ */
+static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page)
+{
+ struct page *xpage;
+
+ spin_lock(&cookie->lock);
+ xpage = radix_tree_delete(&cookie->stores, page->index);
+ spin_unlock(&cookie->lock);
+ ASSERT(xpage != NULL);
+
+ wake_up_bit(&cookie->flags, 0);
+}
+
+/*
+ * actually apply the changed attributes to a cache object
+ */
+static void fscache_attr_changed_op(struct fscache_operation *op)
+{
+ struct fscache_object *object = op->object;
+
+ _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id);
+
+ fscache_stat(&fscache_n_attr_changed_calls);
+
+ if (fscache_object_is_active(object) &&
+ object->cache->ops->attr_changed(object) < 0)
+ fscache_abort_object(object);
+
+ _leave("");
+}
+
+/*
+ * notification that the attributes on an object have changed
+ */
+int __fscache_attr_changed(struct fscache_cookie *cookie)
+{
+ struct fscache_operation *op;
+ struct fscache_object *object;
+
+ _enter("%p", cookie);
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+
+ fscache_stat(&fscache_n_attr_changed);
+
+ op = kzalloc(sizeof(*op), GFP_KERNEL);
+ if (!op) {
+ fscache_stat(&fscache_n_attr_changed_nomem);
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ fscache_operation_init(op, NULL);
+ fscache_operation_init_slow(op, fscache_attr_changed_op);
+ op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
+
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs;
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ if (fscache_submit_exclusive_op(object, op) < 0)
+ goto nobufs;
+ spin_unlock(&cookie->lock);
+ fscache_stat(&fscache_n_attr_changed_ok);
+ fscache_put_operation(op);
+ _leave(" = 0");
+ return 0;
+
+nobufs:
+ spin_unlock(&cookie->lock);
+ kfree(op);
+ fscache_stat(&fscache_n_attr_changed_nobufs);
+ _leave(" = %d", -ENOBUFS);
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(__fscache_attr_changed);
+
+/*
+ * handle secondary execution given to a retrieval op on behalf of the
+ * cache
+ */
+static void fscache_retrieval_work(struct work_struct *work)
+{
+ struct fscache_retrieval *op =
+ container_of(work, struct fscache_retrieval, op.fast_work);
+ unsigned long start;
+
+ _enter("{OP%x}", op->op.debug_id);
+
+ start = jiffies;
+ op->op.processor(&op->op);
+ fscache_hist(fscache_ops_histogram, start);
+ fscache_put_operation(&op->op);
+}
+
+/*
+ * release a retrieval op reference
+ */
+static void fscache_release_retrieval_op(struct fscache_operation *_op)
+{
+ struct fscache_retrieval *op =
+ container_of(_op, struct fscache_retrieval, op);
+
+ _enter("{OP%x}", op->op.debug_id);
+
+ fscache_hist(fscache_retrieval_histogram, op->start_time);
+ if (op->context)
+ fscache_put_context(op->op.object->cookie, op->context);
+
+ _leave("");
+}
+
+/*
+ * allocate a retrieval op
+ */
+static struct fscache_retrieval *fscache_alloc_retrieval(
+ struct address_space *mapping,
+ fscache_rw_complete_t end_io_func,
+ void *context)
+{
+ struct fscache_retrieval *op;
+
+ /* allocate a retrieval operation and attempt to submit it */
+ op = kzalloc(sizeof(*op), GFP_NOIO);
+ if (!op) {
+ fscache_stat(&fscache_n_retrievals_nomem);
+ return NULL;
+ }
+
+ fscache_operation_init(&op->op, fscache_release_retrieval_op);
+ op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING);
+ op->mapping = mapping;
+ op->end_io_func = end_io_func;
+ op->context = context;
+ op->start_time = jiffies;
+ INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
+ INIT_LIST_HEAD(&op->to_do);
+ return op;
+}
+
+/*
+ * wait for a deferred lookup to complete
+ */
+static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
+{
+ unsigned long jif;
+
+ _enter("");
+
+ if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) {
+ _leave(" = 0 [imm]");
+ return 0;
+ }
+
+ fscache_stat(&fscache_n_retrievals_wait);
+
+ jif = jiffies;
+ if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
+ fscache_wait_bit_interruptible,
+ TASK_INTERRUPTIBLE) != 0) {
+ fscache_stat(&fscache_n_retrievals_intr);
+ _leave(" = -ERESTARTSYS");
+ return -ERESTARTSYS;
+ }
+
+ ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags));
+
+ smp_rmb();
+ fscache_hist(fscache_retrieval_delay_histogram, jif);
+ _leave(" = 0 [dly]");
+ return 0;
+}
+
+/*
+ * read a page from the cache or allocate a block in which to store it
+ * - we return:
+ * -ENOMEM - out of memory, nothing done
+ * -ERESTARTSYS - interrupted
+ * -ENOBUFS - no backing object available in which to cache the block
+ * -ENODATA - no data available in the backing object for this block
+ * 0 - dispatched a read - it'll call end_io_func() when finished
+ */
+int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
+ struct page *page,
+ fscache_rw_complete_t end_io_func,
+ void *context,
+ gfp_t gfp)
+{
+ struct fscache_retrieval *op;
+ struct fscache_object *object;
+ int ret;
+
+ _enter("%p,%p,,,", cookie, page);
+
+ fscache_stat(&fscache_n_retrievals);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs;
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+ ASSERTCMP(page, !=, NULL);
+
+ if (fscache_wait_for_deferred_lookup(cookie) < 0)
+ return -ERESTARTSYS;
+
+ op = fscache_alloc_retrieval(page->mapping, end_io_func, context);
+ if (!op) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs_unlock;
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
+
+ if (fscache_submit_op(object, &op->op) < 0)
+ goto nobufs_unlock;
+ spin_unlock(&cookie->lock);
+
+ fscache_stat(&fscache_n_retrieval_ops);
+
+ /* pin the netfs read context in case we need to do the actual netfs
+ * read because we've encountered a cache read failure */
+ fscache_get_context(object->cookie, op->context);
+
+ /* we wait for the operation to become active, and then process it
+ * *here*, in this thread, and not in the thread pool */
+ if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
+ _debug(">>> WT");
+ fscache_stat(&fscache_n_retrieval_op_waits);
+ wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+ fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ _debug("<<< GO");
+ }
+
+ /* ask the cache to honour the operation */
+ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
+ ret = object->cache->ops->allocate_page(op, page, gfp);
+ if (ret == 0)
+ ret = -ENODATA;
+ } else {
+ ret = object->cache->ops->read_or_alloc_page(op, page, gfp);
+ }
+
+ if (ret == -ENOMEM)
+ fscache_stat(&fscache_n_retrievals_nomem);
+ else if (ret == -ERESTARTSYS)
+ fscache_stat(&fscache_n_retrievals_intr);
+ else if (ret == -ENODATA)
+ fscache_stat(&fscache_n_retrievals_nodata);
+ else if (ret < 0)
+ fscache_stat(&fscache_n_retrievals_nobufs);
+ else
+ fscache_stat(&fscache_n_retrievals_ok);
+
+ fscache_put_retrieval(op);
+ _leave(" = %d", ret);
+ return ret;
+
+nobufs_unlock:
+ spin_unlock(&cookie->lock);
+ kfree(op);
+nobufs:
+ fscache_stat(&fscache_n_retrievals_nobufs);
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(__fscache_read_or_alloc_page);
+
+/*
+ * read a list of page from the cache or allocate a block in which to store
+ * them
+ * - we return:
+ * -ENOMEM - out of memory, some pages may be being read
+ * -ERESTARTSYS - interrupted, some pages may be being read
+ * -ENOBUFS - no backing object or space available in which to cache any
+ * pages not being read
+ * -ENODATA - no data available in the backing object for some or all of
+ * the pages
+ * 0 - dispatched a read on all pages
+ *
+ * end_io_func() will be called for each page read from the cache as it is
+ * finishes being read
+ *
+ * any pages for which a read is dispatched will be removed from pages and
+ * nr_pages
+ */
+int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages,
+ fscache_rw_complete_t end_io_func,
+ void *context,
+ gfp_t gfp)
+{
+ fscache_pages_retrieval_func_t func;
+ struct fscache_retrieval *op;
+ struct fscache_object *object;
+ int ret;
+
+ _enter("%p,,%d,,,", cookie, *nr_pages);
+
+ fscache_stat(&fscache_n_retrievals);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs;
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+ ASSERTCMP(*nr_pages, >, 0);
+ ASSERT(!list_empty(pages));
+
+ if (fscache_wait_for_deferred_lookup(cookie) < 0)
+ return -ERESTARTSYS;
+
+ op = fscache_alloc_retrieval(mapping, end_io_func, context);
+ if (!op)
+ return -ENOMEM;
+
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs_unlock;
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ if (fscache_submit_op(object, &op->op) < 0)
+ goto nobufs_unlock;
+ spin_unlock(&cookie->lock);
+
+ fscache_stat(&fscache_n_retrieval_ops);
+
+ /* pin the netfs read context in case we need to do the actual netfs
+ * read because we've encountered a cache read failure */
+ fscache_get_context(object->cookie, op->context);
+
+ /* we wait for the operation to become active, and then process it
+ * *here*, in this thread, and not in the thread pool */
+ if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
+ _debug(">>> WT");
+ fscache_stat(&fscache_n_retrieval_op_waits);
+ wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+ fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ _debug("<<< GO");
+ }
+
+ /* ask the cache to honour the operation */
+ if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags))
+ func = object->cache->ops->allocate_pages;
+ else
+ func = object->cache->ops->read_or_alloc_pages;
+ ret = func(op, pages, nr_pages, gfp);
+
+ if (ret == -ENOMEM)
+ fscache_stat(&fscache_n_retrievals_nomem);
+ else if (ret == -ERESTARTSYS)
+ fscache_stat(&fscache_n_retrievals_intr);
+ else if (ret == -ENODATA)
+ fscache_stat(&fscache_n_retrievals_nodata);
+ else if (ret < 0)
+ fscache_stat(&fscache_n_retrievals_nobufs);
+ else
+ fscache_stat(&fscache_n_retrievals_ok);
+
+ fscache_put_retrieval(op);
+ _leave(" = %d", ret);
+ return ret;
+
+nobufs_unlock:
+ spin_unlock(&cookie->lock);
+ kfree(op);
+nobufs:
+ fscache_stat(&fscache_n_retrievals_nobufs);
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(__fscache_read_or_alloc_pages);
+
+/*
+ * allocate a block in the cache on which to store a page
+ * - we return:
+ * -ENOMEM - out of memory, nothing done
+ * -ERESTARTSYS - interrupted
+ * -ENOBUFS - no backing object available in which to cache the block
+ * 0 - block allocated
+ */
+int __fscache_alloc_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp)
+{
+ struct fscache_retrieval *op;
+ struct fscache_object *object;
+ int ret;
+
+ _enter("%p,%p,,,", cookie, page);
+
+ fscache_stat(&fscache_n_allocs);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs;
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+ ASSERTCMP(page, !=, NULL);
+
+ if (fscache_wait_for_deferred_lookup(cookie) < 0)
+ return -ERESTARTSYS;
+
+ op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
+ if (!op)
+ return -ENOMEM;
+
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs_unlock;
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ if (fscache_submit_op(object, &op->op) < 0)
+ goto nobufs_unlock;
+ spin_unlock(&cookie->lock);
+
+ fscache_stat(&fscache_n_alloc_ops);
+
+ if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
+ _debug(">>> WT");
+ fscache_stat(&fscache_n_alloc_op_waits);
+ wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+ fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+ _debug("<<< GO");
+ }
+
+ /* ask the cache to honour the operation */
+ ret = object->cache->ops->allocate_page(op, page, gfp);
+
+ if (ret < 0)
+ fscache_stat(&fscache_n_allocs_nobufs);
+ else
+ fscache_stat(&fscache_n_allocs_ok);
+
+ fscache_put_retrieval(op);
+ _leave(" = %d", ret);
+ return ret;
+
+nobufs_unlock:
+ spin_unlock(&cookie->lock);
+ kfree(op);
+nobufs:
+ fscache_stat(&fscache_n_allocs_nobufs);
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+}
+EXPORT_SYMBOL(__fscache_alloc_page);
+
+/*
+ * release a write op reference
+ */
+static void fscache_release_write_op(struct fscache_operation *_op)
+{
+ _enter("{OP%x}", _op->debug_id);
+}
+
+/*
+ * perform the background storage of a page into the cache
+ */
+static void fscache_write_op(struct fscache_operation *_op)
+{
+ struct fscache_storage *op =
+ container_of(_op, struct fscache_storage, op);
+ struct fscache_object *object = op->op.object;
+ struct fscache_cookie *cookie = object->cookie;
+ struct page *page;
+ unsigned n;
+ void *results[1];
+ int ret;
+
+ _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
+
+ spin_lock(&cookie->lock);
+ spin_lock(&object->lock);
+
+ if (!fscache_object_is_active(object)) {
+ spin_unlock(&object->lock);
+ spin_unlock(&cookie->lock);
+ _leave("");
+ return;
+ }
+
+ fscache_stat(&fscache_n_store_calls);
+
+ /* find a page to store */
+ page = NULL;
+ n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1,
+ FSCACHE_COOKIE_PENDING_TAG);
+ if (n != 1)
+ goto superseded;
+ page = results[0];
+ _debug("gang %d [%lx]", n, page->index);
+ if (page->index > op->store_limit)
+ goto superseded;
+
+ radix_tree_tag_clear(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
+
+ spin_unlock(&object->lock);
+ spin_unlock(&cookie->lock);
+
+ if (page) {
+ ret = object->cache->ops->write_page(op, page);
+ fscache_end_page_write(cookie, page);
+ page_cache_release(page);
+ if (ret < 0)
+ fscache_abort_object(object);
+ else
+ fscache_enqueue_operation(&op->op);
+ }
+
+ _leave("");
+ return;
+
+superseded:
+ /* this writer is going away and there aren't any more things to
+ * write */
+ _debug("cease");
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+ spin_unlock(&object->lock);
+ spin_unlock(&cookie->lock);
+ _leave("");
+}
+
+/*
+ * request a page be stored in the cache
+ * - returns:
+ * -ENOMEM - out of memory, nothing done
+ * -ENOBUFS - no backing object available in which to cache the page
+ * 0 - dispatched a write - it'll call end_io_func() when finished
+ *
+ * if the cookie still has a backing object at this point, that object can be
+ * in one of a few states with respect to storage processing:
+ *
+ * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is
+ * set)
+ *
+ * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred
+ * fill op)
+ *
+ * (b) writes deferred till post-creation (mark page for writing and
+ * return immediately)
+ *
+ * (2) negative lookup, object created, initial fill being made from netfs
+ * (FSCACHE_COOKIE_INITIAL_FILL is set)
+ *
+ * (a) fill point not yet reached this page (mark page for writing and
+ * return)
+ *
+ * (b) fill point passed this page (queue op to store this page)
+ *
+ * (3) object extant (queue op to store this page)
+ *
+ * any other state is invalid
+ */
+int __fscache_write_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp)
+{
+ struct fscache_storage *op;
+ struct fscache_object *object;
+ int ret;
+
+ _enter("%p,%x,", cookie, (u32) page->flags);
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+ ASSERT(PageFsCache(page));
+
+ fscache_stat(&fscache_n_stores);
+
+ op = kzalloc(sizeof(*op), GFP_NOIO);
+ if (!op)
+ goto nomem;
+
+ fscache_operation_init(&op->op, fscache_release_write_op);
+ fscache_operation_init_slow(&op->op, fscache_write_op);
+ op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
+
+ ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
+ if (ret < 0)
+ goto nomem_free;
+
+ ret = -ENOBUFS;
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects))
+ goto nobufs;
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+ if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+ goto nobufs;
+
+ /* add the page to the pending-storage radix tree on the backing
+ * object */
+ spin_lock(&object->lock);
+
+ _debug("store limit %llx", (unsigned long long) object->store_limit);
+
+ ret = radix_tree_insert(&cookie->stores, page->index, page);
+ if (ret < 0) {
+ if (ret == -EEXIST)
+ goto already_queued;
+ _debug("insert failed %d", ret);
+ goto nobufs_unlock_obj;
+ }
+
+ radix_tree_tag_set(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
+ page_cache_get(page);
+
+ /* we only want one writer at a time, but we do need to queue new
+ * writers after exclusive ops */
+ if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags))
+ goto already_pending;
+
+ spin_unlock(&object->lock);
+
+ op->op.debug_id = atomic_inc_return(&fscache_op_debug_id);
+ op->store_limit = object->store_limit;
+
+ if (fscache_submit_op(object, &op->op) < 0)
+ goto submit_failed;
+
+ spin_unlock(&cookie->lock);
+ radix_tree_preload_end();
+ fscache_stat(&fscache_n_store_ops);
+ fscache_stat(&fscache_n_stores_ok);
+
+ /* the slow work queue now carries its own ref on the object */
+ fscache_put_operation(&op->op);
+ _leave(" = 0");
+ return 0;
+
+already_queued:
+ fscache_stat(&fscache_n_stores_again);
+already_pending:
+ spin_unlock(&object->lock);
+ spin_unlock(&cookie->lock);
+ radix_tree_preload_end();
+ kfree(op);
+ fscache_stat(&fscache_n_stores_ok);
+ _leave(" = 0");
+ return 0;
+
+submit_failed:
+ radix_tree_delete(&cookie->stores, page->index);
+ page_cache_release(page);
+ ret = -ENOBUFS;
+ goto nobufs;
+
+nobufs_unlock_obj:
+ spin_unlock(&object->lock);
+nobufs:
+ spin_unlock(&cookie->lock);
+ radix_tree_preload_end();
+ kfree(op);
+ fscache_stat(&fscache_n_stores_nobufs);
+ _leave(" = -ENOBUFS");
+ return -ENOBUFS;
+
+nomem_free:
+ kfree(op);
+nomem:
+ fscache_stat(&fscache_n_stores_oom);
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(__fscache_write_page);
+
+/*
+ * remove a page from the cache
+ */
+void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
+{
+ struct fscache_object *object;
+
+ _enter(",%p", page);
+
+ ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+ ASSERTCMP(page, !=, NULL);
+
+ fscache_stat(&fscache_n_uncaches);
+
+ /* cache withdrawal may beat us to it */
+ if (!PageFsCache(page))
+ goto done;
+
+ /* get the object */
+ spin_lock(&cookie->lock);
+
+ if (hlist_empty(&cookie->backing_objects)) {
+ ClearPageFsCache(page);
+ goto done_unlock;
+ }
+
+ object = hlist_entry(cookie->backing_objects.first,
+ struct fscache_object, cookie_link);
+
+ /* there might now be stuff on disk we could read */
+ clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+ /* only invoke the cache backend if we managed to mark the page
+ * uncached here; this deals with synchronisation vs withdrawal */
+ if (TestClearPageFsCache(page) &&
+ object->cache->ops->uncache_page) {
+ /* the cache backend releases the cookie lock */
+ object->cache->ops->uncache_page(object, page);
+ goto done;
+ }
+
+done_unlock:
+ spin_unlock(&cookie->lock);
+done:
+ _leave("");
+}
+EXPORT_SYMBOL(__fscache_uncache_page);
+
+/**
+ * fscache_mark_pages_cached - Mark pages as being cached
+ * @op: The retrieval op pages are being marked for
+ * @pagevec: The pages to be marked
+ *
+ * Mark a bunch of netfs pages as being cached. After this is called,
+ * the netfs must call fscache_uncache_page() to remove the mark.
+ */
+void fscache_mark_pages_cached(struct fscache_retrieval *op,
+ struct pagevec *pagevec)
+{
+ struct fscache_cookie *cookie = op->op.object->cookie;
+ unsigned long loop;
+
+#ifdef CONFIG_FSCACHE_STATS
+ atomic_add(pagevec->nr, &fscache_n_marks);
+#endif
+
+ for (loop = 0; loop < pagevec->nr; loop++) {
+ struct page *page = pagevec->pages[loop];
+
+ _debug("- mark %p{%lx}", page, page->index);
+ if (TestSetPageFsCache(page)) {
+ static bool once_only;
+ if (!once_only) {
+ once_only = true;
+ printk(KERN_WARNING "FS-Cache:"
+ " Cookie type %s marked page %lx"
+ " multiple times\n",
+ cookie->def->name, page->index);
+ }
+ }
+ }
+
+ if (cookie->def->mark_pages_cached)
+ cookie->def->mark_pages_cached(cookie->netfs_data,
+ op->mapping, pagevec);
+ pagevec_reinit(pagevec);
+}
+EXPORT_SYMBOL(fscache_mark_pages_cached);
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
new file mode 100644
index 000000000000..beeab44bc31a
--- /dev/null
+++ b/fs/fscache/proc.c
@@ -0,0 +1,68 @@
+/* FS-Cache statistics viewing interface
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL OPERATION
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+/*
+ * initialise the /proc/fs/fscache/ directory
+ */
+int __init fscache_proc_init(void)
+{
+ _enter("");
+
+ if (!proc_mkdir("fs/fscache", NULL))
+ goto error_dir;
+
+#ifdef CONFIG_FSCACHE_STATS
+ if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
+ &fscache_stats_fops))
+ goto error_stats;
+#endif
+
+#ifdef CONFIG_FSCACHE_HISTOGRAM
+ if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
+ &fscache_histogram_fops))
+ goto error_histogram;
+#endif
+
+ _leave(" = 0");
+ return 0;
+
+#ifdef CONFIG_FSCACHE_HISTOGRAM
+error_histogram:
+#endif
+#ifdef CONFIG_FSCACHE_STATS
+ remove_proc_entry("fs/fscache/stats", NULL);
+error_stats:
+#endif
+ remove_proc_entry("fs/fscache", NULL);
+error_dir:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/fscache/ directory
+ */
+void fscache_proc_cleanup(void)
+{
+#ifdef CONFIG_FSCACHE_HISTOGRAM
+ remove_proc_entry("fs/fscache/histogram", NULL);
+#endif
+#ifdef CONFIG_FSCACHE_STATS
+ remove_proc_entry("fs/fscache/stats", NULL);
+#endif
+ remove_proc_entry("fs/fscache", NULL);
+}
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
new file mode 100644
index 000000000000..65deb99e756b
--- /dev/null
+++ b/fs/fscache/stats.c
@@ -0,0 +1,212 @@
+/* FS-Cache statistics
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define FSCACHE_DEBUG_LEVEL THREAD
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include "internal.h"
+
+/*
+ * operation counters
+ */
+atomic_t fscache_n_op_pend;
+atomic_t fscache_n_op_run;
+atomic_t fscache_n_op_enqueue;
+atomic_t fscache_n_op_requeue;
+atomic_t fscache_n_op_deferred_release;
+atomic_t fscache_n_op_release;
+atomic_t fscache_n_op_gc;
+
+atomic_t fscache_n_attr_changed;
+atomic_t fscache_n_attr_changed_ok;
+atomic_t fscache_n_attr_changed_nobufs;
+atomic_t fscache_n_attr_changed_nomem;
+atomic_t fscache_n_attr_changed_calls;
+
+atomic_t fscache_n_allocs;
+atomic_t fscache_n_allocs_ok;
+atomic_t fscache_n_allocs_wait;
+atomic_t fscache_n_allocs_nobufs;
+atomic_t fscache_n_alloc_ops;
+atomic_t fscache_n_alloc_op_waits;
+
+atomic_t fscache_n_retrievals;
+atomic_t fscache_n_retrievals_ok;
+atomic_t fscache_n_retrievals_wait;
+atomic_t fscache_n_retrievals_nodata;
+atomic_t fscache_n_retrievals_nobufs;
+atomic_t fscache_n_retrievals_intr;
+atomic_t fscache_n_retrievals_nomem;
+atomic_t fscache_n_retrieval_ops;
+atomic_t fscache_n_retrieval_op_waits;
+
+atomic_t fscache_n_stores;
+atomic_t fscache_n_stores_ok;
+atomic_t fscache_n_stores_again;
+atomic_t fscache_n_stores_nobufs;
+atomic_t fscache_n_stores_oom;
+atomic_t fscache_n_store_ops;
+atomic_t fscache_n_store_calls;
+
+atomic_t fscache_n_marks;
+atomic_t fscache_n_uncaches;
+
+atomic_t fscache_n_acquires;
+atomic_t fscache_n_acquires_null;
+atomic_t fscache_n_acquires_no_cache;
+atomic_t fscache_n_acquires_ok;
+atomic_t fscache_n_acquires_nobufs;
+atomic_t fscache_n_acquires_oom;
+
+atomic_t fscache_n_updates;
+atomic_t fscache_n_updates_null;
+atomic_t fscache_n_updates_run;
+
+atomic_t fscache_n_relinquishes;
+atomic_t fscache_n_relinquishes_null;
+atomic_t fscache_n_relinquishes_waitcrt;
+
+atomic_t fscache_n_cookie_index;
+atomic_t fscache_n_cookie_data;
+atomic_t fscache_n_cookie_special;
+
+atomic_t fscache_n_object_alloc;
+atomic_t fscache_n_object_no_alloc;
+atomic_t fscache_n_object_lookups;
+atomic_t fscache_n_object_lookups_negative;
+atomic_t fscache_n_object_lookups_positive;
+atomic_t fscache_n_object_created;
+atomic_t fscache_n_object_avail;
+atomic_t fscache_n_object_dead;
+
+atomic_t fscache_n_checkaux_none;
+atomic_t fscache_n_checkaux_okay;
+atomic_t fscache_n_checkaux_update;
+atomic_t fscache_n_checkaux_obsolete;
+
+/*
+ * display the general statistics
+ */
+static int fscache_stats_show(struct seq_file *m, void *v)
+{
+ seq_puts(m, "FS-Cache statistics\n");
+
+ seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n",
+ atomic_read(&fscache_n_cookie_index),
+ atomic_read(&fscache_n_cookie_data),
+ atomic_read(&fscache_n_cookie_special));
+
+ seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n",
+ atomic_read(&fscache_n_object_alloc),
+ atomic_read(&fscache_n_object_no_alloc),
+ atomic_read(&fscache_n_object_avail),
+ atomic_read(&fscache_n_object_dead));
+ seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n",
+ atomic_read(&fscache_n_checkaux_none),
+ atomic_read(&fscache_n_checkaux_okay),
+ atomic_read(&fscache_n_checkaux_update),
+ atomic_read(&fscache_n_checkaux_obsolete));
+
+ seq_printf(m, "Pages : mrk=%u unc=%u\n",
+ atomic_read(&fscache_n_marks),
+ atomic_read(&fscache_n_uncaches));
+
+ seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u"
+ " oom=%u\n",
+ atomic_read(&fscache_n_acquires),
+ atomic_read(&fscache_n_acquires_null),
+ atomic_read(&fscache_n_acquires_no_cache),
+ atomic_read(&fscache_n_acquires_ok),
+ atomic_read(&fscache_n_acquires_nobufs),
+ atomic_read(&fscache_n_acquires_oom));
+
+ seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n",
+ atomic_read(&fscache_n_object_lookups),
+ atomic_read(&fscache_n_object_lookups_negative),
+ atomic_read(&fscache_n_object_lookups_positive),
+ atomic_read(&fscache_n_object_created));
+
+ seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
+ atomic_read(&fscache_n_updates),
+ atomic_read(&fscache_n_updates_null),
+ atomic_read(&fscache_n_updates_run));
+
+ seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n",
+ atomic_read(&fscache_n_relinquishes),
+ atomic_read(&fscache_n_relinquishes_null),
+ atomic_read(&fscache_n_relinquishes_waitcrt));
+
+ seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n",
+ atomic_read(&fscache_n_attr_changed),
+ atomic_read(&fscache_n_attr_changed_ok),
+ atomic_read(&fscache_n_attr_changed_nobufs),
+ atomic_read(&fscache_n_attr_changed_nomem),
+ atomic_read(&fscache_n_attr_changed_calls));
+
+ seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n",
+ atomic_read(&fscache_n_allocs),
+ atomic_read(&fscache_n_allocs_ok),
+ atomic_read(&fscache_n_allocs_wait),
+ atomic_read(&fscache_n_allocs_nobufs));
+ seq_printf(m, "Allocs : ops=%u owt=%u\n",
+ atomic_read(&fscache_n_alloc_ops),
+ atomic_read(&fscache_n_alloc_op_waits));
+
+ seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u"
+ " int=%u oom=%u\n",
+ atomic_read(&fscache_n_retrievals),
+ atomic_read(&fscache_n_retrievals_ok),
+ atomic_read(&fscache_n_retrievals_wait),
+ atomic_read(&fscache_n_retrievals_nodata),
+ atomic_read(&fscache_n_retrievals_nobufs),
+ atomic_read(&fscache_n_retrievals_intr),
+ atomic_read(&fscache_n_retrievals_nomem));
+ seq_printf(m, "Retrvls: ops=%u owt=%u\n",
+ atomic_read(&fscache_n_retrieval_ops),
+ atomic_read(&fscache_n_retrieval_op_waits));
+
+ seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n",
+ atomic_read(&fscache_n_stores),
+ atomic_read(&fscache_n_stores_ok),
+ atomic_read(&fscache_n_stores_again),
+ atomic_read(&fscache_n_stores_nobufs),
+ atomic_read(&fscache_n_stores_oom));
+ seq_printf(m, "Stores : ops=%u run=%u\n",
+ atomic_read(&fscache_n_store_ops),
+ atomic_read(&fscache_n_store_calls));
+
+ seq_printf(m, "Ops : pend=%u run=%u enq=%u\n",
+ atomic_read(&fscache_n_op_pend),
+ atomic_read(&fscache_n_op_run),
+ atomic_read(&fscache_n_op_enqueue));
+ seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n",
+ atomic_read(&fscache_n_op_deferred_release),
+ atomic_read(&fscache_n_op_release),
+ atomic_read(&fscache_n_op_gc));
+ return 0;
+}
+
+/*
+ * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
+ */
+static int fscache_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, fscache_stats_show, NULL);
+}
+
+const struct file_operations fscache_stats_fops = {
+ .owner = THIS_MODULE,
+ .open = fscache_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 36fe20d6eba2..e67f3ec07736 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -84,3 +84,11 @@ config ROOT_NFS
<file:Documentation/filesystems/nfsroot.txt>.
Most people say N here.
+
+config NFS_FSCACHE
+ bool "Provide NFS client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
+ help
+ Say Y here if you want NFS data to be cached locally on disc through
+ the general filesystem cache manager
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index ac6170c594a3..845159814de2 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,3 +15,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
+nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index aba38017bdef..75c9cd2aa119 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -45,6 +45,7 @@
#include "delegation.h"
#include "iostat.h"
#include "internal.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -154,6 +155,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
+ nfs_fscache_get_client_cookie(clp);
+
return clp;
error_3:
@@ -187,6 +190,8 @@ static void nfs_free_client(struct nfs_client *clp)
nfs4_shutdown_client(clp);
+ nfs_fscache_release_client_cookie(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
@@ -760,6 +765,7 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
+ server->options = data->options;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1148,6 +1154,7 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
server->caps |= NFS_CAP_ATOMIC_OPEN;
+ server->options = data->options;
/* Get a client record */
error = nfs4_set_client(server,
@@ -1559,7 +1566,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
/* display header on line 1 */
if (v == &nfs_volume_list) {
- seq_puts(m, "NV SERVER PORT DEV FSID\n");
+ seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
return 0;
}
/* display one transport per line on subsequent lines */
@@ -1573,12 +1580,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- seq_printf(m, "v%u %s %s %-7s %-17s\n",
+ seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
dev,
- fsid);
+ fsid,
+ nfs_server_fscache_state(server));
return 0;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0abf3f331f56..3523b895eb4b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -35,6 +35,7 @@
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_FILE
@@ -409,6 +410,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
return copied;
}
+/*
+ * Partially or wholly invalidate a page
+ * - Release the private state associated with a page if undergoing complete
+ * page invalidation
+ * - Called if either PG_private or PG_fscache is set on the page
+ * - Caller holds page lock
+ */
static void nfs_invalidate_page(struct page *page, unsigned long offset)
{
dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset);
@@ -417,23 +425,43 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
return;
/* Cancel any unstarted writes on this page */
nfs_wb_page_cancel(page->mapping->host, page);
+
+ nfs_fscache_invalidate_page(page, page->mapping->host);
}
+/*
+ * Attempt to release the private state associated with a page
+ * - Called if either PG_private or PG_fscache is set on the page
+ * - Caller holds page lock
+ * - Return true (may release page) or false (may not)
+ */
static int nfs_release_page(struct page *page, gfp_t gfp)
{
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* If PagePrivate() is set, then the page is not freeable */
- return 0;
+ if (PagePrivate(page))
+ return 0;
+ return nfs_fscache_release_page(page, gfp);
}
+/*
+ * Attempt to clear the private state associated with a page when an error
+ * occurs that requires the cached contents of an inode to be written back or
+ * destroyed
+ * - Called if either PG_private or fscache is set on the page
+ * - Caller holds page lock
+ * - Return 0 if successful, -error otherwise
+ */
static int nfs_launder_page(struct page *page)
{
struct inode *inode = page->mapping->host;
+ struct nfs_inode *nfsi = NFS_I(inode);
dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
inode->i_ino, (long long)page_offset(page));
+ nfs_fscache_wait_on_page_write(nfsi, page);
return nfs_wb_page(inode, page);
}
@@ -451,6 +479,11 @@ const struct address_space_operations nfs_file_aops = {
.launder_page = nfs_launder_page,
};
+/*
+ * Notification that a PTE pointing to an NFS page is about to be made
+ * writable, implying that someone is about to modify the page through a
+ * shared-writable mapping
+ */
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
@@ -465,6 +498,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
filp->f_mapping->host->i_ino,
(long long)page_offset(page));
+ /* make sure the cache has finished storing the page */
+ nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+
lock_page(page);
mapping = page->mapping;
if (mapping != dentry->d_inode->i_mapping)
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
new file mode 100644
index 000000000000..5b1006480bc2
--- /dev/null
+++ b/fs/nfs/fscache-index.c
@@ -0,0 +1,337 @@
+/* NFS FS-Cache index structure definition
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/in6.h>
+
+#include "internal.h"
+#include "fscache.h"
+
+#define NFSDBG_FACILITY NFSDBG_FSCACHE
+
+/*
+ * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks
+ * the cookie for the top-level index object for NFS into here. The top-level
+ * index can than have other cache objects inserted into it.
+ */
+struct fscache_netfs nfs_fscache_netfs = {
+ .name = "nfs",
+ .version = 0,
+};
+
+/*
+ * Register NFS for caching
+ */
+int nfs_fscache_register(void)
+{
+ return fscache_register_netfs(&nfs_fscache_netfs);
+}
+
+/*
+ * Unregister NFS for caching
+ */
+void nfs_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&nfs_fscache_netfs);
+}
+
+/*
+ * Layout of the key for an NFS server cache object.
+ */
+struct nfs_server_key {
+ uint16_t nfsversion; /* NFS protocol version */
+ uint16_t family; /* address family */
+ uint16_t port; /* IP port */
+ union {
+ struct in_addr ipv4_addr; /* IPv4 address */
+ struct in6_addr ipv6_addr; /* IPv6 address */
+ } addr[0];
+};
+
+/*
+ * Generate a key to describe a server in the main NFS index
+ * - We return the length of the key, or 0 if we can't generate one
+ */
+static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct nfs_client *clp = cookie_netfs_data;
+ const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
+ const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
+ struct nfs_server_key *key = buffer;
+ uint16_t len = sizeof(struct nfs_server_key);
+
+ key->nfsversion = clp->rpc_ops->version;
+ key->family = clp->cl_addr.ss_family;
+
+ memset(key, 0, len);
+
+ switch (clp->cl_addr.ss_family) {
+ case AF_INET:
+ key->port = sin->sin_port;
+ key->addr[0].ipv4_addr = sin->sin_addr;
+ len += sizeof(key->addr[0].ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key->port = sin6->sin6_port;
+ key->addr[0].ipv6_addr = sin6->sin6_addr;
+ len += sizeof(key->addr[0].ipv6_addr);
+ break;
+
+ default:
+ printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
+ clp->cl_addr.ss_family);
+ len = 0;
+ break;
+ }
+
+ return len;
+}
+
+/*
+ * Define the server object for FS-Cache. This is used to describe a server
+ * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and
+ * server address parameters.
+ */
+const struct fscache_cookie_def nfs_fscache_server_index_def = {
+ .name = "NFS.server",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = nfs_server_get_key,
+};
+
+/*
+ * Generate a key to describe a superblock key in the main NFS index
+ */
+static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct nfs_fscache_key *key;
+ const struct nfs_server *nfss = cookie_netfs_data;
+ uint16_t len;
+
+ key = nfss->fscache_key;
+ len = sizeof(key->key) + key->key.uniq_len;
+ if (len > bufmax) {
+ len = 0;
+ } else {
+ memcpy(buffer, &key->key, sizeof(key->key));
+ memcpy(buffer + sizeof(key->key),
+ key->key.uniquifier, key->key.uniq_len);
+ }
+
+ return len;
+}
+
+/*
+ * Define the superblock object for FS-Cache. This is used to describe a
+ * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS
+ * parameters that might cause a separate superblock.
+ */
+const struct fscache_cookie_def nfs_fscache_super_index_def = {
+ .name = "NFS.super",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = nfs_super_get_key,
+};
+
+/*
+ * Definition of the auxiliary data attached to NFS inode storage objects
+ * within the cache.
+ *
+ * The contents of this struct are recorded in the on-disk local cache in the
+ * auxiliary data attached to the data storage object backing an inode. This
+ * permits coherency to be managed when a new inode binds to an already extant
+ * cache object.
+ */
+struct nfs_fscache_inode_auxdata {
+ struct timespec mtime;
+ struct timespec ctime;
+ loff_t size;
+ u64 change_attr;
+};
+
+/*
+ * Generate a key to describe an NFS inode in an NFS server's index
+ */
+static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ const struct nfs_inode *nfsi = cookie_netfs_data;
+ uint16_t nsize;
+
+ /* use the inode's NFS filehandle as the key */
+ nsize = nfsi->fh.size;
+ memcpy(buffer, nfsi->fh.data, nsize);
+ return nsize;
+}
+
+/*
+ * Get certain file attributes from the netfs data
+ * - This function can be absent for an index
+ * - Not permitted to return an error
+ * - The netfs data from the cookie being used as the source is presented
+ */
+static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
+{
+ const struct nfs_inode *nfsi = cookie_netfs_data;
+
+ *size = nfsi->vfs_inode.i_size;
+}
+
+/*
+ * Get the auxiliary data from netfs data
+ * - This function can be absent if the index carries no state data
+ * - Should store the auxiliary data in the buffer
+ * - Should return the amount of amount stored
+ * - Not permitted to return an error
+ * - The netfs data from the cookie being used as the source is presented
+ */
+static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ struct nfs_fscache_inode_auxdata auxdata;
+ const struct nfs_inode *nfsi = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.size = nfsi->vfs_inode.i_size;
+ auxdata.mtime = nfsi->vfs_inode.i_mtime;
+ auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
+ if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
+ auxdata.change_attr = nfsi->change_attr;
+
+ if (bufmax > sizeof(auxdata))
+ bufmax = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, bufmax);
+ return bufmax;
+}
+
+/*
+ * Consult the netfs about the state of an object
+ * - This function can be absent if the index carries no state data
+ * - The netfs data from the cookie being used as the target is
+ * presented, as is the auxiliary data
+ */
+static
+enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct nfs_fscache_inode_auxdata auxdata;
+ struct nfs_inode *nfsi = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.size = nfsi->vfs_inode.i_size;
+ auxdata.mtime = nfsi->vfs_inode.i_mtime;
+ auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
+ if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
+ auxdata.change_attr = nfsi->change_attr;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * Indication from FS-Cache that the cookie is no longer cached
+ * - This function is called when the backing store currently caching a cookie
+ * is removed
+ * - The netfs should use this to clean up any markers indicating cached pages
+ * - This is mandatory for any object that may have data
+ */
+static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct nfs_inode *nfsi = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi);
+
+ for (;;) {
+ /* grab a bunch of pages to unmark */
+ nr_pages = pagevec_lookup(&pvec,
+ nfsi->vfs_inode.i_mapping,
+ first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+/*
+ * Get an extra reference on a read context.
+ * - This function can be absent if the completion function doesn't require a
+ * context.
+ * - The read context is passed back to NFS in the event that a data read on the
+ * cache fails with EIO - in which case the server must be contacted to
+ * retrieve the data, which requires the read context for security.
+ */
+static void nfs_fh_get_context(void *cookie_netfs_data, void *context)
+{
+ get_nfs_open_context(context);
+}
+
+/*
+ * Release an extra reference on a read context.
+ * - This function can be absent if the completion function doesn't require a
+ * context.
+ */
+static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
+{
+ if (context)
+ put_nfs_open_context(context);
+}
+
+/*
+ * Define the inode object for FS-Cache. This is used to describe an inode
+ * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for
+ * an inode.
+ *
+ * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime
+ * held in the cache auxiliary data for the data storage object with those in
+ * the inode struct in memory.
+ */
+const struct fscache_cookie_def nfs_fscache_inode_object_def = {
+ .name = "NFS.fh",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = nfs_fscache_inode_get_key,
+ .get_attr = nfs_fscache_inode_get_attr,
+ .get_aux = nfs_fscache_inode_get_aux,
+ .check_aux = nfs_fscache_inode_check_aux,
+ .now_uncached = nfs_fscache_inode_now_uncached,
+ .get_context = nfs_fh_get_context,
+ .put_context = nfs_fh_put_context,
+};
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
new file mode 100644
index 000000000000..379be678cb7e
--- /dev/null
+++ b/fs/nfs/fscache.c
@@ -0,0 +1,523 @@
+/* NFS filesystem cache interface
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/in6.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+#include "iostat.h"
+#include "fscache.h"
+
+#define NFSDBG_FACILITY NFSDBG_FSCACHE
+
+static struct rb_root nfs_fscache_keys = RB_ROOT;
+static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
+
+/*
+ * Get the per-client index cookie for an NFS client if the appropriate mount
+ * flag was set
+ * - We always try and get an index cookie for the client, but get filehandle
+ * cookies on a per-superblock basis, depending on the mount flags
+ */
+void nfs_fscache_get_client_cookie(struct nfs_client *clp)
+{
+ /* create a cache index for looking up filehandles */
+ clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
+ &nfs_fscache_server_index_def,
+ clp);
+ dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
+ clp, clp->fscache);
+}
+
+/*
+ * Dispose of a per-client cookie
+ */
+void nfs_fscache_release_client_cookie(struct nfs_client *clp)
+{
+ dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
+ clp, clp->fscache);
+
+ fscache_relinquish_cookie(clp->fscache, 0);
+ clp->fscache = NULL;
+}
+
+/*
+ * Get the cache cookie for an NFS superblock. We have to handle
+ * uniquification here because the cache doesn't do it for us.
+ */
+void nfs_fscache_get_super_cookie(struct super_block *sb,
+ struct nfs_parsed_mount_data *data)
+{
+ struct nfs_fscache_key *key, *xkey;
+ struct nfs_server *nfss = NFS_SB(sb);
+ struct rb_node **p, *parent;
+ const char *uniq = data->fscache_uniq ?: "";
+ int diff, ulen;
+
+ ulen = strlen(uniq);
+ key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
+ if (!key)
+ return;
+
+ key->nfs_client = nfss->nfs_client;
+ key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
+ key->key.nfs_server.flags = nfss->flags;
+ key->key.nfs_server.rsize = nfss->rsize;
+ key->key.nfs_server.wsize = nfss->wsize;
+ key->key.nfs_server.acregmin = nfss->acregmin;
+ key->key.nfs_server.acregmax = nfss->acregmax;
+ key->key.nfs_server.acdirmin = nfss->acdirmin;
+ key->key.nfs_server.acdirmax = nfss->acdirmax;
+ key->key.nfs_server.fsid = nfss->fsid;
+ key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor;
+
+ key->key.uniq_len = ulen;
+ memcpy(key->key.uniquifier, uniq, ulen);
+
+ spin_lock(&nfs_fscache_keys_lock);
+ p = &nfs_fscache_keys.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ xkey = rb_entry(parent, struct nfs_fscache_key, node);
+
+ if (key->nfs_client < xkey->nfs_client)
+ goto go_left;
+ if (key->nfs_client > xkey->nfs_client)
+ goto go_right;
+
+ diff = memcmp(&key->key, &xkey->key, sizeof(key->key));
+ if (diff < 0)
+ goto go_left;
+ if (diff > 0)
+ goto go_right;
+
+ if (key->key.uniq_len == 0)
+ goto non_unique;
+ diff = memcmp(key->key.uniquifier,
+ xkey->key.uniquifier,
+ key->key.uniq_len);
+ if (diff < 0)
+ goto go_left;
+ if (diff > 0)
+ goto go_right;
+ goto non_unique;
+
+ go_left:
+ p = &(*p)->rb_left;
+ continue;
+ go_right:
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&key->node, parent, p);
+ rb_insert_color(&key->node, &nfs_fscache_keys);
+ spin_unlock(&nfs_fscache_keys_lock);
+ nfss->fscache_key = key;
+
+ /* create a cache index for looking up filehandles */
+ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
+ &nfs_fscache_super_index_def,
+ nfss);
+ dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
+ nfss, nfss->fscache);
+ return;
+
+non_unique:
+ spin_unlock(&nfs_fscache_keys_lock);
+ kfree(key);
+ nfss->fscache_key = NULL;
+ nfss->fscache = NULL;
+ printk(KERN_WARNING "NFS:"
+ " Cache request denied due to non-unique superblock keys\n");
+}
+
+/*
+ * release a per-superblock cookie
+ */
+void nfs_fscache_release_super_cookie(struct super_block *sb)
+{
+ struct nfs_server *nfss = NFS_SB(sb);
+
+ dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
+ nfss, nfss->fscache);
+
+ fscache_relinquish_cookie(nfss->fscache, 0);
+ nfss->fscache = NULL;
+
+ if (nfss->fscache_key) {
+ spin_lock(&nfs_fscache_keys_lock);
+ rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys);
+ spin_unlock(&nfs_fscache_keys_lock);
+ kfree(nfss->fscache_key);
+ nfss->fscache_key = NULL;
+ }
+}
+
+/*
+ * Initialise the per-inode cache cookie pointer for an NFS inode.
+ */
+void nfs_fscache_init_inode_cookie(struct inode *inode)
+{
+ NFS_I(inode)->fscache = NULL;
+ if (S_ISREG(inode->i_mode))
+ set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+}
+
+/*
+ * Get the per-inode cache cookie for an NFS inode.
+ */
+static void nfs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (nfsi->fscache || !NFS_FSCACHE(inode))
+ return;
+
+ if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) {
+ nfsi->fscache = fscache_acquire_cookie(
+ NFS_SB(sb)->fscache,
+ &nfs_fscache_inode_object_def,
+ nfsi);
+
+ dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
+ sb, nfsi, nfsi->fscache);
+ }
+}
+
+/*
+ * Release a per-inode cookie.
+ */
+void nfs_fscache_release_inode_cookie(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
+ nfsi, nfsi->fscache);
+
+ fscache_relinquish_cookie(nfsi->fscache, 0);
+ nfsi->fscache = NULL;
+}
+
+/*
+ * Retire a per-inode cookie, destroying the data attached to it.
+ */
+void nfs_fscache_zap_inode_cookie(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n",
+ nfsi, nfsi->fscache);
+
+ fscache_relinquish_cookie(nfsi->fscache, 1);
+ nfsi->fscache = NULL;
+}
+
+/*
+ * Turn off the cache with regard to a per-inode cookie if opened for writing,
+ * invalidating all the pages in the page cache relating to the associated
+ * inode to clear the per-page caching.
+ */
+static void nfs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+
+ if (NFS_I(inode)->fscache) {
+ dfprintk(FSCACHE,
+ "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
+
+ /* Need to invalidate any mapped pages that were read in before
+ * turning off the cache.
+ */
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+ invalidate_inode_pages2(inode->i_mapping);
+
+ nfs_fscache_zap_inode_cookie(inode);
+ }
+}
+
+/*
+ * wait_on_bit() sleep function for uninterruptible waiting
+ */
+static int nfs_fscache_wait_bit(void *flags)
+{
+ schedule();
+ return 0;
+}
+
+/*
+ * Lock against someone else trying to also acquire or relinquish a cookie
+ */
+static inline void nfs_fscache_inode_lock(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
+ wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
+ nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Unlock cookie management lock
+ */
+static inline void nfs_fscache_inode_unlock(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ smp_mb__before_clear_bit();
+ clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
+}
+
+/*
+ * Decide if we should enable or disable local caching for this inode.
+ * - For now, with NFS, only regular files that are open read-only will be able
+ * to use the cache.
+ * - May be invoked multiple times in parallel by parallel nfs_open() functions.
+ */
+void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ if (NFS_FSCACHE(inode)) {
+ nfs_fscache_inode_lock(inode);
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ nfs_fscache_disable_inode_cookie(inode);
+ else
+ nfs_fscache_enable_inode_cookie(inode);
+ nfs_fscache_inode_unlock(inode);
+ }
+}
+
+/*
+ * Replace a per-inode cookie due to revalidation detecting a file having
+ * changed on the server.
+ */
+void nfs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ struct fscache_cookie *old = nfsi->fscache;
+
+ nfs_fscache_inode_lock(inode);
+ if (nfsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(nfsi->fscache, 1);
+
+ nfsi->fscache = fscache_acquire_cookie(
+ nfss->nfs_client->fscache,
+ &nfs_fscache_inode_object_def,
+ nfsi);
+
+ dfprintk(FSCACHE,
+ "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
+ nfss, nfsi, old, nfsi->fscache);
+ }
+ nfs_fscache_inode_unlock(inode);
+}
+
+/*
+ * Release the caching state associated with a page, if the page isn't busy
+ * interacting with the cache.
+ * - Returns true (can release page) or false (page busy).
+ */
+int nfs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+ struct fscache_cookie *cookie = nfsi->fscache;
+
+ BUG_ON(!cookie);
+
+ if (fscache_check_page_write(cookie, page)) {
+ if (!(gfp & __GFP_WAIT))
+ return 0;
+ fscache_wait_on_page_write(cookie, page);
+ }
+
+ if (PageFsCache(page)) {
+ dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
+ cookie, page, nfsi);
+
+ fscache_uncache_page(cookie, page);
+ nfs_add_fscache_stats(page->mapping->host,
+ NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+ }
+
+ return 1;
+}
+
+/*
+ * Release the caching state associated with a page if undergoing complete page
+ * invalidation.
+ */
+void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct fscache_cookie *cookie = nfsi->fscache;
+
+ BUG_ON(!cookie);
+
+ dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
+ cookie, page, nfsi);
+
+ fscache_wait_on_page_write(cookie, page);
+
+ BUG_ON(!PageLocked(page));
+ fscache_uncache_page(cookie, page);
+ nfs_add_fscache_stats(page->mapping->host,
+ NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+}
+
+/*
+ * Handle completion of a page being read from the cache.
+ * - Called in process (keventd) context.
+ */
+static void nfs_readpage_from_fscache_complete(struct page *page,
+ void *context,
+ int error)
+{
+ dfprintk(FSCACHE,
+ "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
+ page, context, error);
+
+ /* if the read completes with an error, we just unlock the page and let
+ * the VM reissue the readpage */
+ if (!error) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ } else {
+ error = nfs_readpage_async(context, page->mapping->host, page);
+ if (error)
+ unlock_page(page);
+ }
+}
+
+/*
+ * Retrieve a page from fscache
+ */
+int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode, struct page *page)
+{
+ int ret;
+
+ dfprintk(FSCACHE,
+ "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
+ NFS_I(inode)->fscache, page, page->index, page->flags, inode);
+
+ ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
+ page,
+ nfs_readpage_from_fscache_complete,
+ ctx,
+ GFP_KERNEL);
+
+ switch (ret) {
+ case 0: /* read BIO submitted (page in fscache) */
+ dfprintk(FSCACHE,
+ "NFS: readpage_from_fscache: BIO submitted\n");
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
+ return ret;
+
+ case -ENOBUFS: /* inode not in cache */
+ case -ENODATA: /* page not in cache */
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+ dfprintk(FSCACHE,
+ "NFS: readpage_from_fscache %d\n", ret);
+ return 1;
+
+ default:
+ dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+ }
+ return ret;
+}
+
+/*
+ * Retrieve a set of pages from fscache
+ */
+int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ int ret, npages = *nr_pages;
+
+ dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
+ NFS_I(inode)->fscache, npages, inode);
+
+ ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
+ mapping, pages, nr_pages,
+ nfs_readpage_from_fscache_complete,
+ ctx,
+ mapping_gfp_mask(mapping));
+ if (*nr_pages < npages)
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK,
+ npages);
+ if (*nr_pages > 0)
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL,
+ *nr_pages);
+
+ switch (ret) {
+ case 0: /* read submitted to the cache for all pages */
+ BUG_ON(!list_empty(pages));
+ BUG_ON(*nr_pages != 0);
+ dfprintk(FSCACHE,
+ "NFS: nfs_getpages_from_fscache: submitted\n");
+
+ return ret;
+
+ case -ENOBUFS: /* some pages aren't cached and can't be */
+ case -ENODATA: /* some pages aren't cached */
+ dfprintk(FSCACHE,
+ "NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
+ return 1;
+
+ default:
+ dfprintk(FSCACHE,
+ "NFS: nfs_getpages_from_fscache: ret %d\n", ret);
+ }
+
+ return ret;
+}
+
+/*
+ * Store a newly fetched page in fscache
+ * - PG_fscache must be set on the page
+ */
+void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
+{
+ int ret;
+
+ dfprintk(FSCACHE,
+ "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
+ NFS_I(inode)->fscache, page, page->index, page->flags, sync);
+
+ ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
+ dfprintk(FSCACHE,
+ "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
+ page, page->index, page->flags, ret);
+
+ if (ret != 0) {
+ fscache_uncache_page(NFS_I(inode)->fscache, page);
+ nfs_add_fscache_stats(inode,
+ NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
+ nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+ } else {
+ nfs_add_fscache_stats(inode,
+ NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
+ }
+}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
new file mode 100644
index 000000000000..6e809bb0ff08
--- /dev/null
+++ b/fs/nfs/fscache.h
@@ -0,0 +1,220 @@
+/* NFS filesystem cache interface definitions
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _NFS_FSCACHE_H
+#define _NFS_FSCACHE_H
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/fscache.h>
+
+#ifdef CONFIG_NFS_FSCACHE
+
+/*
+ * set of NFS FS-Cache objects that form a superblock key
+ */
+struct nfs_fscache_key {
+ struct rb_node node;
+ struct nfs_client *nfs_client; /* the server */
+
+ /* the elements of the unique key - as used by nfs_compare_super() and
+ * nfs_compare_mount_options() to distinguish superblocks */
+ struct {
+ struct {
+ unsigned long s_flags; /* various flags
+ * (& NFS_MS_MASK) */
+ } super;
+
+ struct {
+ struct nfs_fsid fsid;
+ int flags;
+ unsigned int rsize; /* read size */
+ unsigned int wsize; /* write size */
+ unsigned int acregmin; /* attr cache timeouts */
+ unsigned int acregmax;
+ unsigned int acdirmin;
+ unsigned int acdirmax;
+ } nfs_server;
+
+ struct {
+ rpc_authflavor_t au_flavor;
+ } rpc_auth;
+
+ /* uniquifier - can be used if nfs_server.flags includes
+ * NFS_MOUNT_UNSHARED */
+ u8 uniq_len;
+ char uniquifier[0];
+ } key;
+};
+
+/*
+ * fscache-index.c
+ */
+extern struct fscache_netfs nfs_fscache_netfs;
+extern const struct fscache_cookie_def nfs_fscache_server_index_def;
+extern const struct fscache_cookie_def nfs_fscache_super_index_def;
+extern const struct fscache_cookie_def nfs_fscache_inode_object_def;
+
+extern int nfs_fscache_register(void);
+extern void nfs_fscache_unregister(void);
+
+/*
+ * fscache.c
+ */
+extern void nfs_fscache_get_client_cookie(struct nfs_client *);
+extern void nfs_fscache_release_client_cookie(struct nfs_client *);
+
+extern void nfs_fscache_get_super_cookie(struct super_block *,
+ struct nfs_parsed_mount_data *);
+extern void nfs_fscache_release_super_cookie(struct super_block *);
+
+extern void nfs_fscache_init_inode_cookie(struct inode *);
+extern void nfs_fscache_release_inode_cookie(struct inode *);
+extern void nfs_fscache_zap_inode_cookie(struct inode *);
+extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void nfs_fscache_reset_inode_cookie(struct inode *);
+
+extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
+extern int nfs_fscache_release_page(struct page *, gfp_t);
+
+extern int __nfs_readpage_from_fscache(struct nfs_open_context *,
+ struct inode *, struct page *);
+extern int __nfs_readpages_from_fscache(struct nfs_open_context *,
+ struct inode *, struct address_space *,
+ struct list_head *, unsigned *);
+extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int);
+
+/*
+ * wait for a page to complete writing to the cache
+ */
+static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ fscache_wait_on_page_write(nfsi->fscache, page);
+}
+
+/*
+ * release the caching state associated with a page if undergoing complete page
+ * invalidation
+ */
+static inline void nfs_fscache_invalidate_page(struct page *page,
+ struct inode *inode)
+{
+ if (PageFsCache(page))
+ __nfs_fscache_invalidate_page(page, inode);
+}
+
+/*
+ * Retrieve a page from an inode data storage object.
+ */
+static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode,
+ struct page *page)
+{
+ if (NFS_I(inode)->fscache)
+ return __nfs_readpage_from_fscache(ctx, inode, page);
+ return -ENOBUFS;
+}
+
+/*
+ * Retrieve a set of pages from an inode data storage object.
+ */
+static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ if (NFS_I(inode)->fscache)
+ return __nfs_readpages_from_fscache(ctx, inode, mapping, pages,
+ nr_pages);
+ return -ENOBUFS;
+}
+
+/*
+ * Store a page newly fetched from the server in an inode data storage object
+ * in the cache.
+ */
+static inline void nfs_readpage_to_fscache(struct inode *inode,
+ struct page *page,
+ int sync)
+{
+ if (PageFsCache(page))
+ __nfs_readpage_to_fscache(inode, page, sync);
+}
+
+/*
+ * indicate the client caching state as readable text
+ */
+static inline const char *nfs_server_fscache_state(struct nfs_server *server)
+{
+ if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
+ return "yes";
+ return "no ";
+}
+
+
+#else /* CONFIG_NFS_FSCACHE */
+static inline int nfs_fscache_register(void) { return 0; }
+static inline void nfs_fscache_unregister(void) {}
+
+static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
+static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
+
+static inline void nfs_fscache_get_super_cookie(
+ struct super_block *sb,
+ struct nfs_parsed_mount_data *data)
+{
+}
+static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
+
+static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}
+
+static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ return 1; /* True: may release page */
+}
+static inline void nfs_fscache_invalidate_page(struct page *page,
+ struct inode *inode) {}
+static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
+ struct page *page) {}
+
+static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode,
+ struct page *page)
+{
+ return -ENOBUFS;
+}
+static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
+ struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+static inline void nfs_readpage_to_fscache(struct inode *inode,
+ struct page *page, int sync) {}
+
+static inline const char *nfs_server_fscache_state(struct nfs_server *server)
+{
+ return "no ";
+}
+
+#endif /* CONFIG_NFS_FSCACHE */
+#endif /* _NFS_FSCACHE_H */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a834d1d850b7..64f87194d390 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -46,6 +46,7 @@
#include "delegation.h"
#include "iostat.h"
#include "internal.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -121,6 +122,7 @@ void nfs_clear_inode(struct inode *inode)
BUG_ON(!list_empty(&NFS_I(inode)->open_files));
nfs_zap_acl_cache(inode);
nfs_access_zap_cache(inode);
+ nfs_fscache_release_inode_cookie(inode);
}
/**
@@ -355,6 +357,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now;
nfsi->access_cache = RB_ROOT;
+ nfs_fscache_init_inode_cookie(inode);
+
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
@@ -686,6 +690,7 @@ int nfs_open(struct inode *inode, struct file *filp)
ctx->mode = filp->f_mode;
nfs_file_set_open_context(filp, ctx);
put_nfs_open_context(ctx);
+ nfs_fscache_set_inode_cookie(inode, filp);
return 0;
}
@@ -786,6 +791,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
spin_unlock(&inode->i_lock);
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
+ nfs_fscache_reset_inode_cookie(inode);
dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
inode->i_sb->s_id, (long long)NFS_FILEID(inode));
return 0;
@@ -1030,6 +1036,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
spin_lock(&inode->i_lock);
status = nfs_refresh_inode_locked(inode, fattr);
spin_unlock(&inode->i_lock);
+
return status;
}
@@ -1436,6 +1443,10 @@ static int __init init_nfs_fs(void)
{
int err;
+ err = nfs_fscache_register();
+ if (err < 0)
+ goto out7;
+
err = nfsiod_start();
if (err)
goto out6;
@@ -1488,6 +1499,8 @@ out4:
out5:
nfsiod_stop();
out6:
+ nfs_fscache_unregister();
+out7:
return err;
}
@@ -1498,6 +1511,7 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_readpagecache();
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
+ nfs_fscache_unregister();
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2041f68ff1cc..e4d6a8348adf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,6 +5,8 @@
#include <linux/mount.h>
#include <linux/security.h>
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+
struct nfs_string;
/* Maximum number of readahead requests
@@ -37,10 +39,12 @@ struct nfs_parsed_mount_data {
int acregmin, acregmax,
acdirmin, acdirmax;
int namlen;
+ unsigned int options;
unsigned int bsize;
unsigned int auth_flavor_len;
rpc_authflavor_t auth_flavors[1];
char *client_address;
+ char *fscache_uniq;
struct {
struct sockaddr_storage address;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index a36952810032..a2ab2529b5ca 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -16,6 +16,9 @@
struct nfs_iostats {
unsigned long long bytes[__NFSIOS_BYTESMAX];
+#ifdef CONFIG_NFS_FSCACHE
+ unsigned long long fscache[__NFSIOS_FSCACHEMAX];
+#endif
unsigned long events[__NFSIOS_COUNTSMAX];
} ____cacheline_aligned;
@@ -57,6 +60,21 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
+#ifdef CONFIG_NFS_FSCACHE
+static inline void nfs_add_fscache_stats(struct inode *inode,
+ enum nfs_stat_fscachecounters stat,
+ unsigned long addend)
+{
+ struct nfs_iostats *iostats;
+ int cpu;
+
+ cpu = get_cpu();
+ iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
+ iostats->fscache[stat] += addend;
+ put_cpu_no_resched();
+}
+#endif
+
static inline struct nfs_iostats *nfs_alloc_iostats(void)
{
return alloc_percpu(struct nfs_iostats);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f856004bb7fa..4ace3c50a8eb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,6 +24,7 @@
#include "internal.h"
#include "iostat.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -111,8 +112,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}
-static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page)
+int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
+ struct page *page)
{
LIST_HEAD(one_request);
struct nfs_page *new;
@@ -139,6 +140,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
static void nfs_readpage_release(struct nfs_page *req)
{
+ struct inode *d_inode = req->wb_context->path.dentry->d_inode;
+
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+
unlock_page(req->wb_page);
dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
@@ -510,8 +516,15 @@ int nfs_readpage(struct file *file, struct page *page)
} else
ctx = get_nfs_open_context(nfs_file_open_context(file));
+ if (!IS_SYNC(inode)) {
+ error = nfs_readpage_from_fscache(ctx, inode, page);
+ if (error == 0)
+ goto out;
+ }
+
error = nfs_readpage_async(ctx, inode, page);
+out:
put_nfs_open_context(ctx);
return error;
out_unlock:
@@ -584,6 +597,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
return -EBADF;
} else
desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
+
+ /* attempt to read as many of the pages as possible from the cache
+ * - this returns -ENOBUFS immediately if the cookie is negative
+ */
+ ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
+ pages, &nr_pages);
+ if (ret == 0)
+ goto read_complete; /* all pages were read */
+
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
@@ -594,6 +616,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
nfs_pageio_complete(&pgio);
npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+read_complete:
put_nfs_open_context(desc.ctx);
out:
return ret;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0942fcbbad3c..82eaadbff408 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -60,6 +60,7 @@
#include "delegation.h"
#include "iostat.h"
#include "internal.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -76,6 +77,7 @@ enum {
Opt_rdirplus, Opt_nordirplus,
Opt_sharecache, Opt_nosharecache,
Opt_resvport, Opt_noresvport,
+ Opt_fscache, Opt_nofscache,
/* Mount options that take integer arguments */
Opt_port,
@@ -93,6 +95,7 @@ enum {
Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
Opt_addr, Opt_mountaddr, Opt_clientaddr,
Opt_lookupcache,
+ Opt_fscache_uniq,
/* Special mount options */
Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -132,6 +135,9 @@ static const match_table_t nfs_mount_option_tokens = {
{ Opt_nosharecache, "nosharecache" },
{ Opt_resvport, "resvport" },
{ Opt_noresvport, "noresvport" },
+ { Opt_fscache, "fsc" },
+ { Opt_fscache_uniq, "fsc=%s" },
+ { Opt_nofscache, "nofsc" },
{ Opt_port, "port=%u" },
{ Opt_rsize, "rsize=%u" },
@@ -563,6 +569,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (clp->rpc_ops->version == 4)
seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
#endif
+ if (nfss->options & NFS_OPTION_FSCACHE)
+ seq_printf(m, ",fsc");
}
/*
@@ -641,6 +649,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
totals.events[i] += stats->events[i];
for (i = 0; i < __NFSIOS_BYTESMAX; i++)
totals.bytes[i] += stats->bytes[i];
+#ifdef CONFIG_NFS_FSCACHE
+ for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
+ totals.fscache[i] += stats->fscache[i];
+#endif
preempt_enable();
}
@@ -651,6 +663,13 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, "\n\tbytes:\t");
for (i = 0; i < __NFSIOS_BYTESMAX; i++)
seq_printf(m, "%Lu ", totals.bytes[i]);
+#ifdef CONFIG_NFS_FSCACHE
+ if (nfss->options & NFS_OPTION_FSCACHE) {
+ seq_printf(m, "\n\tfsc:\t");
+ for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
+ seq_printf(m, "%Lu ", totals.bytes[i]);
+ }
+#endif
seq_printf(m, "\n");
rpc_print_iostats(m, nfss->client);
@@ -1044,6 +1063,24 @@ static int nfs_parse_mount_options(char *raw,
case Opt_noresvport:
mnt->flags |= NFS_MOUNT_NORESVPORT;
break;
+ case Opt_fscache:
+ mnt->options |= NFS_OPTION_FSCACHE;
+ kfree(mnt->fscache_uniq);
+ mnt->fscache_uniq = NULL;
+ break;
+ case Opt_nofscache:
+ mnt->options &= ~NFS_OPTION_FSCACHE;
+ kfree(mnt->fscache_uniq);
+ mnt->fscache_uniq = NULL;
+ break;
+ case Opt_fscache_uniq:
+ string = match_strdup(args);
+ if (!string)
+ goto out_nomem;
+ kfree(mnt->fscache_uniq);
+ mnt->fscache_uniq = string;
+ mnt->options |= NFS_OPTION_FSCACHE;
+ break;
/*
* options that take numeric values
@@ -1870,8 +1907,6 @@ static void nfs_clone_super(struct super_block *sb,
nfs_initialise_sb(sb);
}
-#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
-
static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
{
const struct nfs_server *a = s->s_fs_info;
@@ -2036,6 +2071,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
nfs_fill_super(s, data);
+ nfs_fscache_get_super_cookie(s, data);
}
mntroot = nfs_get_root(s, mntfh);
@@ -2056,6 +2092,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
out:
kfree(data->nfs_server.hostname);
kfree(data->mount_server.hostname);
+ kfree(data->fscache_uniq);
security_free_mnt_opts(&data->lsm_opts);
out_free_fh:
kfree(mntfh);
@@ -2083,6 +2120,7 @@ static void nfs_kill_super(struct super_block *s)
bdi_unregister(&server->backing_dev_info);
kill_anon_super(s);
+ nfs_fscache_release_super_cookie(s);
nfs_free_server(server);
}
@@ -2390,6 +2428,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
if (!s->s_root) {
/* initial superblock/root creation */
nfs4_fill_super(s);
+ nfs_fscache_get_super_cookie(s, data);
}
mntroot = nfs4_get_root(s, mntfh);
@@ -2411,6 +2450,7 @@ out:
kfree(data->client_address);
kfree(data->nfs_server.export_path);
kfree(data->nfs_server.hostname);
+ kfree(data->fscache_uniq);
security_free_mnt_opts(&data->lsm_opts);
out_free_fh:
kfree(mntfh);
@@ -2437,6 +2477,7 @@ static void nfs4_kill_super(struct super_block *sb)
kill_anon_super(sb);
nfs4_renewd_prepare_shutdown(server);
+ nfs_fscache_release_super_cookie(sb);
nfs_free_server(server);
}
diff --git a/fs/splice.c b/fs/splice.c
index 4ed0ba44a966..dd727d43e5b7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -59,7 +59,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
*/
wait_on_page_writeback(page);
- if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+ if (page_has_private(page) &&
+ !try_to_release_page(page, GFP_KERNEL))
goto out_unlock;
/*
diff --git a/fs/super.c b/fs/super.c
index 2ba481518ba7..77cb4ec919b9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -287,6 +287,7 @@ int fsync_super(struct super_block *sb)
__fsync_super(sb);
return sync_blockdev(sb->s_bdev);
}
+EXPORT_SYMBOL_GPL(fsync_super);
/**
* generic_shutdown_super - common helper for ->kill_sb()