summaryrefslogtreecommitdiff
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/f2fs.rst19
-rw-r--r--Documentation/filesystems/fsverity.rst76
-rw-r--r--Documentation/filesystems/locking.rst7
-rw-r--r--Documentation/filesystems/overlayfs.rst8
-rw-r--r--Documentation/filesystems/porting.rst18
-rw-r--r--Documentation/filesystems/seq_file.rst6
-rw-r--r--Documentation/filesystems/vfs.rst24
7 files changed, 143 insertions, 15 deletions
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index dae15c96e659..35ed01a5fbc9 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -179,7 +179,6 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010
FAULT_ALLOC_NID 0x000000020
FAULT_ORPHAN 0x000000040
FAULT_BLOCK 0x000000080
@@ -247,8 +246,24 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl
hide up to all remaining free space. The actual space that
would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
This space is reclaimed once checkpoint=enable.
+checkpoint_merge When checkpoint is enabled, this can be used to create a kernel
+ daemon and make it to merge concurrent checkpoint requests as
+ much as possible to eliminate redundant checkpoint issues. Plus,
+ we can eliminate the sluggish issue caused by slow checkpoint
+ operation when the checkpoint is done in a process context in
+ a cgroup having low i/o budget and cpu shares. To make this
+ do better, we set the default i/o priority of the kernel daemon
+ to "3", to give one higher priority than other kernel threads.
+ This is the same way to give a I/O priority to the jbd2
+ journaling thread of ext4 filesystem.
+nocheckpoint_merge Disable checkpoint merge feature.
compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo",
"lz4", "zstd" and "lzo-rle" algorithm.
+compress_algorithm=%s:%d Control compress algorithm and its compress level, now, only
+ "lz4" and "zstd" support compress level config.
+ algorithm level range
+ lz4 3 - 16
+ zstd 1 - 22
compress_log_size=%u Support configuring compress cluster size, the size will
be 4KB * (1 << %u), 16KB is minimum size, also it's
default size.
@@ -831,7 +846,7 @@ This is the default option. f2fs does automatic compression in the writeback of
compression enabled files.
2) compress_mode=user
-This disables the automaic compression and gives the user discretion of choosing the
+This disables the automatic compression and gives the user discretion of choosing the
target file and the timing. The user can do manual compression/decompression on the
compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
ioctls like the below.
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index e0204a23e997..1d831e3cbcb3 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -217,6 +217,82 @@ FS_IOC_MEASURE_VERITY can fail with the following errors:
- ``EOVERFLOW``: the digest is longer than the specified
``digest_size`` bytes. Try providing a larger buffer.
+FS_IOC_READ_VERITY_METADATA
+---------------------------
+
+The FS_IOC_READ_VERITY_METADATA ioctl reads verity metadata from a
+verity file. This ioctl is available since Linux v5.12.
+
+This ioctl allows writing a server program that takes a verity file
+and serves it to a client program, such that the client can do its own
+fs-verity compatible verification of the file. This only makes sense
+if the client doesn't trust the server and if the server needs to
+provide the storage for the client.
+
+This is a fairly specialized use case, and most fs-verity users won't
+need this ioctl.
+
+This ioctl takes in a pointer to the following structure::
+
+ #define FS_VERITY_METADATA_TYPE_MERKLE_TREE 1
+ #define FS_VERITY_METADATA_TYPE_DESCRIPTOR 2
+ #define FS_VERITY_METADATA_TYPE_SIGNATURE 3
+
+ struct fsverity_read_metadata_arg {
+ __u64 metadata_type;
+ __u64 offset;
+ __u64 length;
+ __u64 buf_ptr;
+ __u64 __reserved;
+ };
+
+``metadata_type`` specifies the type of metadata to read:
+
+- ``FS_VERITY_METADATA_TYPE_MERKLE_TREE`` reads the blocks of the
+ Merkle tree. The blocks are returned in order from the root level
+ to the leaf level. Within each level, the blocks are returned in
+ the same order that their hashes are themselves hashed.
+ See `Merkle tree`_ for more information.
+
+- ``FS_VERITY_METADATA_TYPE_DESCRIPTOR`` reads the fs-verity
+ descriptor. See `fs-verity descriptor`_.
+
+- ``FS_VERITY_METADATA_TYPE_SIGNATURE`` reads the signature which was
+ passed to FS_IOC_ENABLE_VERITY, if any. See `Built-in signature
+ verification`_.
+
+The semantics are similar to those of ``pread()``. ``offset``
+specifies the offset in bytes into the metadata item to read from, and
+``length`` specifies the maximum number of bytes to read from the
+metadata item. ``buf_ptr`` is the pointer to the buffer to read into,
+cast to a 64-bit integer. ``__reserved`` must be 0. On success, the
+number of bytes read is returned. 0 is returned at the end of the
+metadata item. The returned length may be less than ``length``, for
+example if the ioctl is interrupted.
+
+The metadata returned by FS_IOC_READ_VERITY_METADATA isn't guaranteed
+to be authenticated against the file digest that would be returned by
+`FS_IOC_MEASURE_VERITY`_, as the metadata is expected to be used to
+implement fs-verity compatible verification anyway (though absent a
+malicious disk, the metadata will indeed match). E.g. to implement
+this ioctl, the filesystem is allowed to just read the Merkle tree
+blocks from disk without actually verifying the path to the root node.
+
+FS_IOC_READ_VERITY_METADATA can fail with the following errors:
+
+- ``EFAULT``: the caller provided inaccessible memory
+- ``EINTR``: the ioctl was interrupted before any data was read
+- ``EINVAL``: reserved fields were set, or ``offset + length``
+ overflowed
+- ``ENODATA``: the file is not a verity file, or
+ FS_VERITY_METADATA_TYPE_SIGNATURE was requested but the file doesn't
+ have a built-in signature
+- ``ENOTTY``: this type of filesystem does not implement fs-verity, or
+ this ioctl is not yet implemented on it
+- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
+ support, or the filesystem superblock has not had the 'verity'
+ feature enabled on it. (See `Filesystem support`_.)
+
FS_IOC_GETFLAGS
---------------
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index c0f2c7586531..b7dcc86c92a4 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -126,9 +126,10 @@ prototypes::
int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *buffer,
size_t size);
- int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
- struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags);
+ int (*set)(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct inode *inode, const char *name,
+ const void *buffer, size_t size, int flags);
locking rules:
all may block
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 587a93973929..78240e29b0bb 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -586,6 +586,14 @@ without significant effort.
The advantage of mounting with the "volatile" option is that all forms of
sync calls to the upper filesystem are omitted.
+In order to avoid a giving a false sense of safety, the syncfs (and fsync)
+semantics of volatile mounts are slightly different than that of the rest of
+VFS. If any writeback error occurs on the upperdir's filesystem after a
+volatile mount takes place, all sync functions will return an error. Once this
+condition is reached, the filesystem will not recover, and every subsequent sync
+call will return an error, even if the upperdir has not experience a new error
+since the last sync call.
+
When overlay is mounted with "volatile" option, the directory
"$workdir/work/incompat/volatile" is created. During next mount, overlay
checks for this directory and refuses to mount if present. This is a strong
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 867036aa90b8..633610253f45 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -717,6 +717,8 @@ be removed. Switch while you still can; the old one won't stay.
**mandatory**
->setxattr() and xattr_handler.set() get dentry and inode passed separately.
+The xattr_handler.set() gets passed the user namespace of the mount the inode
+is seen from so filesystems can idmap the i_uid and i_gid accordingly.
dentry might be yet to be attached to inode, so do _not_ use its ->d_inode
in the instances. Rationale: !@#!@# security_d_instantiate() needs to be
called before we attach dentry to inode and !@#!@##!@$!$#!@#$!@$!@$ smack
@@ -865,3 +867,19 @@ no matter what. Everything is handled by the caller.
clone_private_mount() returns a longterm mount now, so the proper destructor of
its result is kern_unmount() or kern_unmount_array().
+
+---
+
+**mandatory**
+
+zero-length bvec segments are disallowed, they must be filtered out before
+passed on to an iterator.
+
+---
+
+**mandatory**
+
+For bvec based itererators bio_iov_iter_get_pages() now doesn't copy bvecs but
+uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
+page references stay until I/O has completed, i.e. until ->ki_complete() has
+been called or returned with non -EIOCBQUEUED code.
diff --git a/Documentation/filesystems/seq_file.rst b/Documentation/filesystems/seq_file.rst
index 56856481dc8d..a6726082a7c2 100644
--- a/Documentation/filesystems/seq_file.rst
+++ b/Documentation/filesystems/seq_file.rst
@@ -217,6 +217,12 @@ between the calls to start() and stop(), so holding a lock during that time
is a reasonable thing to do. The seq_file code will also avoid taking any
other locks while the iterator is active.
+The iterater value returned by start() or next() is guaranteed to be
+passed to a subsequent next() or stop() call. This allows resources
+such as locks that were taken to be reliably released. There is *no*
+guarantee that the iterator will be passed to show(), though in practice
+it often will be.
+
Formatted output
================
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 18d69a4559d6..2049bbf5e388 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -270,7 +270,10 @@ or bottom half).
->alloc_inode.
``dirty_inode``
- this method is called by the VFS to mark an inode dirty.
+ this method is called by the VFS when an inode is marked dirty.
+ This is specifically for the inode itself being marked dirty,
+ not its data. If the update needs to be persisted by fdatasync(),
+ then I_DIRTY_DATASYNC will be set in the flags argument.
``write_inode``
this method is called when the VFS needs to write an inode to
@@ -415,28 +418,29 @@ As of kernel 2.6.22, the following members are defined:
.. code-block:: c
struct inode_operations {
- int (*create) (struct inode *,struct dentry *, umode_t, bool);
+ int (*create) (struct user_namespace *, struct inode *,struct dentry *, umode_t, bool);
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
- int (*symlink) (struct inode *,struct dentry *,const char *);
- int (*mkdir) (struct inode *,struct dentry *,umode_t);
+ int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
- int (*rename) (struct inode *, struct dentry *,
+ int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,umode_t,dev_t);
+ int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
const char *(*get_link) (struct dentry *, struct inode *,
struct delayed_call *);
- int (*permission) (struct inode *, int);
+ int (*permission) (struct user_namespace *, struct inode *, int);
int (*get_acl)(struct inode *, int);
- int (*setattr) (struct dentry *, struct iattr *);
- int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+ int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *);
+ int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
void (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *, struct file *,
unsigned open_flag, umode_t create_mode);
- int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
+ int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
};
Again, all methods are called without any locks being held, unless