summaryrefslogtreecommitdiff
path: root/security/landlock/fs.c
diff options
context:
space:
mode:
Diffstat (limited to 'security/landlock/fs.c')
-rw-r--r--security/landlock/fs.c815
1 files changed, 663 insertions, 152 deletions
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 97b8e421f617..ec5a6247cd3e 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -4,6 +4,7 @@
*
* Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2018-2020 ANSSI
+ * Copyright © 2021-2022 Microsoft Corporation
*/
#include <linux/atomic.h>
@@ -141,23 +142,26 @@ retry:
}
/* All access rights that can be tied to files. */
+/* clang-format off */
#define ACCESS_FILE ( \
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE)
+/* clang-format on */
/*
* @path: Should have been checked by get_path_from_fd().
*/
int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
- const struct path *const path, u32 access_rights)
+ const struct path *const path,
+ access_mask_t access_rights)
{
int err;
struct landlock_object *object;
/* Files only get access rights that make sense. */
- if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) !=
- ACCESS_FILE)
+ if (!d_is_dir(path->dentry) &&
+ (access_rights | ACCESS_FILE) != ACCESS_FILE)
return -EINVAL;
if (WARN_ON_ONCE(ruleset->num_layers != 1))
return -EINVAL;
@@ -180,84 +184,352 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
/* Access-control management */
-static inline u64 unmask_layers(
- const struct landlock_ruleset *const domain,
- const struct path *const path, const u32 access_request,
- u64 layer_mask)
+/*
+ * The lifetime of the returned rule is tied to @domain.
+ *
+ * Returns NULL if no rule is found or if @dentry is negative.
+ */
+static inline const struct landlock_rule *
+find_rule(const struct landlock_ruleset *const domain,
+ const struct dentry *const dentry)
{
const struct landlock_rule *rule;
const struct inode *inode;
- size_t i;
- if (d_is_negative(path->dentry))
- /* Ignore nonexistent leafs. */
- return layer_mask;
- inode = d_backing_inode(path->dentry);
+ /* Ignores nonexistent leafs. */
+ if (d_is_negative(dentry))
+ return NULL;
+
+ inode = d_backing_inode(dentry);
rcu_read_lock();
- rule = landlock_find_rule(domain,
- rcu_dereference(landlock_inode(inode)->object));
+ rule = landlock_find_rule(
+ domain, rcu_dereference(landlock_inode(inode)->object));
rcu_read_unlock();
+ return rule;
+}
+
+/*
+ * @layer_masks is read and may be updated according to the access request and
+ * the matching rule.
+ *
+ * Returns true if the request is allowed (i.e. relevant layer masks for the
+ * request are empty).
+ */
+static inline bool
+unmask_layers(const struct landlock_rule *const rule,
+ const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ size_t layer_level;
+
+ if (!access_request || !layer_masks)
+ return true;
if (!rule)
- return layer_mask;
+ return false;
/*
* An access is granted if, for each policy layer, at least one rule
- * encountered on the pathwalk grants the requested accesses,
- * regardless of their position in the layer stack. We must then check
+ * encountered on the pathwalk grants the requested access,
+ * regardless of its position in the layer stack. We must then check
* the remaining layers for each inode, from the first added layer to
- * the last one.
+ * the last one. When there is multiple requested accesses, for each
+ * policy layer, the full set of requested accesses may not be granted
+ * by only one rule, but by the union (binary OR) of multiple rules.
+ * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
*/
- for (i = 0; i < rule->num_layers; i++) {
- const struct landlock_layer *const layer = &rule->layers[i];
- const u64 layer_level = BIT_ULL(layer->level - 1);
+ for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
+ const struct landlock_layer *const layer =
+ &rule->layers[layer_level];
+ const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+ bool is_empty;
- /* Checks that the layer grants access to the full request. */
- if ((layer->access & access_request) == access_request) {
- layer_mask &= ~layer_level;
-
- if (layer_mask == 0)
- return layer_mask;
+ /*
+ * Records in @layer_masks which layer grants access to each
+ * requested access.
+ */
+ is_empty = true;
+ for_each_set_bit(access_bit, &access_req,
+ ARRAY_SIZE(*layer_masks)) {
+ if (layer->access & BIT_ULL(access_bit))
+ (*layer_masks)[access_bit] &= ~layer_bit;
+ is_empty = is_empty && !(*layer_masks)[access_bit];
}
+ if (is_empty)
+ return true;
}
- return layer_mask;
+ return false;
}
-static int check_access_path(const struct landlock_ruleset *const domain,
- const struct path *const path, u32 access_request)
+/*
+ * Allows access to pseudo filesystems that will never be mountable (e.g.
+ * sockfs, pipefs), but can still be reachable through
+ * /proc/<pid>/fd/<file-descriptor>
+ */
+static inline bool is_nouser_or_private(const struct dentry *dentry)
{
- bool allowed = false;
- struct path walker_path;
- u64 layer_mask;
- size_t i;
+ return (dentry->d_sb->s_flags & SB_NOUSER) ||
+ (d_is_positive(dentry) &&
+ unlikely(IS_PRIVATE(d_backing_inode(dentry))));
+}
- /* Make sure all layers can be checked. */
- BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS);
+static inline access_mask_t
+get_handled_accesses(const struct landlock_ruleset *const domain)
+{
+ access_mask_t access_dom = 0;
+ unsigned long access_bit;
+
+ for (access_bit = 0; access_bit < LANDLOCK_NUM_ACCESS_FS;
+ access_bit++) {
+ size_t layer_level;
+
+ for (layer_level = 0; layer_level < domain->num_layers;
+ layer_level++) {
+ if (domain->fs_access_masks[layer_level] &
+ BIT_ULL(access_bit)) {
+ access_dom |= BIT_ULL(access_bit);
+ break;
+ }
+ }
+ }
+ return access_dom;
+}
+
+static inline access_mask_t
+init_layer_masks(const struct landlock_ruleset *const domain,
+ const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ access_mask_t handled_accesses = 0;
+ size_t layer_level;
+ memset(layer_masks, 0, sizeof(*layer_masks));
+ /* An empty access request can happen because of O_WRONLY | O_RDWR. */
if (!access_request)
return 0;
+
+ /* Saves all handled accesses per layer. */
+ for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+
+ for_each_set_bit(access_bit, &access_req,
+ ARRAY_SIZE(*layer_masks)) {
+ if (domain->fs_access_masks[layer_level] &
+ BIT_ULL(access_bit)) {
+ (*layer_masks)[access_bit] |=
+ BIT_ULL(layer_level);
+ handled_accesses |= BIT_ULL(access_bit);
+ }
+ }
+ }
+ return handled_accesses;
+}
+
+/*
+ * Check that a destination file hierarchy has more restrictions than a source
+ * file hierarchy. This is only used for link and rename actions.
+ *
+ * @layer_masks_child2: Optional child masks.
+ */
+static inline bool no_more_access(
+ const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+ const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
+ const bool child1_is_directory,
+ const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+ const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
+ const bool child2_is_directory)
+{
+ unsigned long access_bit;
+
+ for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
+ access_bit++) {
+ /* Ignores accesses that only make sense for directories. */
+ const bool is_file_access =
+ !!(BIT_ULL(access_bit) & ACCESS_FILE);
+
+ if (child1_is_directory || is_file_access) {
+ /*
+ * Checks if the destination restrictions are a
+ * superset of the source ones (i.e. inherited access
+ * rights without child exceptions):
+ * restrictions(parent2) >= restrictions(child1)
+ */
+ if ((((*layer_masks_parent1)[access_bit] &
+ (*layer_masks_child1)[access_bit]) |
+ (*layer_masks_parent2)[access_bit]) !=
+ (*layer_masks_parent2)[access_bit])
+ return false;
+ }
+
+ if (!layer_masks_child2)
+ continue;
+ if (child2_is_directory || is_file_access) {
+ /*
+ * Checks inverted restrictions for RENAME_EXCHANGE:
+ * restrictions(parent1) >= restrictions(child2)
+ */
+ if ((((*layer_masks_parent2)[access_bit] &
+ (*layer_masks_child2)[access_bit]) |
+ (*layer_masks_parent1)[access_bit]) !=
+ (*layer_masks_parent1)[access_bit])
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
+ * Removes @layer_masks accesses that are not requested.
+ *
+ * Returns true if the request is allowed, false otherwise.
+ */
+static inline bool
+scope_to_request(const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+
+ if (WARN_ON_ONCE(!layer_masks))
+ return true;
+
+ for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
+ (*layer_masks)[access_bit] = 0;
+ return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
+}
+
+/*
+ * Returns true if there is at least one access right different than
+ * LANDLOCK_ACCESS_FS_REFER.
+ */
+static inline bool
+is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
+ const access_mask_t access_request)
+{
+ unsigned long access_bit;
+ /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
+ const unsigned long access_check = access_request &
+ ~LANDLOCK_ACCESS_FS_REFER;
+
+ if (!layer_masks)
+ return false;
+
+ for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
+ if ((*layer_masks)[access_bit])
+ return true;
+ }
+ return false;
+}
+
+/**
+ * check_access_path_dual - Check accesses for requests with a common path
+ *
+ * @domain: Domain to check against.
+ * @path: File hierarchy to walk through.
+ * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
+ * equal to @layer_masks_parent2 (if any). This is tied to the unique
+ * requested path for most actions, or the source in case of a refer action
+ * (i.e. rename or link), or the source and destination in case of
+ * RENAME_EXCHANGE.
+ * @layer_masks_parent1: Pointer to a matrix of layer masks per access
+ * masks, identifying the layers that forbid a specific access. Bits from
+ * this matrix can be unset according to the @path walk. An empty matrix
+ * means that @domain allows all possible Landlock accesses (i.e. not only
+ * those identified by @access_request_parent1). This matrix can
+ * initially refer to domain layer masks and, when the accesses for the
+ * destination and source are the same, to requested layer masks.
+ * @dentry_child1: Dentry to the initial child of the parent1 path. This
+ * pointer must be NULL for non-refer actions (i.e. not link nor rename).
+ * @access_request_parent2: Similar to @access_request_parent1 but for a
+ * request involving a source and a destination. This refers to the
+ * destination, except in case of RENAME_EXCHANGE where it also refers to
+ * the source. Must be set to 0 when using a simple path request.
+ * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
+ * action. This must be NULL otherwise.
+ * @dentry_child2: Dentry to the initial child of the parent2 path. This
+ * pointer is only set for RENAME_EXCHANGE actions and must be NULL
+ * otherwise.
+ *
+ * This helper first checks that the destination has a superset of restrictions
+ * compared to the source (if any) for a common path. Because of
+ * RENAME_EXCHANGE actions, source and destinations may be swapped. It then
+ * checks that the collected accesses and the remaining ones are enough to
+ * allow the request.
+ *
+ * Returns:
+ * - 0 if the access request is granted;
+ * - -EACCES if it is denied because of access right other than
+ * LANDLOCK_ACCESS_FS_REFER;
+ * - -EXDEV if the renaming or linking would be a privileged escalation
+ * (according to each layered policies), or if LANDLOCK_ACCESS_FS_REFER is
+ * not allowed by the source or the destination.
+ */
+static int check_access_path_dual(
+ const struct landlock_ruleset *const domain,
+ const struct path *const path,
+ const access_mask_t access_request_parent1,
+ layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+ const struct dentry *const dentry_child1,
+ const access_mask_t access_request_parent2,
+ layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+ const struct dentry *const dentry_child2)
+{
+ bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
+ child1_is_directory = true, child2_is_directory = true;
+ struct path walker_path;
+ access_mask_t access_masked_parent1, access_masked_parent2;
+ layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
+ _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
+ layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
+ (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
+
+ if (!access_request_parent1 && !access_request_parent2)
+ return 0;
if (WARN_ON_ONCE(!domain || !path))
return 0;
- /*
- * Allows access to pseudo filesystems that will never be mountable
- * (e.g. sockfs, pipefs), but can still be reachable through
- * /proc/<pid>/fd/<file-descriptor> .
- */
- if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
- (d_is_positive(path->dentry) &&
- unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
+ if (is_nouser_or_private(path->dentry))
return 0;
- if (WARN_ON_ONCE(domain->num_layers < 1))
+ if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
return -EACCES;
- /* Saves all layers handling a subset of requested accesses. */
- layer_mask = 0;
- for (i = 0; i < domain->num_layers; i++) {
- if (domain->fs_access_masks[i] & access_request)
- layer_mask |= BIT_ULL(i);
+ if (unlikely(layer_masks_parent2)) {
+ if (WARN_ON_ONCE(!dentry_child1))
+ return -EACCES;
+ /*
+ * For a double request, first check for potential privilege
+ * escalation by looking at domain handled accesses (which are
+ * a superset of the meaningful requested accesses).
+ */
+ access_masked_parent1 = access_masked_parent2 =
+ get_handled_accesses(domain);
+ is_dom_check = true;
+ } else {
+ if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
+ return -EACCES;
+ /* For a simple request, only check for requested accesses. */
+ access_masked_parent1 = access_request_parent1;
+ access_masked_parent2 = access_request_parent2;
+ is_dom_check = false;
+ }
+
+ if (unlikely(dentry_child1)) {
+ unmask_layers(find_rule(domain, dentry_child1),
+ init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child1),
+ &_layer_masks_child1);
+ layer_masks_child1 = &_layer_masks_child1;
+ child1_is_directory = d_is_dir(dentry_child1);
+ }
+ if (unlikely(dentry_child2)) {
+ unmask_layers(find_rule(domain, dentry_child2),
+ init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child2),
+ &_layer_masks_child2);
+ layer_masks_child2 = &_layer_masks_child2;
+ child2_is_directory = d_is_dir(dentry_child2);
}
- /* An access request not handled by the domain is allowed. */
- if (layer_mask == 0)
- return 0;
walker_path = *path;
path_get(&walker_path);
@@ -267,15 +539,54 @@ static int check_access_path(const struct landlock_ruleset *const domain,
*/
while (true) {
struct dentry *parent_dentry;
+ const struct landlock_rule *rule;
- layer_mask = unmask_layers(domain, &walker_path,
- access_request, layer_mask);
- if (layer_mask == 0) {
- /* Stops when a rule from each layer grants access. */
- allowed = true;
- break;
+ /*
+ * If at least all accesses allowed on the destination are
+ * already allowed on the source, respectively if there is at
+ * least as much as restrictions on the destination than on the
+ * source, then we can safely refer files from the source to
+ * the destination without risking a privilege escalation.
+ * This also applies in the case of RENAME_EXCHANGE, which
+ * implies checks on both direction. This is crucial for
+ * standalone multilayered security policies. Furthermore,
+ * this helps avoid policy writers to shoot themselves in the
+ * foot.
+ */
+ if (unlikely(is_dom_check &&
+ no_more_access(
+ layer_masks_parent1, layer_masks_child1,
+ child1_is_directory, layer_masks_parent2,
+ layer_masks_child2,
+ child2_is_directory))) {
+ allowed_parent1 = scope_to_request(
+ access_request_parent1, layer_masks_parent1);
+ allowed_parent2 = scope_to_request(
+ access_request_parent2, layer_masks_parent2);
+
+ /* Stops when all accesses are granted. */
+ if (allowed_parent1 && allowed_parent2)
+ break;
+
+ /*
+ * Now, downgrades the remaining checks from domain
+ * handled accesses to requested accesses.
+ */
+ is_dom_check = false;
+ access_masked_parent1 = access_request_parent1;
+ access_masked_parent2 = access_request_parent2;
}
+ rule = find_rule(domain, walker_path.dentry);
+ allowed_parent1 = unmask_layers(rule, access_masked_parent1,
+ layer_masks_parent1);
+ allowed_parent2 = unmask_layers(rule, access_masked_parent2,
+ layer_masks_parent2);
+
+ /* Stops when a rule from each layer grants access. */
+ if (allowed_parent1 && allowed_parent2)
+ break;
+
jump_up:
if (walker_path.dentry == walker_path.mnt->mnt_root) {
if (follow_up(&walker_path)) {
@@ -286,7 +597,6 @@ jump_up:
* Stops at the real root. Denies access
* because not all layers have granted access.
*/
- allowed = false;
break;
}
}
@@ -296,7 +606,8 @@ jump_up:
* access to internal filesystems (e.g. nsfs, which is
* reachable through /proc/<pid>/ns/<namespace>).
*/
- allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+ allowed_parent1 = allowed_parent2 =
+ !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
break;
}
parent_dentry = dget_parent(walker_path.dentry);
@@ -304,11 +615,40 @@ jump_up:
walker_path.dentry = parent_dentry;
}
path_put(&walker_path);
- return allowed ? 0 : -EACCES;
+
+ if (allowed_parent1 && allowed_parent2)
+ return 0;
+
+ /*
+ * This prioritizes EACCES over EXDEV for all actions, including
+ * renames with RENAME_EXCHANGE.
+ */
+ if (likely(is_eacces(layer_masks_parent1, access_request_parent1) ||
+ is_eacces(layer_masks_parent2, access_request_parent2)))
+ return -EACCES;
+
+ /*
+ * Gracefully forbids reparenting if the destination directory
+ * hierarchy is not a superset of restrictions of the source directory
+ * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
+ * source or the destination.
+ */
+ return -EXDEV;
+}
+
+static inline int check_access_path(const struct landlock_ruleset *const domain,
+ const struct path *const path,
+ access_mask_t access_request)
+{
+ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+
+ access_request = init_layer_masks(domain, access_request, &layer_masks);
+ return check_access_path_dual(domain, path, access_request,
+ &layer_masks, NULL, 0, NULL, NULL);
}
static inline int current_check_access_path(const struct path *const path,
- const u32 access_request)
+ const access_mask_t access_request)
{
const struct landlock_ruleset *const dom =
landlock_get_current_domain();
@@ -318,6 +658,239 @@ static inline int current_check_access_path(const struct path *const path,
return check_access_path(dom, path, access_request);
}
+static inline access_mask_t get_mode_access(const umode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ return LANDLOCK_ACCESS_FS_MAKE_SYM;
+ case 0:
+ /* A zero mode translates to S_IFREG. */
+ case S_IFREG:
+ return LANDLOCK_ACCESS_FS_MAKE_REG;
+ case S_IFDIR:
+ return LANDLOCK_ACCESS_FS_MAKE_DIR;
+ case S_IFCHR:
+ return LANDLOCK_ACCESS_FS_MAKE_CHAR;
+ case S_IFBLK:
+ return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
+ case S_IFIFO:
+ return LANDLOCK_ACCESS_FS_MAKE_FIFO;
+ case S_IFSOCK:
+ return LANDLOCK_ACCESS_FS_MAKE_SOCK;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static inline access_mask_t maybe_remove(const struct dentry *const dentry)
+{
+ if (d_is_negative(dentry))
+ return 0;
+ return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+ LANDLOCK_ACCESS_FS_REMOVE_FILE;
+}
+
+/**
+ * collect_domain_accesses - Walk through a file path and collect accesses
+ *
+ * @domain: Domain to check against.
+ * @mnt_root: Last directory to check.
+ * @dir: Directory to start the walk from.
+ * @layer_masks_dom: Where to store the collected accesses.
+ *
+ * This helper is useful to begin a path walk from the @dir directory to a
+ * @mnt_root directory used as a mount point. This mount point is the common
+ * ancestor between the source and the destination of a renamed and linked
+ * file. While walking from @dir to @mnt_root, we record all the domain's
+ * allowed accesses in @layer_masks_dom.
+ *
+ * This is similar to check_access_path_dual() but much simpler because it only
+ * handles walking on the same mount point and only check one set of accesses.
+ *
+ * Returns:
+ * - true if all the domain access rights are allowed for @dir;
+ * - false if the walk reached @mnt_root.
+ */
+static bool collect_domain_accesses(
+ const struct landlock_ruleset *const domain,
+ const struct dentry *const mnt_root, struct dentry *dir,
+ layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
+{
+ unsigned long access_dom;
+ bool ret = false;
+
+ if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
+ return true;
+ if (is_nouser_or_private(dir))
+ return true;
+
+ access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ layer_masks_dom);
+
+ dget(dir);
+ while (true) {
+ struct dentry *parent_dentry;
+
+ /* Gets all layers allowing all domain accesses. */
+ if (unmask_layers(find_rule(domain, dir), access_dom,
+ layer_masks_dom)) {
+ /*
+ * Stops when all handled accesses are allowed by at
+ * least one rule in each layer.
+ */
+ ret = true;
+ break;
+ }
+
+ /* We should not reach a root other than @mnt_root. */
+ if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
+ break;
+
+ parent_dentry = dget_parent(dir);
+ dput(dir);
+ dir = parent_dentry;
+ }
+ dput(dir);
+ return ret;
+}
+
+/**
+ * current_check_refer_path - Check if a rename or link action is allowed
+ *
+ * @old_dentry: File or directory requested to be moved or linked.
+ * @new_dir: Destination parent directory.
+ * @new_dentry: Destination file or directory.
+ * @removable: Sets to true if it is a rename operation.
+ * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE.
+ *
+ * Because of its unprivileged constraints, Landlock relies on file hierarchies
+ * (and not only inodes) to tie access rights to files. Being able to link or
+ * rename a file hierarchy brings some challenges. Indeed, moving or linking a
+ * file (i.e. creating a new reference to an inode) can have an impact on the
+ * actions allowed for a set of files if it would change its parent directory
+ * (i.e. reparenting).
+ *
+ * To avoid trivial access right bypasses, Landlock first checks if the file or
+ * directory requested to be moved would gain new access rights inherited from
+ * its new hierarchy. Before returning any error, Landlock then checks that
+ * the parent source hierarchy and the destination hierarchy would allow the
+ * link or rename action. If it is not the case, an error with EACCES is
+ * returned to inform user space that there is no way to remove or create the
+ * requested source file type. If it should be allowed but the new inherited
+ * access rights would be greater than the source access rights, then the
+ * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables
+ * user space to abort the whole operation if there is no way to do it, or to
+ * manually copy the source to the destination if this remains allowed, e.g.
+ * because file creation is allowed on the destination directory but not direct
+ * linking.
+ *
+ * To achieve this goal, the kernel needs to compare two file hierarchies: the
+ * one identifying the source file or directory (including itself), and the
+ * destination one. This can be seen as a multilayer partial ordering problem.
+ * The kernel walks through these paths and collects in a matrix the access
+ * rights that are denied per layer. These matrices are then compared to see
+ * if the destination one has more (or the same) restrictions as the source
+ * one. If this is the case, the requested action will not return EXDEV, which
+ * doesn't mean the action is allowed. The parent hierarchy of the source
+ * (i.e. parent directory), and the destination hierarchy must also be checked
+ * to verify that they explicitly allow such action (i.e. referencing,
+ * creation and potentially removal rights). The kernel implementation is then
+ * required to rely on potentially four matrices of access rights: one for the
+ * source file or directory (i.e. the child), a potentially other one for the
+ * other source/destination (in case of RENAME_EXCHANGE), one for the source
+ * parent hierarchy and a last one for the destination hierarchy. These
+ * ephemeral matrices take some space on the stack, which limits the number of
+ * layers to a deemed reasonable number: 16.
+ *
+ * Returns:
+ * - 0 if access is allowed;
+ * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
+ * - -EACCES if file removal or creation is denied.
+ */
+static int current_check_refer_path(struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry,
+ const bool removable, const bool exchange)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+ bool allow_parent1, allow_parent2;
+ access_mask_t access_request_parent1, access_request_parent2;
+ struct path mnt_dir;
+ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
+ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+
+ if (!dom)
+ return 0;
+ if (WARN_ON_ONCE(dom->num_layers < 1))
+ return -EACCES;
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+ if (exchange) {
+ if (unlikely(d_is_negative(new_dentry)))
+ return -ENOENT;
+ access_request_parent1 =
+ get_mode_access(d_backing_inode(new_dentry)->i_mode);
+ } else {
+ access_request_parent1 = 0;
+ }
+ access_request_parent2 =
+ get_mode_access(d_backing_inode(old_dentry)->i_mode);
+ if (removable) {
+ access_request_parent1 |= maybe_remove(old_dentry);
+ access_request_parent2 |= maybe_remove(new_dentry);
+ }
+
+ /* The mount points are the same for old and new paths, cf. EXDEV. */
+ if (old_dentry->d_parent == new_dir->dentry) {
+ /*
+ * The LANDLOCK_ACCESS_FS_REFER access right is not required
+ * for same-directory referer (i.e. no reparenting).
+ */
+ access_request_parent1 = init_layer_masks(
+ dom, access_request_parent1 | access_request_parent2,
+ &layer_masks_parent1);
+ return check_access_path_dual(dom, new_dir,
+ access_request_parent1,
+ &layer_masks_parent1, NULL, 0,
+ NULL, NULL);
+ }
+
+ /* Backward compatibility: no reparenting support. */
+ if (!(get_handled_accesses(dom) & LANDLOCK_ACCESS_FS_REFER))
+ return -EXDEV;
+
+ access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
+ access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;
+
+ /* Saves the common mount point. */
+ mnt_dir.mnt = new_dir->mnt;
+ mnt_dir.dentry = new_dir->mnt->mnt_root;
+
+ /* new_dir->dentry is equal to new_dentry->d_parent */
+ allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
+ old_dentry->d_parent,
+ &layer_masks_parent1);
+ allow_parent2 = collect_domain_accesses(
+ dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
+
+ if (allow_parent1 && allow_parent2)
+ return 0;
+
+ /*
+ * To be able to compare source and destination domain access rights,
+ * take into account the @old_dentry access rights aggregated with its
+ * parent access rights. This will be useful to compare with the
+ * destination parent access rights.
+ */
+ return check_access_path_dual(dom, &mnt_dir, access_request_parent1,
+ &layer_masks_parent1, old_dentry,
+ access_request_parent2,
+ &layer_masks_parent2,
+ exchange ? new_dentry : NULL);
+}
+
/* Inode hooks */
static void hook_inode_free_security(struct inode *const inode)
@@ -436,8 +1009,8 @@ static void hook_sb_delete(struct super_block *const sb)
if (prev_inode)
iput(prev_inode);
/* Waits for pending iput() in release_inode(). */
- wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(
- &landlock_superblock(sb)->inode_refs));
+ wait_var_event(&landlock_superblock(sb)->inode_refs,
+ !atomic_long_read(&landlock_superblock(sb)->inode_refs));
}
/*
@@ -459,8 +1032,8 @@ static void hook_sb_delete(struct super_block *const sb)
* a dedicated user space option would be required (e.g. as a ruleset flag).
*/
static int hook_sb_mount(const char *const dev_name,
- const struct path *const path, const char *const type,
- const unsigned long flags, void *const data)
+ const struct path *const path, const char *const type,
+ const unsigned long flags, void *const data)
{
if (!landlock_get_current_domain())
return 0;
@@ -468,7 +1041,7 @@ static int hook_sb_mount(const char *const dev_name,
}
static int hook_move_mount(const struct path *const from_path,
- const struct path *const to_path)
+ const struct path *const to_path)
{
if (!landlock_get_current_domain())
return 0;
@@ -502,7 +1075,7 @@ static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
* view of the filesystem.
*/
static int hook_sb_pivotroot(const struct path *const old_path,
- const struct path *const new_path)
+ const struct path *const new_path)
{
if (!landlock_get_current_domain())
return 0;
@@ -511,97 +1084,34 @@ static int hook_sb_pivotroot(const struct path *const old_path,
/* Path hooks */
-static inline u32 get_mode_access(const umode_t mode)
-{
- switch (mode & S_IFMT) {
- case S_IFLNK:
- return LANDLOCK_ACCESS_FS_MAKE_SYM;
- case 0:
- /* A zero mode translates to S_IFREG. */
- case S_IFREG:
- return LANDLOCK_ACCESS_FS_MAKE_REG;
- case S_IFDIR:
- return LANDLOCK_ACCESS_FS_MAKE_DIR;
- case S_IFCHR:
- return LANDLOCK_ACCESS_FS_MAKE_CHAR;
- case S_IFBLK:
- return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
- case S_IFIFO:
- return LANDLOCK_ACCESS_FS_MAKE_FIFO;
- case S_IFSOCK:
- return LANDLOCK_ACCESS_FS_MAKE_SOCK;
- default:
- WARN_ON_ONCE(1);
- return 0;
- }
-}
-
-/*
- * Creating multiple links or renaming may lead to privilege escalations if not
- * handled properly. Indeed, we must be sure that the source doesn't gain more
- * privileges by being accessible from the destination. This is getting more
- * complex when dealing with multiple layers. The whole picture can be seen as
- * a multilayer partial ordering problem. A future version of Landlock will
- * deal with that.
- */
static int hook_path_link(struct dentry *const old_dentry,
- const struct path *const new_dir,
- struct dentry *const new_dentry)
-{
- const struct landlock_ruleset *const dom =
- landlock_get_current_domain();
-
- if (!dom)
- return 0;
- /* The mount points are the same for old and new paths, cf. EXDEV. */
- if (old_dentry->d_parent != new_dir->dentry)
- /* Gracefully forbids reparenting. */
- return -EXDEV;
- if (unlikely(d_is_negative(old_dentry)))
- return -ENOENT;
- return check_access_path(dom, new_dir,
- get_mode_access(d_backing_inode(old_dentry)->i_mode));
-}
-
-static inline u32 maybe_remove(const struct dentry *const dentry)
+ const struct path *const new_dir,
+ struct dentry *const new_dentry)
{
- if (d_is_negative(dentry))
- return 0;
- return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
- LANDLOCK_ACCESS_FS_REMOVE_FILE;
+ return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
+ false);
}
static int hook_path_rename(const struct path *const old_dir,
- struct dentry *const old_dentry,
- const struct path *const new_dir,
- struct dentry *const new_dentry)
+ struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry,
+ const unsigned int flags)
{
- const struct landlock_ruleset *const dom =
- landlock_get_current_domain();
-
- if (!dom)
- return 0;
- /* The mount points are the same for old and new paths, cf. EXDEV. */
- if (old_dir->dentry != new_dir->dentry)
- /* Gracefully forbids reparenting. */
- return -EXDEV;
- if (unlikely(d_is_negative(old_dentry)))
- return -ENOENT;
- /* RENAME_EXCHANGE is handled because directories are the same. */
- return check_access_path(dom, old_dir, maybe_remove(old_dentry) |
- maybe_remove(new_dentry) |
- get_mode_access(d_backing_inode(old_dentry)->i_mode));
+ /* old_dir refers to old_dentry->d_parent and new_dir->mnt */
+ return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
+ !!(flags & RENAME_EXCHANGE));
}
static int hook_path_mkdir(const struct path *const dir,
- struct dentry *const dentry, const umode_t mode)
+ struct dentry *const dentry, const umode_t mode)
{
return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
}
static int hook_path_mknod(const struct path *const dir,
- struct dentry *const dentry, const umode_t mode,
- const unsigned int dev)
+ struct dentry *const dentry, const umode_t mode,
+ const unsigned int dev)
{
const struct landlock_ruleset *const dom =
landlock_get_current_domain();
@@ -612,28 +1122,29 @@ static int hook_path_mknod(const struct path *const dir,
}
static int hook_path_symlink(const struct path *const dir,
- struct dentry *const dentry, const char *const old_name)
+ struct dentry *const dentry,
+ const char *const old_name)
{
return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
}
static int hook_path_unlink(const struct path *const dir,
- struct dentry *const dentry)
+ struct dentry *const dentry)
{
return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
}
static int hook_path_rmdir(const struct path *const dir,
- struct dentry *const dentry)
+ struct dentry *const dentry)
{
return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
}
/* File hooks */
-static inline u32 get_file_access(const struct file *const file)
+static inline access_mask_t get_file_access(const struct file *const file)
{
- u32 access = 0;
+ access_mask_t access = 0;
if (file->f_mode & FMODE_READ) {
/* A directory can only be opened in read mode. */
@@ -688,5 +1199,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
__init void landlock_add_fs_hooks(void)
{
security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
- LANDLOCK_NAME);
+ LANDLOCK_NAME);
}