summaryrefslogtreecommitdiff
path: root/fs/bcachefs/six.h
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-05-21 22:40:40 +0300
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-23 00:10:02 +0300
commit91d16f16d0fd4b6eb8503068ea7f6ad8305e32db (patch)
tree277706ee9c5ca1b52f3a10612b26687980698f92 /fs/bcachefs/six.h
parent1fb4fe63178881a0ac043a5c05288d9fff85d6b8 (diff)
downloadlinux-91d16f16d0fd4b6eb8503068ea7f6ad8305e32db.tar.xz
six locks: Documentation, renaming
- Expanded and revamped overview documentation in six.h, giving an overview of all features - docbook-comments for all external interfaces - Rename some functions for simplicity, i.e. six_lock_ip_type() -> six_lock_ip() Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/six.h')
-rw-r--r--fs/bcachefs/six.h298
1 files changed, 217 insertions, 81 deletions
diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h
index 449589f76628..82bf9de72490 100644
--- a/fs/bcachefs/six.h
+++ b/fs/bcachefs/six.h
@@ -3,59 +3,124 @@
#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H
-/*
- * Shared/intent/exclusive locks: sleepable read/write locks, much like rw
- * semaphores, except with a third intermediate state, intent. Basic operations
- * are:
+/**
+ * DOC: SIX locks overview
*
- * six_lock_read(&foo->lock);
- * six_unlock_read(&foo->lock);
+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
+ * but with an additional state: read/shared, intent, exclusive/write
*
- * six_lock_intent(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * The purpose of the intent state is to allow for greater concurrency on tree
+ * structures without deadlocking. In general, a read can't be upgraded to a
+ * write lock without deadlocking, so an operation that updates multiple nodes
+ * will have to take write locks for the full duration of the operation.
*
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
+ * But by adding an intent state, which is exclusive with other intent locks but
+ * not with readers, we can take intent locks at the start of the operation,
+ * and then take write locks only for the actual update to each individual
+ * node, without deadlocking.
*
- * Intent locks block other intent locks, but do not block read locks, and you
- * must have an intent lock held before taking a write lock, like so:
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * six_unlock_read(&foo->lock);
*
- * six_lock_intent(&foo->lock);
- * six_lock_write(&foo->lock);
- * six_unlock_write(&foo->lock);
- * six_unlock_intent(&foo->lock);
+ * An intent lock must be held before taking a write lock:
+ * six_lock_intent(&foo->lock);
+ * six_lock_write(&foo->lock);
+ * six_unlock_write(&foo->lock);
+ * six_unlock_intent(&foo->lock);
*
* Other operations:
- *
* six_trylock_read()
* six_trylock_intent()
* six_trylock_write()
*
- * six_lock_downgrade(): convert from intent to read
- * six_lock_tryupgrade(): attempt to convert from read to intent
- *
- * Locks also embed a sequence number, which is incremented when the lock is
- * locked or unlocked for write. The current sequence number can be grabbed
- * while a lock is held from lock->state.seq; then, if you drop the lock you can
- * use six_relock_(read|intent_write)(lock, seq) to attempt to retake the lock
- * iff it hasn't been locked for write in the meantime.
- *
- * There are also operations that take the lock type as a parameter, where the
- * type is one of SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write:
- *
- * six_lock_type(lock, type)
- * six_unlock_type(lock, type)
- * six_relock(lock, type, seq)
- * six_trylock_type(lock, type)
- * six_trylock_convert(lock, from, to)
- *
- * A lock may be held multiple times by the same thread (for read or intent,
- * not write). However, the six locks code does _not_ implement the actual
- * recursive checks itself though - rather, if your code (e.g. btree iterator
- * code) knows that the current thread already has a lock held, and for the
- * correct type, six_lock_increment() may be used to bump up the counter for
- * that type - the only effect is that one more call to unlock will be required
- * before the lock is unlocked.
+ * six_lock_downgrade() convert from intent to read
+ * six_lock_tryupgrade() attempt to convert from read to intent, may fail
+ *
+ * There are also interfaces that take the lock type as an enum:
+ *
+ * six_lock_type(&foo->lock, SIX_LOCK_read);
+ * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent);
+ * six_lock_type(&foo->lock, SIX_LOCK_write);
+ * six_unlock_type(&foo->lock, SIX_LOCK_write);
+ * six_unlock_type(&foo->lock, SIX_LOCK_intent);
+ *
+ * Lock sequence numbers - unlock(), relock():
+ *
+ * Locks embed sequence numbers, which are incremented on write lock/unlock.
+ * This allows locks to be dropped and then retaken iff the state they protect
+ * hasn't changed; this makes it much easier to avoid holding locks while e.g.
+ * doing IO or allocating memory.
+ *
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * u32 seq = six_lock_seq(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ *
+ * some_operation_that_may_block();
+ *
+ * if (six_relock_read(&foo->lock, seq)) { ... }
+ *
+ * If the relock operation succeeds, it is as if the lock was never unlocked.
+ *
+ * Reentrancy:
+ *
+ * Six locks are not by themselves reentrant, but have counters for both the
+ * read and intent states that can be used to provide reentrancy by an upper
+ * layer that tracks held locks. If a lock is known to already be held in the
+ * read or intent state, six_lock_increment() can be used to bump the "lock
+ * held in this state" counter, increasing the number of unlock calls that
+ * will be required to fully unlock it.
+ *
+ * Example usage:
+ * six_lock_read(&foo->lock);
+ * six_lock_increment(&foo->lock, SIX_LOCK_read);
+ * six_unlock_read(&foo->lock);
+ * six_unlock_read(&foo->lock);
+ * foo->lock is now fully unlocked.
+ *
+ * Since the intent state supersedes read, it's legal to increment the read
+ * counter when holding an intent lock, but not the reverse.
+ *
+ * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
+ * is not legal.
+ *
+ * should_sleep_fn:
+ *
+ * There is a six_lock() variant that takes a function pointer that is called
+ * immediately prior to schedule() when blocking, and may return an error to
+ * abort.
+ *
+ * One possible use for this feature is when objects being locked are part of
+ * a cache and may be reused, and lock ordering is based on a property of the
+ * object that will change when the object is reused - i.e. logical key order.
+ *
+ * If looking up an object in the cache may race with object reuse, and lock
+ * ordering is required to prevent deadlock, object reuse may change the
+ * correct lock order for that object and cause a deadlock. should_sleep_fn
+ * can be used to check if the object is still the object we want and avoid
+ * this deadlock.
+ *
+ * Wait list entry interface:
+ *
+ * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
+ * wait list entry. By embedding six_lock_waiter into another object, and by
+ * traversing lock waitlists, it is then possible for an upper layer to
+ * implement full cycle detection for deadlock avoidance.
+ *
+ * should_sleep_fn should be used for invoking the cycle detector, walking the
+ * graph of held locks to check for a deadlock. The upper layer must track
+ * held locks for each thread, and each thread's held locks must be reachable
+ * from its six_lock_waiter object.
+ *
+ * six_lock_waiter() will add the wait object to the waitlist before re-trying
+ * taking the lock, and before calling should_sleep_fn, and the wait object will not
+ * be removed from the waitlist until either the lock has been successfully
+ * acquired, or we aborted because should_sleep_fn returned an error.
+ *
+ * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
+ * have timestamps in strictly ascending order - this is so the timestamp can
+ * be used as a cursor for lock graph traversal.
*/
#include <linux/lockdep.h>
@@ -66,8 +131,6 @@
#include <linux/osq_lock.h>
#endif
-#define SIX_LOCK_SEPARATE_LOCKFNS
-
enum six_lock_type {
SIX_LOCK_read,
SIX_LOCK_intent,
@@ -108,6 +171,11 @@ enum six_lock_init_flags {
void __six_lock_init(struct six_lock *lock, const char *name,
struct lock_class_key *key, enum six_lock_init_flags flags);
+/**
+ * six_lock_init - initialize a six lock
+ * @lock: lock to initialize
+ * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
+ */
#define six_lock_init(lock, flags) \
do { \
static struct lock_class_key __key; \
@@ -115,73 +183,148 @@ do { \
__six_lock_init((lock), #lock, &__key, flags); \
} while (0)
+/**
+ * six_lock_seq - obtain current lock sequence number
+ * @lock: six_lock to obtain sequence number for
+ *
+ * @lock should be held for read or intent, and not write
+ *
+ * By saving the lock sequence number, we can unlock @lock and then (typically
+ * after some blocking operation) attempt to relock it: the relock will succeed
+ * if the sequence number hasn't changed, meaning no write locks have been taken
+ * and state corresponding to what @lock protects is still valid.
+ */
static inline u32 six_lock_seq(const struct six_lock *lock)
{
return atomic64_read(&lock->state) >> 32;
}
-bool six_trylock_ip_type(struct six_lock *lock, enum six_lock_type type,
- unsigned long ip);
+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+/**
+ * six_trylock_type - attempt to take a six lock without blocking
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * Return: true on success, false on failure.
+ */
static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
- return six_trylock_ip_type(lock, type, _THIS_IP_);
+ return six_trylock_ip(lock, type, _THIS_IP_);
}
-int six_lock_type_ip_waiter(struct six_lock *lock, enum six_lock_type type,
- struct six_lock_waiter *wait,
- six_lock_should_sleep_fn should_sleep_fn, void *p,
- unsigned long ip);
-
-static inline int six_lock_type_waiter(struct six_lock *lock, enum six_lock_type type,
- struct six_lock_waiter *wait,
- six_lock_should_sleep_fn should_sleep_fn, void *p)
+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip);
+
+/**
+ * six_lock_waiter - take a lock, with full waitlist interface
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @wait: pointer to wait object, which will be added to lock's waitlist
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * This is a convenience wrapper around six_lock_ip_waiter(), see that function
+ * for full documentation.
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
+ struct six_lock_waiter *wait,
+ six_lock_should_sleep_fn should_sleep_fn, void *p)
{
- return six_lock_type_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
+ return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
}
-static inline int six_lock_ip_type(struct six_lock *lock, enum six_lock_type type,
- six_lock_should_sleep_fn should_sleep_fn, void *p,
- unsigned long ip)
+/**
+ * six_lock_ip - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip)
{
struct six_lock_waiter wait;
- return six_lock_type_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}
+/**
+ * six_lock_type - take a six lock
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
+ * to scheduling
+ * @p: passed through to @should_sleep_fn
+ *
+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
+ */
static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p)
{
struct six_lock_waiter wait;
- return six_lock_type_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
+ return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
}
-bool six_relock_ip_type(struct six_lock *lock, enum six_lock_type type,
- unsigned seq, unsigned long ip);
+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
+ unsigned seq, unsigned long ip);
+/**
+ * six_relock_type - attempt to re-take a lock that was held previously
+ * @lock: lock to take
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
+ * held previously
+ *
+ * Return: true on success, false on failure.
+ */
static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
unsigned seq)
{
- return six_relock_ip_type(lock, type, seq, _THIS_IP_);
+ return six_relock_ip(lock, type, seq, _THIS_IP_);
}
-void six_unlock_ip_type(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
+/**
+ * six_unlock_type - drop a six lock
+ * @lock: lock to unlock
+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
+ *
+ * When a lock is held multiple times (because six_lock_increment() was used),
+ * this decrements the 'lock held' counter by one.
+ *
+ * For example:
+ * six_lock_read(&foo->lock); read count 1
+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 1
+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 0
+ */
static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
- six_unlock_ip_type(lock, type, _THIS_IP_);
+ six_unlock_ip(lock, type, _THIS_IP_);
}
#define __SIX_LOCK(type) \
static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
{ \
- return six_trylock_ip_type(lock, SIX_LOCK_##type, ip); \
+ return six_trylock_ip(lock, SIX_LOCK_##type, ip); \
} \
\
static inline bool six_trylock_##type(struct six_lock *lock) \
{ \
- return six_trylock_ip_type(lock, SIX_LOCK_##type, _THIS_IP_); \
+ return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
} \
\
static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \
@@ -189,24 +332,24 @@ static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \
six_lock_should_sleep_fn should_sleep_fn, void *p,\
unsigned long ip) \
{ \
- return six_lock_type_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
+ return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
} \
\
static inline int six_lock_ip_##type(struct six_lock *lock, \
six_lock_should_sleep_fn should_sleep_fn, void *p, \
unsigned long ip) \
{ \
- return six_lock_ip_type(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
+ return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
} \
\
static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{ \
- return six_relock_ip_type(lock, SIX_LOCK_##type, seq, ip); \
+ return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \
} \
\
static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \
{ \
- return six_relock_ip_type(lock, SIX_LOCK_##type, seq, _THIS_IP_);\
+ return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \
} \
\
static inline int six_lock_##type(struct six_lock *lock, \
@@ -215,21 +358,14 @@ static inline int six_lock_##type(struct six_lock *lock, \
return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \
} \
\
-static inline int six_lock_waiter_##type(struct six_lock *lock, \
- struct six_lock_waiter *wait, \
- six_lock_should_sleep_fn fn, void *p) \
-{ \
- return six_lock_ip_waiter_##type(lock, wait, fn, p, _THIS_IP_); \
-} \
- \
static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \
{ \
- six_unlock_ip_type(lock, SIX_LOCK_##type, ip); \
+ six_unlock_ip(lock, SIX_LOCK_##type, ip); \
} \
\
static inline void six_unlock_##type(struct six_lock *lock) \
{ \
- six_unlock_ip_type(lock, SIX_LOCK_##type, _THIS_IP_); \
+ six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
}
__SIX_LOCK(read)