aboutsummaryrefslogtreecommitdiff
path: root/src/lock.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/lock.zig')
-rw-r--r--src/lock.zig162
1 files changed, 71 insertions, 91 deletions
diff --git a/src/lock.zig b/src/lock.zig
index 73dfdebd2..4ae9444e0 100644
--- a/src/lock.zig
+++ b/src/lock.zig
@@ -6,118 +6,101 @@ const Futex = std.Thread.Futex;
pub const Mutex = struct {
state: Atomic(u32) = Atomic(u32).init(UNLOCKED),
- const UNLOCKED: u32 = 0;
- const LOCKED: u32 = 1;
- const CONTENDED: u32 = 2;
+ const UNLOCKED = 0;
+ const LOCKED = 0b01;
+ const CONTENDED = 0b11;
+ const is_x86 = std.Target.current.cpu.arch.isX86();
pub fn tryAcquire(self: *Mutex) bool {
- return self.state.compareAndSwap(
- UNLOCKED,
- LOCKED,
- .Acquire,
- .Monotonic,
- ) == null;
+ return self.acquireFast(true);
}
pub fn acquire(self: *Mutex) void {
- if (self.state.tryCompareAndSwap(
- UNLOCKED,
- LOCKED,
- .Acquire,
- .Monotonic,
- )) |updated| {
+ if (!self.acquireFast(false)) {
self.acquireSlow();
}
}
- fn acquireSlow(self: *Mutex) void {
- @setCold(true);
+ inline fn acquireFast(self: *Mutex, comptime strong: bool) bool {
+ // On x86, "lock bts" uses less i-cache & can be faster than "lock cmpxchg" below.
+ if (comptime is_x86) {
+ return self.state.bitSet(@ctz(u32, LOCKED), .Acquire) == UNLOCKED;
+ }
- // true if the cpu's atomic swap instruction should be preferred
- const has_fast_swap = comptime blk: {
- const arch = std.Target.current.cpu.arch;
- break :blk arch.isX86() or arch.isRISCV();
+ const cas_fn = comptime switch (strong) {
+ true => "compareAndSwap",
+ else => "tryCompareAndSwap",
};
- var acquire_state = LOCKED;
- var state = self.state.load(.Monotonic);
- var spin: u8 = if (comptime has_fast_swap) 100 else 10;
-
- while (true) {
- // Try to lock the Mutex if its unlocked.
- // acquire_state is changed to CONTENDED if this thread goes to sleep.
- //
- // We acquire with CONTENDED instead of LOCKED in that scenario
- // to make sure that we wake another thread sleeping in release()
- // which didn't see the transition to UNLOCKED since it was asleep.
- //
- // A CONTENDED acquire unfortunately results in one extra wake()
- // if there were no other sleeping threads at the time of the acquire.
- if (state == UNLOCKED) {
- state = self.state.tryCompareAndSwap(
- state,
- acquire_state,
+ return @field(self.state, cas_fn)(
+ UNLOCKED,
+ LOCKED,
+ .Acquire,
+ .Monotonic,
+ ) == null;
+ }
+
+ noinline fn acquireSlow(self: *Mutex) void {
+ // Spin a little bit on the Mutex state in the hopes that
+ // we can acquire it without having to call Futex.wait().
+ // Give up spinning if the Mutex is contended.
+ // This helps acquire() latency under micro-contention.
+ //
+ // Only spin on x86 as other platforms are assumed to
+ // prioritize power efficiency over strict performance.
+ var spin: u8 = comptime if (is_x86) 100 else 0;
+ while (spin > 0) : (spin -= 1) {
+ std.atomic.spinLoopHint();
+
+ switch (self.state.load(.Monotonic)) {
+ UNLOCKED => _ = self.state.tryCompareAndSwap(
+ UNLOCKED,
+ LOCKED,
.Acquire,
.Monotonic,
- ) orelse return;
- continue;
+ ) orelse return,
+ LOCKED => continue,
+ CONTENDED => break,
+ else => unreachable, // invalid Mutex state
}
+ }
- if (state != CONTENDED) uncontended: {
- // If there's no pending threads, try to spin on the Mutex a few times.
- // This makes the throughput close to a spinlock when under micro-contention.
- if (spin > 0) {
- spin -= 1;
- std.atomic.spinLoopHint();
- state = self.state.load(.Monotonic);
- continue;
- }
-
- // Indicate that there will be a waiting thread by updating to CONTENDED.
- // Acquire barrier as this swap could also possibly lock the Mutex.
- if (comptime has_fast_swap) {
- state = self.state.swap(CONTENDED, .Acquire);
- if (state == UNLOCKED) return;
- break :uncontended;
- }
-
- // For other platforms, mark the Mutex as CONTENDED if it's not already.
- // This just indicates that there's waiting threads so no Acquire barrier needed.
- if (self.state.tryCompareAndSwap(
- state,
- CONTENDED,
- .Monotonic,
- .Monotonic,
- )) |updated| {
- state = updated;
- continue;
+ // Make sure the state is CONTENDED before sleeping with Futex so release() can wake us up.
+ // Transitioning to CONTENDED may also acquire the mutex in the process.
+ //
+ // If we sleep, we must acquire the Mutex with CONTENDED to ensure that other threads
+ // sleeping on the Futex having seen CONTENDED before are eventually woken up by release().
+ // This unfortunately ends up in an extra Futex.wake() for the last thread but that's ok.
+ while (true) : (Futex.wait(&self.state, CONTENDED, null) catch unreachable) {
+ // On x86, "xchg" can be faster than "lock cmpxchg" below.
+ if (comptime is_x86) {
+ switch (self.state.swap(CONTENDED, .Acquire)) {
+ UNLOCKED => return,
+ LOCKED, CONTENDED => continue,
+ else => unreachable, // invalid Mutex state
}
}
- Futex.wait(&self.state, CONTENDED, null) catch unreachable;
- state = self.state.load(.Monotonic);
- acquire_state = CONTENDED;
+ var state = self.state.load(.Monotonic);
+ while (state != CONTENDED) {
+ state = switch (state) {
+ UNLOCKED => self.state.tryCompareAndSwap(state, CONTENDED, .Acquire, .Monotonic) orelse return,
+ LOCKED => self.state.tryCompareAndSwap(state, CONTENDED, .Monotonic, .Monotonic) orelse break,
+ CONTENDED => unreachable, // checked above
+ else => unreachable, // invalid Mutex state
+ };
+ }
}
}
pub fn release(self: *Mutex) void {
- const state = self.state.swap(UNLOCKED, .Release);
-
- // Wake up a sleeping thread if it was previously CONTENDED.
- // The woken up thread would acquire by updating the state to CONTENDED again.
- // This is to make sure a future release() wouldn't miss waking up threads that
- // don't see the reset to UNLOCKED above due to them being asleep.
- if (state == CONTENDED) {
- self.releaseSlow();
+ switch (self.state.swap(UNLOCKED, .Release)) {
+ UNLOCKED => unreachable, // released without being acquired
+ LOCKED => {},
+ CONTENDED => Futex.wake(&self.state, 1),
+ else => unreachable, // invalid Mutex state
}
}
-
- fn releaseSlow(self: *Mutex) void {
- @setCold(true);
-
- const num_waiters = 1;
- Futex.wake(&self.state, num_waiters);
- }
};
pub const Lock = struct {
@@ -136,7 +119,4 @@ pub const Lock = struct {
}
};
-
-pub fn spinCycle() void {
-
-} \ No newline at end of file
+pub fn spinCycle() void {}