1 files changed, 135 insertions, 13 deletions
diff --git a/src/export.rs b/src/export.rs
index 838ae843..ed51a9e9 100644
--- a/src/export.rs
+++ b/src/export.rs
@@ -102,6 +102,19 @@ impl Priority {
     }
 }
 
+/// Const helper to check the architecture: `true` only when built with the `armv6m` cfg
+pub const fn is_armv6() -> bool {
+    #[cfg(not(armv6m))]
+    {
+        false
+    }
+
+    #[cfg(armv6m)]
+    {
+        true
+    }
+}
+
 #[inline(always)]
 pub fn assert_send<T>()
 where
@@ -123,13 +136,40 @@ where
 {
 }
 
-/// Lock the resource proxy by setting the BASEPRI
-/// and running the closure with interrupt::free
+/// Lock implementation using BASEPRI and global Critical Section (CS)
 ///
 /// # Safety
 ///
-/// Writing to the BASEPRI
-/// Dereferencing a raw pointer
+/// The system ceiling is raised from current to ceiling
+/// by either
+/// - raising the BASEPRI to the ceiling value, or
+/// - disabling all interrupts in case we want to
+///   mask interrupts with maximum priority
+///
+/// Dereferencing a raw pointer inside CS
+///
+/// The priority.set/priority.get can safely be outside the CS
+/// as being a context local cell (not affected by preemptions).
+/// It is merely used in order to omit masking in case
+/// current priority >= ceiling.
+///
+/// Lock Efficiency:
+/// Experiments validate (sub)-zero cost for CS implementation
+/// (Sub)-zero as:
+/// - Either zero OH (lock optimized out), or
+/// - Amounting to an optimal assembly implementation
+///   - The BASEPRI value is folded to a constant at compile time
+///   - CS entry, single assembly instruction to write BASEPRI
+///   - CS exit, single assembly instruction to write BASEPRI
+///   - priority.set/get optimized out (their effect not)
+/// - On par or better than any handwritten implementation of SRP
+///
+/// Limitations:
+/// The current implementation reads/writes BASEPRI once
+/// even in some edge cases where this may be omitted.
+/// Total OH per task is max 2 clock cycles, negligible in practice,
+/// but can in theory be fixed.
+///
 #[cfg(armv7m)]
 #[inline(always)]
 pub unsafe fn lock<T, R>(
@@ -137,6 +177,7 @@ pub unsafe fn lock<T, R>(
     priority: &Priority,
     ceiling: u8,
     nvic_prio_bits: u8,
+    _mask: &[u32; 3],
     f: impl FnOnce(&mut T) -> R,
 ) -> R {
     let current = priority.get();
@@ -160,13 +201,50 @@ pub unsafe fn lock<T, R>(
     }
 }
 
-/// Lock the resource proxy by setting the PRIMASK
-/// and running the closure with ``interrupt::free``
+/// Lock implementation using interrupt masking
 ///
 /// # Safety
 ///
-/// Writing to the PRIMASK
-/// Dereferencing a raw pointer
+/// The system ceiling is raised from current to ceiling
+/// by computing a 32 bit `mask` (1 bit per interrupt)
+/// 1: ceiling >= priority > current
+/// 0: else
+///
+/// On CS entry, `clear_enable_mask(mask)` disables interrupts
+/// On CS exit,  `set_enable_mask(mask)` re-enables interrupts
+///
+/// The priority.set/priority.get can safely be outside the CS
+/// as being a context local cell (not affected by preemptions).
+/// It is merely used in order to omit masking in case
+/// current priority >= ceiling.
+///
+/// Dereferencing a raw pointer is done safely inside the CS
+///
+/// Lock Efficiency:
+/// Early experiments validate (sub)-zero cost for CS implementation
+/// (Sub)-zero as:
+/// - Either zero OH (lock optimized out), or
+/// - Amounting to an optimal assembly implementation
+///   - if ceiling == (1 << nvic_prio_bits)
+///     - we execute the closure in a global critical section (interrupt free)
+///     - CS entry cost, single write to core register
+///     - CS exit cost, single write to core register
+///   else
+///     - The `mask` value is folded to a constant at compile time
+///     - CS entry, single write of the 32 bit `mask` to the `icer` register
+///     - CS exit, single write of the 32 bit `mask` to the `iser` register
+/// - priority.set/get optimized out (their effect not)
+/// - On par or better than any handwritten implementation of SRP
+///
+/// Limitations:
+/// Current implementation does not allow for tasks with shared resources
+/// to be bound to exception handlers, as these cannot be masked in HW.
+///
+/// Possible solutions:
+/// - Mask exceptions by global critical sections (interrupt::free)
+/// - Temporarily lower the exception priority
+///
+/// These possible solutions are set goals for future work
 #[cfg(not(armv7m))]
 #[inline(always)]
 pub unsafe fn lock<T, R>(
@@ -174,20 +252,64 @@ pub unsafe fn lock<T, R>(
     priority: &Priority,
     ceiling: u8,
     _nvic_prio_bits: u8,
+    masks: &[u32; 3],
     f: impl FnOnce(&mut T) -> R,
 ) -> R {
     let current = priority.get();
-
     if current < ceiling {
-        priority.set(u8::max_value());
-        let r = interrupt::free(|_| f(&mut *ptr));
-        priority.set(current);
-        r
+        if ceiling >= 4 {
+            // safe to manipulate outside critical section
+            priority.set(ceiling);
+            // execute closure under protection of raised system ceiling
+            let r = interrupt::free(|_| f(&mut *ptr));
+            // safe to manipulate outside critical section
+            priority.set(current);
+            r
+        } else {
+            // safe to manipulate outside critical section
+            priority.set(ceiling);
+            let mask = compute_mask(current, ceiling, masks);
+            clear_enable_mask(mask);
+
+            // execute closure under protection of raised system ceiling
+            let r = f(&mut *ptr);
+
+            set_enable_mask(mask);
+
+            // safe to manipulate outside critical section
+            priority.set(current);
+            r
+        }
     } else {
+        // execute closure without raising system ceiling
         f(&mut *ptr)
     }
 }
 
+#[cfg(not(armv7m))]
+#[inline(always)]
+fn compute_mask(from_prio: u8, to_prio: u8, masks: &[u32; 3]) -> u32 {
+    let mut res = 0; // accumulator: bitwise OR of the selected `masks` entries
+    masks[from_prio as usize..to_prio as usize] // entries from_prio (inclusive) up to to_prio (exclusive)
+        .iter()
+        .for_each(|m| res |= m);
+    res // combined mask; stays 0 when the selected range is empty
+}
+
+// Enables interrupts: writes `mask` to the first NVIC `iser` register (`iser[0]`)
+#[cfg(not(armv7m))]
+#[inline(always)]
+unsafe fn set_enable_mask(mask: u32) {
+    (*NVIC::ptr()).iser[0].write(mask)
+}
+
+// Disables interrupts: writes `mask` to the first NVIC `icer` register (`icer[0]`)
+#[cfg(not(armv7m))]
+#[inline(always)]
+unsafe fn clear_enable_mask(mask: u32) {
+    (*NVIC::ptr()).icer[0].write(mask)
+}
+
 #[inline]
 #[must_use]
 pub fn logical2hw(logical: u8, nvic_prio_bits: u8) -> u8 {