589: Fine grained concurrency on thumbv6m (no BASEPRI). r=korken89 a=perlindgren

This is an experimental implementation of SRP based scheduling on the M0/M0+ (thumbv6m) architecture. 

The aim is a (sub)-zero abstraction to the resource protection (locking mechanism).

Please try it, but do not merge yet, since it's an early POC. 

Co-authored-by: Per Lindgren <per.lindgren@ltu.se>
This commit is contained in:
bors[bot] 2022-03-04 15:20:26 +00:00 committed by GitHub
commit a765f3fffa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 434 additions and 16 deletions

View file

@ -15,6 +15,7 @@ For each category, *Added*, *Changed*, *Fixed* add new entries at the top!
- Rework branch structure, release/vVERSION
- Cargo clippy in CI
- Use rust-cache Github Action
- Support for NVIC based SRP based scheduling for armv6m.
- CI changelog entry enforcer
- `examples/periodic-at.rs`, an example of a periodic timer without accumulated drift.
- `examples/periodic-at2.rs`, an example of a periodic process with two tasks, with offset timing.

47
ci/expected/complex.run Normal file
View file

@ -0,0 +1,47 @@
init
idle p0 started
t2 p4 called 1 time
enter lock s4 0
t3 p4 exit
idle enter lock s3 0
idle pend t0
idle pend t1
idle pend t2
t2 p4 called 2 times
enter lock s4 1
t3 p4 exit
idle still in lock s3 0
t1 p3 called 1 time
t1 enter lock s4 2
t1 pend t0
t1 pend t2
t1 still in lock s4 2
t2 p4 called 3 times
enter lock s4 2
t3 p4 exit
t1 p3 exit
t0 p2 called 1 time
t0 p2 exit
back in idle
enter lock s2 0
idle pend t0
idle pend t1
t1 p3 called 2 times
t1 enter lock s4 3
t1 pend t0
t1 pend t2
t1 still in lock s4 3
t2 p4 called 4 times
enter lock s4 3
t3 p4 exit
t1 p3 exit
idle pend t2
t2 p4 called 5 times
enter lock s4 4
t3 p4 exit
idle still in lock s2 0
t0 p2 called 2 times
t0 p2 exit
idle exit

132
examples/complex.rs Normal file
View file

@ -0,0 +1,132 @@
//! examples/complex.rs
#![deny(unsafe_code)]
#![deny(warnings)]
#![no_main]
#![no_std]
use panic_semihosting as _;
#[rtic::app(device = lm3s6965)]
mod app {
use cortex_m_semihosting::{debug, hprintln};
use lm3s6965::Interrupt;
// Resources shared between tasks. The ceiling noted on each field is the
// highest priority among the tasks that list it in their `shared = [...]`.
#[shared]
struct Shared {
s2: u32, // ceiling 2: listed by idle (p0) and t0 (p2)
s3: u32, // ceiling 3: listed by idle (p0), t0 (p2) and t1 (p3)
s4: u32, // ceiling 4: listed by t1 (p3) and t2 (p4)
}
// No late-initialised local resources: the per-task `times` counters are
// declared inline in the `#[task(..., local = [...])]` attributes instead.
#[local]
struct Local {}
// Start-up hook: prints "init" over semihosting and hands the shared
// resources, all starting at zero, to the framework.
#[init]
fn init(_: init::Context) -> (Shared, Local, init::Monotonics) {
    hprintln!("init").unwrap();

    // Every counter starts at 0; in this example only `s4` is ever modified
    // (incremented inside t2's lock).
    let shared = Shared { s2: 0, s3: 0, s4: 0 };

    (shared, Local {}, init::Monotonics())
}
// Background task (priority 0) that drives the whole scenario. It pends the
// three hardware tasks from inside and outside critical sections so that the
// printed trace (checked against ci/expected/complex.run) shows which tasks a
// lock with a given ceiling holds back and which ones may still preempt.
#[idle(shared = [s2, s3])]
fn idle(mut cx: idle::Context) -> ! {
hprintln!("idle p0 started").ok();
// No lock is held yet, so t2 (p4) preempts idle right away.
rtic::pend(Interrupt::GPIOC);
// First critical section: s3 has ceiling 3.
cx.shared.s3.lock(|s| {
hprintln!("idle enter lock s3 {}", s).ok();
hprintln!("idle pend t0").ok();
rtic::pend(Interrupt::GPIOA); // t0 p2, shares s3 (ceiling 3): held back until the lock is released
hprintln!("idle pend t1").ok();
rtic::pend(Interrupt::GPIOB); // t1 p3, shares s3 (ceiling 3): held back until the lock is released
hprintln!("idle pend t2").ok();
rtic::pend(Interrupt::GPIOC); // t2 p4, does not share s3: priority above the ceiling, preempts here
hprintln!("idle still in lock s3 {}", s).ok();
});
// Leaving the lock lets the held-back tasks run; in the expected trace t1
// runs before t0.
hprintln!("\nback in idle").ok();
// Second critical section: s2 has ceiling 2.
cx.shared.s2.lock(|s| {
hprintln!("enter lock s2 {}", s).ok();
hprintln!("idle pend t0").ok();
rtic::pend(Interrupt::GPIOA); // t0 p2, shares s2 (ceiling 2): held back until the lock is released
hprintln!("idle pend t1").ok();
rtic::pend(Interrupt::GPIOB); // t1 p3, does not share s2: priority above the ceiling, preempts here
hprintln!("idle pend t2").ok();
rtic::pend(Interrupt::GPIOC); // t2 p4, does not share s2: priority above the ceiling, preempts here
hprintln!("idle still in lock s2 {}", s).ok();
});
hprintln!("\nidle exit").ok();
debug::exit(debug::EXIT_SUCCESS); // Exit QEMU simulator
// `idle` must never return (`-> !`); spin in case the exit request above
// does not terminate execution.
loop {
cortex_m::asm::nop();
}
}
// Hardware task bound to GPIOA, priority 2. It only counts its own
// invocations and logs entry and exit; `s2` and `s3` are listed as shared so
// that t0 contributes to their ceilings, but it never locks them.
#[task(binds = GPIOA, priority = 2, local = [times: u32 = 0], shared = [s2, s3])]
fn t0(cx: t0::Context) {
    // `times` is a task-local counter that starts at 0 (see the attribute).
    *cx.local.times += 1;
    let count = *cx.local.times;

    // "time" for the first call, "times" afterwards.
    let suffix = if count > 1 { "s" } else { "" };
    hprintln!("t0 p2 called {} time{}", count, suffix).ok();

    hprintln!("t0 p2 exit").ok();
}
// Hardware task bound to GPIOB, priority 3. It counts its invocations, then
// pends t0 and t2 from inside a lock on `s4` (ceiling 4) to show that both
// stay pending while the lock is held. `s3` is listed as shared but never
// locked here; listing it still counts towards its ceiling.
#[task(binds = GPIOB, priority = 3, local = [times: u32 = 0], shared = [s3, s4])]
fn t1(mut cx: t1::Context) {
// Task-local invocation counter, starts at 0 (see the attribute above)
*cx.local.times += 1;
hprintln!(
"t1 p3 called {} time{}",
*cx.local.times,
if *cx.local.times > 1 { "s" } else { "" }
)
.ok();
cx.shared.s4.lock(|s| {
hprintln!("t1 enter lock s4 {}", s).ok();
hprintln!("t1 pend t0").ok();
rtic::pend(Interrupt::GPIOA); // t0 p2: lower priority than t1, runs only after t1 has returned
hprintln!("t1 pend t2").ok();
rtic::pend(Interrupt::GPIOC); // t2 p4, shares s4 (ceiling 4): held back until the lock is released
hprintln!("t1 still in lock s4 {}", s).ok();
});
// In the expected trace t2 runs right after the lock is released, before
// the exit message below is printed.
hprintln!("t1 p3 exit").ok();
}
// Hardware task bound to GPIOC, priority 4 (the highest in this example).
// It counts its invocations and increments `s4` under its lock.
#[task(binds = GPIOC, priority = 4, local = [times: u32 = 0], shared = [s4])]
fn t2(mut cx: t2::Context) {
    // `times` is a task-local counter that starts at 0 (see the attribute).
    *cx.local.times += 1;
    let count = *cx.local.times;

    // "time" for the first call, "times" afterwards.
    let suffix = if count > 1 { "s" } else { "" };
    hprintln!("t2 p4 called {} time{}", count, suffix).unwrap();

    cx.shared.s4.lock(|s4| {
        hprintln!("enter lock s4 {}", s4).ok();
        *s4 += 1;
    });

    // NOTE(review): the message says "t3" although this is task t2. The
    // expected CI trace contains the same text, so both must change together.
    hprintln!("t3 p4 exit").ok();
}
}

View file

@ -28,7 +28,7 @@ pub fn app(app: &App, analysis: &Analysis, extra: &Extra) -> TokenStream2 {
let mut user = vec![];
// Generate the `main` function
let assertion_stmts = assertions::codegen(app, analysis);
let assertion_stmts = assertions::codegen(app, analysis, extra);
let pre_init_stmts = pre_init::codegen(app, analysis, extra);

View file

@ -1,11 +1,11 @@
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use crate::analyze::Analysis;
use crate::{analyze::Analysis, check::Extra, codegen::util};
use rtic_syntax::ast::App;
/// Generates compile-time assertions that check that types implement the `Send` / `Sync` traits
pub fn codegen(app: &App, analysis: &Analysis) -> Vec<TokenStream2> {
pub fn codegen(app: &App, analysis: &Analysis, extra: &Extra) -> Vec<TokenStream2> {
let mut stmts = vec![];
for ty in &analysis.send_types {
@ -21,5 +21,33 @@ pub fn codegen(app: &App, analysis: &Analysis) -> Vec<TokenStream2> {
stmts.push(quote!(rtic::export::assert_monotonic::<#ty>();));
}
let device = &extra.device;
let arm_v6_checks: Vec<_> = app
.hardware_tasks
.iter()
.filter_map(|(_, task)| {
if !util::is_exception(&task.args.binds) {
let interrupt_name = &task.args.binds;
Some(quote!(assert!((#device::Interrupt::#interrupt_name as u32) < 32);))
} else {
None
}
})
.collect();
let const_check = quote! {
const _CONST_CHECK: () = {
if rtic::export::is_armv6() {
#(#arm_v6_checks)*
} else {
// TODO: Add armv7 checks here
}
};
let _ = _CONST_CHECK;
};
stmts.push(const_check);
stmts
}

View file

@ -105,5 +105,38 @@ pub fn codegen(
})
};
// Computing mapping of used interrupts to masks
let interrupt_ids = analysis.interrupts.iter().map(|(p, (id, _))| (p, id));
use std::collections::HashMap;
let mut masks: HashMap<u8, _> = std::collections::HashMap::new();
let device = &extra.device;
for p in 0..3 {
masks.insert(p, quote!(0));
}
for (&priority, name) in interrupt_ids.chain(app.hardware_tasks.values().flat_map(|task| {
if !util::is_exception(&task.args.binds) {
Some((&task.args.priority, &task.args.binds))
} else {
// TODO: exceptions not implemented
None
}
})) {
let name = quote!(#device::Interrupt::#name as u32);
if let Some(v) = masks.get_mut(&(priority - 1)) {
*v = quote!(#v | 1 << #name);
};
}
let mut mask_arr: Vec<(_, _)> = masks.iter().collect();
mask_arr.sort_by_key(|(k, _v)| *k);
let mask_arr: Vec<_> = mask_arr.iter().map(|(_, v)| v).collect();
mod_app.push(quote!(
const MASKS: [u32; 3] = [#(#mask_arr),*];
));
(mod_app, mod_resources)
}

View file

@ -52,6 +52,7 @@ pub fn impl_mutex(
#priority,
CEILING,
#device::NVIC_PRIO_BITS,
&MASKS,
f,
)
}

View file

@ -102,6 +102,19 @@ impl Priority {
}
}
/// Reports whether the crate is being compiled with the `armv6m` cfg set.
///
/// Being a `const fn`, the answer is available in constant contexts, which
/// lets generated code select architecture specific compile-time checks.
///
/// Returns `true` when `cfg(armv6m)` is active and `false` otherwise.
pub const fn is_armv6() -> bool {
    cfg!(armv6m)
}
#[inline(always)]
pub fn assert_send<T>()
where
@ -123,13 +136,40 @@ where
{
}
/// Lock the resource proxy by setting the BASEPRI
/// and running the closure with interrupt::free
/// Lock implementation using BASEPRI and global Critical Section (CS)
///
/// # Safety
///
/// Writing to the BASEPRI
/// Dereferencing a raw pointer
/// The system ceiling is raised from current to ceiling
/// by either
/// - raising the BASEPRI to the ceiling value, or
/// - disable all interrupts in case we want to
/// mask interrupts with maximum priority
///
/// Dereferencing a raw pointer inside CS
///
/// The priority.set/priority.get can safely be outside the CS
/// as being a context local cell (not affected by preemptions).
/// It is merely used in order to omit masking in case
/// current priority >= ceiling.
///
/// Lock Efficiency:
/// Experiments validate (sub)-zero cost for CS implementation
/// (Sub)-zero as:
/// - Either zero OH (lock optimized out), or
/// - Amounting to an optimal assembly implementation
/// - The BASEPRI value is folded to a constant at compile time
/// - CS entry, single assembly instruction to write BASEPRI
/// - CS exit, single assembly instruction to write BASEPRI
/// - priority.set/get optimized out (their effect not)
/// - On par or better than any handwritten implementation of SRP
///
/// Limitations:
/// The current implementation reads/writes BASEPRI once
/// even in some edge cases where this may be omitted.
/// Total OH per task is max 2 clock cycles, negligible in practice
/// but can in theory be fixed.
///
#[cfg(armv7m)]
#[inline(always)]
pub unsafe fn lock<T, R>(
@ -137,6 +177,7 @@ pub unsafe fn lock<T, R>(
priority: &Priority,
ceiling: u8,
nvic_prio_bits: u8,
_mask: &[u32; 3],
f: impl FnOnce(&mut T) -> R,
) -> R {
let current = priority.get();
@ -160,13 +201,50 @@ pub unsafe fn lock<T, R>(
}
}
/// Lock the resource proxy by setting the PRIMASK
/// and running the closure with ``interrupt::free``
/// Lock implementation using interrupt masking
///
/// # Safety
///
/// Writing to the PRIMASK
/// Dereferencing a raw pointer
/// The system ceiling is raised from current to ceiling
/// by computing a 32 bit `mask` (1 bit per interrupt)
/// 1: ceiling >= priority > current
/// 0: else
///
/// On CS entry, `clear_enable_mask(mask)` disables interrupts
/// On CS exit, `set_enable_mask(mask)` re-enables interrupts
///
/// The priority.set/priority.get can safely be outside the CS
/// as being a context local cell (not affected by preemptions).
/// It is merely used in order to omit masking in case
/// current priority >= ceiling.
///
/// Dereferencing a raw pointer is done safely inside the CS
///
/// Lock Efficiency:
/// Early experiments validate (sub)-zero cost for CS implementation
/// (Sub)-zero as:
/// - Either zero OH (lock optimized out), or
/// - Amounting to an optimal assembly implementation
/// - if ceiling == (1 << nvic_prio_bits)
/// - we execute the closure in a global critical section (interrupt free)
/// - CS entry cost, single write to core register
/// - CS exit cost, single write to core register
/// else
/// - The `mask` value is folded to a constant at compile time
/// - CS entry, single write of the 32 bit `mask` to the `icer` register
/// - CS exit, single write of the 32 bit `mask` to the `iser` register
/// - priority.set/get optimized out (their effect not)
/// - On par or better than any hand written implementation of SRP
///
/// Limitations:
/// Current implementation does not allow for tasks with shared resources
/// to be bound to exception handlers, as these cannot be masked in HW.
///
/// Possible solutions:
/// - Mask exceptions by global critical sections (interrupt::free)
/// - Temporary lower exception priority
///
/// These possible solutions are set goals for future work
#[cfg(not(armv7m))]
#[inline(always)]
pub unsafe fn lock<T, R>(
@ -174,20 +252,64 @@ pub unsafe fn lock<T, R>(
priority: &Priority,
ceiling: u8,
_nvic_prio_bits: u8,
masks: &[u32; 3],
f: impl FnOnce(&mut T) -> R,
) -> R {
let current = priority.get();
if current < ceiling {
priority.set(u8::max_value());
let r = interrupt::free(|_| f(&mut *ptr));
priority.set(current);
r
if ceiling >= 4 {
// safe to manipulate outside critical section
priority.set(ceiling);
// execute closure under protection of raised system ceiling
let r = interrupt::free(|_| f(&mut *ptr));
// safe to manipulate outside critical section
priority.set(current);
r
} else {
// safe to manipulate outside critical section
priority.set(ceiling);
let mask = compute_mask(current, ceiling, masks);
clear_enable_mask(mask);
// execute closure under protection of raised system ceiling
let r = f(&mut *ptr);
set_enable_mask(mask);
// safe to manipulate outside critical section
priority.set(current);
r
}
} else {
// execute closure without raising system ceiling
f(&mut *ptr)
}
}
/// Combines the per-priority interrupt masks for the index range
/// `from_prio..to_prio` (upper bound exclusive) into one 32 bit mask.
///
/// `masks[i]` is expected to hold one bit per interrupt running at logical
/// priority `i + 1`, so the result covers every interrupt whose priority `p`
/// satisfies `to_prio >= p > from_prio`. An empty range yields `0`.
///
/// # Panics
///
/// Panics if `from_prio > to_prio` or `to_prio > 3`, as the range is then
/// not a valid sub-slice of `masks`.
#[cfg(not(armv7m))]
#[inline(always)]
fn compute_mask(from_prio: u8, to_prio: u8, masks: &[u32; 3]) -> u32 {
    let (lo, hi) = (usize::from(from_prio), usize::from(to_prio));
    masks[lo..hi].iter().fold(0, |acc, level| acc | level)
}
/// Enables the interrupts selected by `mask` by writing it to the first NVIC
/// interrupt set-enable register (`iser[0]`); `lock` calls this when leaving
/// the critical section.
///
/// Only `iser[0]` is written, so only interrupt numbers 0..=31 are reachable.
///
/// # Safety
///
/// Dereferences the raw NVIC register block pointer and writes a hardware
/// register. The caller must make sure that enabling these interrupts at
/// this point does not break an enclosing critical section.
#[cfg(not(armv7m))]
#[inline(always)]
unsafe fn set_enable_mask(mask: u32) {
    let nvic = &*NVIC::ptr();
    nvic.iser[0].write(mask)
}
/// Disables the interrupts selected by `mask` by writing it to the first NVIC
/// interrupt clear-enable register (`icer[0]`); `lock` calls this when
/// entering the critical section.
///
/// Only `icer[0]` is written, so only interrupt numbers 0..=31 are reachable.
///
/// # Safety
///
/// Dereferences the raw NVIC register block pointer and writes a hardware
/// register. The caller is responsible for re-enabling the same interrupts
/// afterwards (see `set_enable_mask`).
#[cfg(not(armv7m))]
#[inline(always)]
unsafe fn clear_enable_mask(mask: u32) {
    let nvic = &*NVIC::ptr();
    nvic.icer[0].write(mask)
}
#[inline]
#[must_use]
pub fn logical2hw(logical: u8, nvic_prio_bits: u8) -> u8 {

View file

@ -0,0 +1,54 @@
//! v6m-interrupt-not-enough.rs_no (not run atm)
//!
//! Expected behavior:
//! should pass
//! > cargo build --example m0_perf_err --target thumbv7m-none-eabi --release
//!
//! should fail
//! > cargo build --example m0_perf_err --target thumbv6m-none-eabi --release
//! Compiling cortex-m-rtic v1.0.0 (/home/pln/rust/rtic/cortex-m-rtic)
//! error[E0308]: mismatched types
//! --> examples/m0_perf_err.rs:25:1
//! |
//! 25 | #[rtic::app(device = lm3s6965)]
//! | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected an array with a fixed size of 4 elements, found one with 5 elements
//! |
//! = note: this error originates in the attribute macro `rtic::app` (in Nightly builds, run with -Z macro-backtrace for more info)
#![deny(unsafe_code)]
#![deny(warnings)]
#![no_main]
#![no_std]
use panic_semihosting as _;
#[rtic::app(device = lm3s6965)]
mod app {
use cortex_m_semihosting::debug;
// This example needs no shared resources; the struct is declared empty.
#[shared]
struct Shared {}
// This example needs no local resources; the struct is declared empty.
#[local]
struct Local {}
// Start-up hook: nothing to set up, it just returns the empty resource
// structs and an empty set of monotonics.
#[init]
fn init(_: init::Context) -> (Shared, Local, init::Monotonics) {
(Shared {}, Local {}, init::Monotonics())
}
// Background task: requests a successful exit from the simulator and then
// spins, since `idle` must never return (`-> !`).
// NOTE(review): `#[inline(never)]` presumably keeps `idle` as a separate
// symbol for inspection of the generated code; confirm the intent.
#[inline(never)]
#[idle]
fn idle(_cx: idle::Context) -> ! {
debug::exit(debug::EXIT_SUCCESS); // Exit QEMU simulator
loop {
cortex_m::asm::nop();
}
}
// Priority too high for v6m: per the header of this file, building this
// example for thumbv6m is expected to fail because of this task's priority.
// The body is intentionally empty.
#[task(binds = GPIOA, priority = 5)]
fn t0(_cx: t0::Context) {}
}