From b01aef646101b6fecdc5c998def9d2136074bf48 Mon Sep 17 00:00:00 2001 From: Per Date: Sun, 29 Sep 2019 15:53:26 +0200 Subject: [PATCH] Adding disassembly and polishing the Notes.md --- Cargo.toml | 5 +- Notes.md | 30 +++++++- lockopt_vanilla.asm | 173 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 lockopt_vanilla.asm diff --git a/Cargo.toml b/Cargo.toml index d8b7563e2e..90d6f4c377 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,8 +45,11 @@ required-features = ["__v7"] name = "types" required-features = ["__v7"] +[dependencies.cortex-m] +version = "0.6.0" +features = ["inline-asm"] + [dependencies] -cortex-m = "0.6.0" cortex-m-rtfm-macros = { path = "macros" } rtfm-core = "0.3.0-beta.1" cortex-m-rt = "0.6.9" diff --git a/Notes.md b/Notes.md index c7f881d361..bc80d6c2e7 100644 --- a/Notes.md +++ b/Notes.md @@ -147,6 +147,34 @@ Overall, using this approach, we don't need a trampoline (`run`). We reduce the The `examples/lockopt.rs` shows that locks are effectively optimized out. +Old Implementation +``` asm +00000130 : + 130: 21a0 movs r1, #160 ; 0xa0 + 132: f3ef 8011 mrs r0, BASEPRI + 136: f381 8811 msr BASEPRI, r1 + 13a: f240 0100 movw r1, #0 + 13e: f2c2 0100 movt r1, #8192 ; 0x2000 + 142: 680a ldr r2, [r1, #0] + 144: 3201 adds r2, #1 + 146: 600a str r2, [r1, #0] + 148: 21c0 movs r1, #192 ; 0xc0 + 14a: f381 8811 msr BASEPRI, r1 + 14e: f380 8811 msr BASEPRI, r0 + 152: 4770 bx lr + +00000154 : + 154: f240 0100 movw r1, #0 + 158: f3ef 8011 mrs r0, BASEPRI + 15c: f2c2 0100 movt r1, #8192 ; 0x2000 + 160: 680a ldr r2, [r1, #0] + 162: 3202 adds r2, #2 + 164: 600a str r2, [r1, #0] + 166: f380 8811 msr BASEPRI, r0 + 16a: 4770 bx lr +``` + +With lock opt. We see a 20% improvement for short/small tasks. ``` asm 00000128 : 128: 21a0 movs r1, #160 ; 0xa0 @@ -169,7 +197,7 @@ The `examples/lockopt.rs` shows that locks are effectively optimized out. 
154: 4770 bx lr ``` -GPIOB/C are sharing a resource (C higher prio). Notice, there is no BASEPRI manipulation at all. +GPIOB/C are sharing a resource (C higher prio). Notice, for GPIOC there is no BASEPRI manipulation at all. For GPIOB, there is a single read of BASEPRI (stored in `old_basepri_hw`) and just two writes, one for entering the critical section, one for exiting. On exit, we detect that we are indeed at the initial priority for the task; thus we restore the `old_basepri_hw` instead of a logical priority. diff --git a/lockopt_vanilla.asm b/lockopt_vanilla.asm new file mode 100644 index 0000000000..6c9d90cc38 --- /dev/null +++ b/lockopt_vanilla.asm @@ -0,0 +1,173 @@ + +target/thumbv7m-none-eabi/release/examples/lockopt: file format elf32-littlearm + + +Disassembly of section .text: + +000000f0 : + f0: b580 push {r7, lr} + f2: 20a0 movs r0, #160 ; 0xa0 + f4: f380 8811 msr BASEPRI, r0 + f8: f240 0000 movw r0, #0 + fc: f2c2 0000 movt r0, #8192 ; 0x2000 + 100: 6801 ldr r1, [r0, #0] + 102: 3101 adds r1, #1 + 104: 6001 str r1, [r0, #0] + 106: f24e 2000 movw r0, #57856 ; 0xe200 + 10a: 2102 movs r1, #2 + 10c: f2ce 0000 movt r0, #57344 ; 0xe000 + 110: 6001 str r1, [r0, #0] + 112: 2104 movs r1, #4 + 114: 6001 str r1, [r0, #0] + 116: 2126 movs r1, #38 ; 0x26 + 118: 20e0 movs r0, #224 ; 0xe0 + 11a: f380 8811 msr BASEPRI, r0 + 11e: f2c0 0102 movt r1, #2 + 122: 2018 movs r0, #24 + 124: f000 f892 bl 24c <__syscall> + 128: 2000 movs r0, #0 + 12a: f380 8811 msr BASEPRI, r0 + 12e: bd80 pop {r7, pc} + +00000130 : + 130: 21a0 movs r1, #160 ; 0xa0 + 132: f3ef 8011 mrs r0, BASEPRI + 136: f381 8811 msr BASEPRI, r1 + 13a: f240 0100 movw r1, #0 + 13e: f2c2 0100 movt r1, #8192 ; 0x2000 + 142: 680a ldr r2, [r1, #0] + 144: 3201 adds r2, #1 + 146: 600a str r2, [r1, #0] + 148: 21c0 movs r1, #192 ; 0xc0 + 14a: f381 8811 msr BASEPRI, r1 + 14e: f380 8811 msr BASEPRI, r0 + 152: 4770 bx lr + +00000154 : + 154: f240 0100 movw r1, #0 + 158: f3ef 8011 mrs r0, BASEPRI + 15c: f2c2 0100 movt r1, #8192 ; 
0x2000 + 160: 680a ldr r2, [r1, #0] + 162: 3202 adds r2, #2 + 164: 600a str r2, [r1, #0] + 166: f380 8811 msr BASEPRI, r0 + 16a: 4770 bx lr + +0000016c
: + 16c: f24e 1000 movw r0, #57600 ; 0xe100 + 170: f24e 4201 movw r2, #58369 ; 0xe401 + 174: f2ce 0000 movt r0, #57344 ; 0xe000 + 178: 21e0 movs r1, #224 ; 0xe0 + 17a: b672 cpsid i + 17c: f880 1300 strb.w r1, [r0, #768] ; 0x300 + 180: 2101 movs r1, #1 + 182: f2ce 0200 movt r2, #57344 ; 0xe000 + 186: 23c0 movs r3, #192 ; 0xc0 + 188: 6001 str r1, [r0, #0] + 18a: 7013 strb r3, [r2, #0] + 18c: 2302 movs r3, #2 + 18e: 6003 str r3, [r0, #0] + 190: 23a0 movs r3, #160 ; 0xa0 + 192: 7053 strb r3, [r2, #1] + 194: 2204 movs r2, #4 + 196: 6002 str r2, [r0, #0] + 198: f64e 5210 movw r2, #60688 ; 0xed10 + 19c: f2ce 0200 movt r2, #57344 ; 0xe000 + 1a0: 6813 ldr r3, [r2, #0] + 1a2: f043 0302 orr.w r3, r3, #2 + 1a6: 6013 str r3, [r2, #0] + 1a8: f8c0 1100 str.w r1, [r0, #256] ; 0x100 + 1ac: b662 cpsie i + 1ae: bf30 wfi + 1b0: e7fd b.n 1ae + +000001b2 : + 1b2: f000 f84a bl 24a + 1b6: f240 0004 movw r0, #4 + 1ba: f240 0100 movw r1, #0 + 1be: f2c2 0000 movt r0, #8192 ; 0x2000 + 1c2: f2c2 0100 movt r1, #8192 ; 0x2000 + 1c6: 4281 cmp r1, r0 + 1c8: d214 bcs.n 1f4 + 1ca: f240 0100 movw r1, #0 + 1ce: 2200 movs r2, #0 + 1d0: f2c2 0100 movt r1, #8192 ; 0x2000 + 1d4: f841 2b04 str.w r2, [r1], #4 + 1d8: 4281 cmp r1, r0 + 1da: bf3c itt cc + 1dc: f841 2b04 strcc.w r2, [r1], #4 + 1e0: 4281 cmpcc r1, r0 + 1e2: d207 bcs.n 1f4 + 1e4: f841 2b04 str.w r2, [r1], #4 + 1e8: 4281 cmp r1, r0 + 1ea: d203 bcs.n 1f4 + 1ec: f841 2b04 str.w r2, [r1], #4 + 1f0: 4281 cmp r1, r0 + 1f2: d3ef bcc.n 1d4 + 1f4: f240 0000 movw r0, #0 + 1f8: f240 0100 movw r1, #0 + 1fc: f2c2 0000 movt r0, #8192 ; 0x2000 + 200: f2c2 0100 movt r1, #8192 ; 0x2000 + 204: 4281 cmp r1, r0 + 206: d21c bcs.n 242 + 208: f240 2168 movw r1, #616 ; 0x268 + 20c: f240 0200 movw r2, #0 + 210: f2c0 0100 movt r1, #0 + 214: f2c2 0200 movt r2, #8192 ; 0x2000 + 218: 680b ldr r3, [r1, #0] + 21a: f842 3b04 str.w r3, [r2], #4 + 21e: 4282 cmp r2, r0 + 220: d20f bcs.n 242 + 222: 684b ldr r3, [r1, #4] + 224: f842 3b04 str.w r3, [r2], #4 + 228: 4282 cmp r2, r0 + 
22a: bf3e ittt cc + 22c: 688b ldrcc r3, [r1, #8] + 22e: f842 3b04 strcc.w r3, [r2], #4 + 232: 4282 cmpcc r2, r0 + 234: d205 bcs.n 242 + 236: 68cb ldr r3, [r1, #12] + 238: 3110 adds r1, #16 + 23a: f842 3b04 str.w r3, [r2], #4 + 23e: 4282 cmp r2, r0 + 240: d3ea bcc.n 218 + 242: f7ff ff93 bl 16c
+ 246: defe udf #254 ; 0xfe + +00000248 : + 248: Address 0x0000000000000248 is out of bounds. + + +00000249 : + 249: Address 0x0000000000000249 is out of bounds. + + +0000024a : + 24a: Address 0x000000000000024a is out of bounds. + + +0000024b <__pre_init>: + 24b: Address 0x000000000000024b is out of bounds. + + +0000024c <__syscall>: + 24c: beab bkpt 0x00ab + 24e: 4770 bx lr + +00000250 : + 250: 4670 mov r0, lr + 252: 2104 movs r1, #4 + 254: 4208 tst r0, r1 + 256: d102 bne.n 25e + 258: f3ef 8008 mrs r0, MSP + 25c: e002 b.n 264 + 25e: f3ef 8009 mrs r0, PSP + 262: e7ff b.n 264 + +00000264 : + 264: Address 0x0000000000000264 is out of bounds. + + +00000265 : + 265: Address 0x0000000000000265 is out of bounds. +