| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (C) 2023 Google LLC. |
| 5 | */ |
| 6 | |
| 7 | #ifndef __UNROLL_H |
| 8 | #define __UNROLL_H |
| 9 | |
| 10 | #include <linux/args.h> |
| 11 | |
/*
 * __pick_unrolled(x, y) - emit the unroll pragma understood by the compiler
 * @x: pragma text used when building with Clang
 * @y: pragma text used when building with GCC 8 or newer
 *
 * The selected argument is stringified and handed to _Pragma(). GCC only
 * understands "#pragma GCC unroll" from version 8 onwards, so for older GCC
 * (or any other compiler) the macro expands to nothing: the loop is simply
 * left as written instead of tripping over an unknown pragma.
 */
#ifdef CONFIG_CC_IS_CLANG
#define __pick_unrolled(x, y)		_Pragma(#x)
#elif CONFIG_GCC_VERSION >= 80000
#define __pick_unrolled(x, y)		_Pragma(#y)
#else
#define __pick_unrolled(x, y)		/* not supported */
#endif
| 17 | |
/**
 * unrolled - loop attributes to ask the compiler to unroll it
 *
 * Usage:
 *
 * #define BATCH 8
 *
 *	unrolled_count(BATCH)
 *	for (u32 i = 0; i < BATCH; i++)
 *		// loop body without cross-iteration dependencies
 *
 * This is only a hint and the compiler is free to disable unrolling if it
 * thinks the count is suboptimal and may hurt performance and/or hugely
 * increase object code size.
 * Not having any cross-iteration dependencies (i.e. iteration x + 1 depending
 * on what iteration x does with variables) is not a strict requirement, but
 * it provides the best performance and object code size.
 * Available only on Clang and GCC 8.x onwards.
 */
| 37 | |
/*
 * Let the compiler choose the unroll count on its own. Only Clang has a
 * pragma for this, so the GCC alternative is deliberately left empty.
 */
#define unrolled	__pick_unrolled(clang loop unroll(enable), /* no GCC equivalent */)
| 41 | |
/*
 * Request that the loop be unrolled with a count of @n.
 *
 * @n may itself be a macro (e.g. a #define'd batch size): it is expanded
 * before the pragma text is stringified.
 */
#define unrolled_count(n)	__pick_unrolled(clang loop unroll_count(n), GCC unroll n)
| 45 | |
/*
 * Ask for the loop to be unrolled completely. Clang has a dedicated
 * unroll(full) hint; GCC is instead handed an unroll count of 65534.
 */
#define unrolled_full	__pick_unrolled(clang loop unroll(full), GCC unroll 65534)
| 49 | |
/*
 * Forbid unrolling of the loop: unroll(disable) on Clang, an unroll count
 * of 1 on GCC.
 */
#define unrolled_none	__pick_unrolled(clang loop unroll(disable), GCC unroll 1)
| 53 | |
/*
 * UNROLL(N, MACRO, ...) - textually repeat MACRO at preprocessing time
 *
 * Dispatches to the __UNROLL_<N> helper, which expands to
 * MACRO(0, ...) MACRO(1, ...) ... MACRO(N - 1, ...). Helpers exist for
 * N = 0..20 only; any extra arguments are forwarded unchanged to every
 * MACRO invocation.
 * NOTE(review): the helper name is built with CONCATENATE() from
 * <linux/args.h>; whether N may itself be a macro depends on that helper.
 */
#define UNROLL(N, MACRO, ...)	CONCATENATE(__UNROLL_, N)(MACRO, __VA_ARGS__)
| 55 | |
/*
 * Expansion helpers behind UNROLL(). __UNROLL_<k>(MACRO, ...) first expands
 * __UNROLL_<k - 1> and then appends MACRO(k - 1, ...), so the overall result
 * is MACRO(0, ...) MACRO(1, ...) ... MACRO(k - 1, ...) in ascending order.
 * __UNROLL_0 ends the chain and expands to nothing. Counts 0..20 are provided.
 */
#define __UNROLL_0(MACRO, ...)
#define __UNROLL_1(MACRO, ...)	__UNROLL_0(MACRO, __VA_ARGS__) MACRO(0, __VA_ARGS__)
#define __UNROLL_2(MACRO, ...)	__UNROLL_1(MACRO, __VA_ARGS__) MACRO(1, __VA_ARGS__)
#define __UNROLL_3(MACRO, ...)	__UNROLL_2(MACRO, __VA_ARGS__) MACRO(2, __VA_ARGS__)
#define __UNROLL_4(MACRO, ...)	__UNROLL_3(MACRO, __VA_ARGS__) MACRO(3, __VA_ARGS__)
#define __UNROLL_5(MACRO, ...)	__UNROLL_4(MACRO, __VA_ARGS__) MACRO(4, __VA_ARGS__)
#define __UNROLL_6(MACRO, ...)	__UNROLL_5(MACRO, __VA_ARGS__) MACRO(5, __VA_ARGS__)
#define __UNROLL_7(MACRO, ...)	__UNROLL_6(MACRO, __VA_ARGS__) MACRO(6, __VA_ARGS__)
#define __UNROLL_8(MACRO, ...)	__UNROLL_7(MACRO, __VA_ARGS__) MACRO(7, __VA_ARGS__)
#define __UNROLL_9(MACRO, ...)	__UNROLL_8(MACRO, __VA_ARGS__) MACRO(8, __VA_ARGS__)
#define __UNROLL_10(MACRO, ...)	__UNROLL_9(MACRO, __VA_ARGS__) MACRO(9, __VA_ARGS__)
#define __UNROLL_11(MACRO, ...)	__UNROLL_10(MACRO, __VA_ARGS__) MACRO(10, __VA_ARGS__)
#define __UNROLL_12(MACRO, ...)	__UNROLL_11(MACRO, __VA_ARGS__) MACRO(11, __VA_ARGS__)
#define __UNROLL_13(MACRO, ...)	__UNROLL_12(MACRO, __VA_ARGS__) MACRO(12, __VA_ARGS__)
#define __UNROLL_14(MACRO, ...)	__UNROLL_13(MACRO, __VA_ARGS__) MACRO(13, __VA_ARGS__)
#define __UNROLL_15(MACRO, ...)	__UNROLL_14(MACRO, __VA_ARGS__) MACRO(14, __VA_ARGS__)
#define __UNROLL_16(MACRO, ...)	__UNROLL_15(MACRO, __VA_ARGS__) MACRO(15, __VA_ARGS__)
#define __UNROLL_17(MACRO, ...)	__UNROLL_16(MACRO, __VA_ARGS__) MACRO(16, __VA_ARGS__)
#define __UNROLL_18(MACRO, ...)	__UNROLL_17(MACRO, __VA_ARGS__) MACRO(17, __VA_ARGS__)
#define __UNROLL_19(MACRO, ...)	__UNROLL_18(MACRO, __VA_ARGS__) MACRO(18, __VA_ARGS__)
#define __UNROLL_20(MACRO, ...)	__UNROLL_19(MACRO, __VA_ARGS__) MACRO(19, __VA_ARGS__)
| 77 | |
| 78 | #endif /* __UNROLL_H */ |
| 79 | |