@@ -61,6 +61,9 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_MOVT_ABS 44
#define R_ARM_MOVW_PREL_NC 45
#define R_ARM_MOVT_PREL 46
+#define R_ARM_ALU_PC_G0_NC 57
+#define R_ARM_ALU_PC_G1_NC 59
+#define R_ARM_LDR_PC_G2 63
#define R_ARM_THM_CALL 10
#define R_ARM_THM_JUMP24 30
@@ -68,6 +68,44 @@ bool module_exit_section(const char *name)
strstarts(name, ".ARM.exidx.exit");
}
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
+/*
+ * This implements the partitioning algorithm for group relocations as
+ * documented in the ARM AArch32 ELF psABI (IHI 0044).
+ *
+ * A single PC-relative symbol reference is divided in up to 3 add or subtract
+ * operations, where the final one could be incorporated into a load/store
+ * instruction with immediate offset. E.g.,
+ *
+ * ADD Rd, PC, #... or ADD Rd, PC, #...
+ * ADD Rd, Rd, #... ADD Rd, Rd, #...
+ * LDR Rd, [Rd, #...] ADD Rd, Rd, #...
+ *
+ * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is
+ * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of
+ * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for
+ * any in-kernel symbol reference (unless module PLTs are being used).
+ *
+ * The main advantage of this approach over the typical pattern using a literal
+ * load is that literal loads may miss in the D-cache, and generally lead to
+ * lower cache efficiency for variables that are referenced often from many
+ * different places in the code.
+ */
+static u32 get_group_rem(u32 group, u32 *offset)
+{
+ u32 val = *offset;
+ u32 shift;
+ do {
+ shift = val ? (31 - __fls(val)) & ~1 : 32;
+ *offset = val;
+ if (!val)
+ break;
+ val &= 0xffffff >> shift;
+ } while (group--);
+ return shift;
+}
+#endif
+
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
unsigned int relindex, struct module *module)
@@ -87,6 +125,9 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
#ifdef CONFIG_THUMB2_KERNEL
u32 upper, lower, sign, j1, j2;
#endif
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
+ u32 shift, group = 1;
+#endif
offset = ELF32_R_SYM(rel->r_info);
if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
@@ -331,6 +372,55 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
*(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower);
break;
#endif
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
+ case R_ARM_ALU_PC_G0_NC:
+ group = 0;
+ fallthrough;
+ case R_ARM_ALU_PC_G1_NC:
+ tmp = __mem_to_opcode_arm(*(u32 *)loc);
+ offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7);
+ if (tmp & BIT(22))
+ offset = -offset;
+ offset += sym->st_value - loc;
+ if (offset < 0) {
+ offset = -offset;
+ tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode
+ } else {
+ tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode
+ }
+
+ shift = get_group_rem(group, &offset);
+ if (shift < 24) {
+ offset >>= 24 - shift;
+ offset |= (shift + 8) << 7;
+ }
+ *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
+ break;
+
+ case R_ARM_LDR_PC_G2:
+ tmp = __mem_to_opcode_arm(*(u32 *)loc);
+ offset = tmp & 0xfff;
+ if (~tmp & BIT(23)) // U bit cleared?
+ offset = -offset;
+ offset += sym->st_value - loc;
+ if (offset < 0) {
+ offset = -offset;
+ tmp &= ~BIT(23); // clear U bit
+ } else {
+ tmp |= BIT(23); // set U bit
+ }
+ get_group_rem(2, &offset);
+
+ if (offset > 0xfff) {
+ pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+ module->name, relindex, i, symname,
+ ELF32_R_TYPE(rel->r_info), loc,
+ sym->st_value);
+ return -ENOEXEC;
+ }
+ *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
+ break;
+#endif
default:
pr_err("%s: unknown relocation: %u\n",