@@ -23,11 +23,15 @@ struct kho_serialization;
bool kho_is_enabled(void);
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
int register_kho_notifier(struct notifier_block *nb);
int unregister_kho_notifier(struct notifier_block *nb);
void kho_memory_init(void);
+
+void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
+ u64 scratch_len);
#else
static inline bool kho_is_enabled(void)
{
@@ -40,6 +44,11 @@ static inline int kho_add_subtree(struct kho_serialization *ser,
return -EOPNOTSUPP;
}
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int register_kho_notifier(struct notifier_block *nb)
{
return -EOPNOTSUPP;
@@ -53,6 +62,11 @@ static inline int unregister_kho_notifier(struct notifier_block *nb)
static inline void kho_memory_init(void)
{
}
+
+static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
+ phys_addr_t scratch_phys, u64 scratch_len)
+{
+}
#endif /* CONFIG_KEXEC_HANDOVER */
#endif /* LINUX_KEXEC_HANDOVER_H */
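The header now exposes both halves of the restore path: kho_populate() for early architecture code and kho_retrieve_subtree() for subsystems that want their preserved sub-FDT back after kexec. A minimal consumer sketch of the latter follows; it is not part of this patch, and the sub-FDT name "example-subsys" and the property "data-phys" are purely hypothetical stand-ins for whatever the subsystem preserved via kho_add_subtree() in the previous kernel.

/*
 * Illustrative consumer of kho_retrieve_subtree() (not part of this patch).
 * The sub-FDT name "example-subsys" and the property "data-phys" are
 * hypothetical.
 */
#include <linux/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/io.h>

static int __init example_subsys_restore(void)
{
	phys_addr_t fdt_phys;
	const void *fdt;
	const u64 *val;
	int err, len;

	err = kho_retrieve_subtree("example-subsys", &fdt_phys);
	if (err)
		return err;	/* nothing handed over, or KHO disabled */

	/* The sub-FDT was preserved by the previous kernel; map and parse it. */
	fdt = phys_to_virt(fdt_phys);
	val = fdt_getprop(fdt, 0, "data-phys", &len);
	if (!val || len != sizeof(*val))
		return -EINVAL;

	/* ... revive the state stored at physical address *val ... */
	return 0;
}
early_initcall(example_subsys_restore);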
@@ -501,9 +501,112 @@ static __init int kho_out_debugfs_init(void)
return -ENOENT;
}
+struct kho_in {
+ struct dentry *dir;
+ phys_addr_t fdt_phys;
+ phys_addr_t scratch_phys;
+ struct list_head fdt_list;
+};
+
+static struct kho_in kho_in = {
+ .fdt_list = LIST_HEAD_INIT(kho_in.fdt_list),
+};
+
+static const void *kho_get_fdt(void)
+{
+ return kho_in.fdt_phys ? phys_to_virt(kho_in.fdt_phys) : NULL;
+}
+
+/**
+ * kho_retrieve_subtree - retrieve a preserved sub FDT by its name.
+ * @name: the name of the sub FDT passed to kho_add_subtree().
+ * @phys: if found, the physical address of the sub FDT is stored in @phys.
+ *
+ * Retrieve a preserved sub FDT named @name and store its physical
+ * address in @phys.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+{
+ const void *fdt = kho_get_fdt();
+ const u64 *val;
+ int offset, len;
+
+ if (!fdt)
+ return -ENOENT;
+
+ if (!phys)
+ return -EINVAL;
+
+ offset = fdt_subnode_offset(fdt, 0, name);
+ if (offset < 0)
+ return -ENOENT;
+
+ val = fdt_getprop(fdt, offset, PROP_SUB_FDT, &len);
+ if (!val || len != sizeof(*val))
+ return -EINVAL;
+
+ *phys = (phys_addr_t)*val;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
+
+/* Handling for debugfs/kho/in */
+
+static __init int kho_in_debugfs_init(const void *fdt)
+{
+ struct dentry *sub_fdt_dir;
+ int err, child;
+
+ kho_in.dir = debugfs_create_dir("in", debugfs_root);
+ if (IS_ERR(kho_in.dir))
+ return PTR_ERR(kho_in.dir);
+
+ sub_fdt_dir = debugfs_create_dir("sub_fdts", kho_in.dir);
+ if (IS_ERR(sub_fdt_dir)) {
+ err = PTR_ERR(sub_fdt_dir);
+ goto err_rmdir;
+ }
+
+ err = kho_debugfs_fdt_add(&kho_in.fdt_list, kho_in.dir, "fdt", fdt);
+ if (err)
+ goto err_rmdir;
+
+ fdt_for_each_subnode(child, fdt, 0) {
+ int len = 0;
+ const char *name = fdt_get_name(fdt, child, NULL);
+ const u64 *fdt_phys;
+
+ fdt_phys = fdt_getprop(fdt, child, "fdt", &len);
+ if (!fdt_phys)
+ continue;
+ if (len != sizeof(*fdt_phys)) {
+ pr_warn("node `%s`'s prop `fdt` has invalid length: %d\n",
+ name, len);
+ continue;
+ }
+ err = kho_debugfs_fdt_add(&kho_in.fdt_list, sub_fdt_dir, name,
+ phys_to_virt(*fdt_phys));
+ if (err) {
+ pr_warn("failed to add fdt `%s` to debugfs: %d\n", name,
+ err);
+ continue;
+ }
+ }
+
+ return 0;
+
+err_rmdir:
+ debugfs_remove_recursive(kho_in.dir);
+ return err;
+}
+
static __init int kho_init(void)
{
int err = 0;
+ const void *fdt = kho_get_fdt();
if (!kho_enable)
return 0;
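For orientation, this is roughly the debugfs layout that kho_in_debugfs_init() above produces on a KHO boot; it is an illustration, and <name> stands for whatever child node (i.e. kho_add_subtree() name) exists in the handover FDT.

/*
 * Illustrative layout exposed by kho_in_debugfs_init():
 *
 *   /sys/kernel/debug/kho/in/fdt              - the root handover FDT
 *   /sys/kernel/debug/kho/in/sub_fdts/<name>  - one blob per preserved
 *                                               sub-FDT, named after its
 *                                               child node in the root FDT
 */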
@@ -524,6 +627,20 @@ static __init int kho_init(void)
if (err)
goto err_free_fdt;
+ if (fdt) {
+ err = kho_in_debugfs_init(fdt);
+ /*
+ * Failure to create /sys/kernel/debug/kho/in does not prevent
+ * reviving state from KHO and setting up KHO for the next
+ * kexec.
+ */
+ if (err)
+ pr_err("failed exposing handover FDT in debugfs: %d\n",
+ err);
+
+ return 0;
+ }
+
for (int i = 0; i < kho_scratch_cnt; i++) {
unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr);
unsigned long count = kho_scratch[i].size >> PAGE_SHIFT;
@@ -551,7 +668,118 @@ static __init int kho_init(void)
}
late_initcall(kho_init);
+static void __init kho_release_scratch(void)
+{
+ phys_addr_t start, end;
+ u64 i;
+
+ memmap_init_kho_scratch_pages();
+
+ /*
+ * Mark scratch memory as CMA before we return it. That way we
+ * ensure that no kernel allocations happen on it, which means we
+ * can reuse it as scratch memory again later.
+ */
+ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+ MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) {
+ ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start));
+ ulong end_pfn = pageblock_align(PFN_UP(end));
+ ulong pfn;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages)
+ set_pageblock_migratetype(pfn_to_page(pfn),
+ MIGRATE_CMA);
+ }
+}
+
void __init kho_memory_init(void)
{
- kho_reserve_scratch();
+ if (kho_in.scratch_phys) {
+ kho_scratch = phys_to_virt(kho_in.scratch_phys);
+ kho_release_scratch();
+ } else {
+ kho_reserve_scratch();
+ }
+}
+
+void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
+ phys_addr_t scratch_phys, u64 scratch_len)
+{
+ void *fdt = NULL;
+ struct kho_scratch *scratch = NULL;
+ int err = 0;
+ unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
+
+ /* Validate the input FDT */
+ fdt = early_memremap(fdt_phys, fdt_len);
+ if (!fdt) {
+ pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys);
+ err = -EFAULT;
+ goto out;
+ }
+ err = fdt_check_header(fdt);
+ if (err) {
+ pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n",
+ fdt_phys, err);
+ err = -EINVAL;
+ goto out;
+ }
+ err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE);
+ if (err) {
+ pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n",
+ fdt_phys, KHO_FDT_COMPATIBLE, err);
+ err = -EINVAL;
+ goto out;
+ }
+
+ scratch = early_memremap(scratch_phys, scratch_len);
+ if (!scratch) {
+ pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
+ scratch_phys, scratch_len);
+ err = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * The previous kernel passed us safe, contiguous blocks of memory to
+ * use for early boot purposes, so that we can resize the memblock
+ * array as needed.
+ */
+ for (int i = 0; i < scratch_cnt; i++) {
+ struct kho_scratch *area = &scratch[i];
+ u64 size = area->size;
+
+ memblock_add(area->addr, size);
+ err = memblock_mark_kho_scratch(area->addr, size);
+ if (WARN_ON(err)) {
+ pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %d",
+ &area->addr, &size, err);
+ goto out;
+ }
+ pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
+ }
+
+ memblock_reserve(scratch_phys, scratch_len);
+
+ /*
+ * Now that we have a viable region of scratch memory, tell the
+ * memblock allocator to use only that region for any allocations.
+ * That way we ensure that nothing scribbles over in-use data while
+ * we initialize the page tables, which we will need to ingest all
+ * memory reservations from the previous kernel.
+ */
+ memblock_set_kho_scratch_only();
+
+ kho_in.fdt_phys = fdt_phys;
+ kho_in.scratch_phys = scratch_phys;
+ kho_scratch_cnt = scratch_cnt;
+ pr_info("found kexec handover data. Will skip init for some devices\n");
+
+out:
+ if (fdt)
+ early_memunmap(fdt, fdt_len);
+ if (scratch)
+ early_memunmap(scratch, scratch_len);
+ if (err)
+ pr_warn("disabling KHO revival: %d\n", err);
}
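kho_populate() is expected to run from early architecture setup, before the first memblock allocation, once the physical addresses of the previous kernel's handover FDT and scratch descriptor array are known. How those addresses reach the new kernel is architecture specific and outside this hunk; the sketch below assumes a hypothetical handover_params structure purely for illustration.

/*
 * Hypothetical early call site for kho_populate() (not part of this patch).
 * The handover_params struct and how it gets filled in are assumptions; the
 * real transport is architecture specific.
 */
struct handover_params {
	u64 kho_fdt_phys;
	u64 kho_fdt_len;
	u64 kho_scratch_phys;
	u64 kho_scratch_len;
};

static void __init early_kho_setup(const struct handover_params *hp)
{
	if (!hp->kho_fdt_phys || !hp->kho_scratch_phys)
		return;

	/* Should run before memblock starts allocating, so that the
	 * scratch-only restriction can take effect in time. */
	kho_populate(hp->kho_fdt_phys, hp->kho_fdt_len,
		     hp->kho_scratch_phys, hp->kho_scratch_len);
}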
@@ -2377,6 +2377,7 @@ void __init memblock_free_all(void)
free_unused_memmap();
reset_all_zones_managed_pages();
+ memblock_clear_kho_scratch_only();
pages = free_low_memory_core_early();
totalram_pages_add(pages);
}
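Taken together, the hunks above imply the following boot-time ordering for the restore path; this is an illustrative summary, not new code, and the exact call sites sit outside this patch.

/*
 * Restore-path ordering implied by these hunks (illustrative):
 *
 *   early arch setup
 *     kho_populate()            validate the FDT, memblock_add() and mark
 *                               each scratch area, then
 *                               memblock_set_kho_scratch_only()
 *   early mm init
 *     kho_memory_init()
 *       kho_release_scratch()   hand scratch back as MIGRATE_CMA pageblocks
 *     memblock_free_all()
 *       memblock_clear_kho_scratch_only()   lift the scratch-only restriction
 *   late_initcall
 *     kho_init()
 *       kho_in_debugfs_init()   expose the handover FDTs under debugfs
 */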