diff mbox series

[RFC,v2,3/3] i386/sev: Add KVM_EXIT_SNP_REQ_CERTS support for certificate-fetching

Message ID 20250120213116.521519-4-huibo.wang@amd.com (mailing list archive)
State New
Headers show
Series SEV-SNP: Add support for SNP certificate fetching | expand

Commit Message

Melody Wang Jan. 20, 2025, 9:31 p.m. UTC
From: Michael Roth <michael.roth@amd.com>

The GHCB specification[1] defines a VMGEXIT-based Guest Request
hypercall to allow an SNP guest to issue encrypted requests directly to
SNP firmware to do things like query the attestation report for the
guest. These are generally handled purely in the kernel.

In some some cases, it's useful for the host to be able to additionally
supply the certificate chain for the signing key that SNP firmware uses
to sign these attestation reports. To allow for this, the GHCB
specification defines an Extended Guest Request where this certificate
data can be provided in a special format described in the GHCB spec.
This certificate data may be global or guest-specific depending on how
the guest was configured. Rather than providing interfaces to manage
these within the kernel, KVM provides a new KVM_EXIT_SNP_REQ_CERTS exit
to request the certificate contents from userspace. Implement support
for that in QEMU.

To synchronize delivery of the certificates to the guest in a way where
they will not be rendered invalid by updates to SNP firmware or
attestation singing/endorsement keys by management tools outside the
purview of QEMU, it is expected by users of KVM_EXIT_SNP_REQ_CERTS to
obtain a shared/read lock on the certificate file prior to delivering
them back to KVM. Only after this will the attestation report be
retrieved from firmware and bundled with the certificate data, so QEMU
must continue to hold the file lock until KVM confirms that the
attestation report has been retrieved/bundled. This confirmation is done
by way of the kvm_immediate_exit callback infrastructure that was
introduced in a previous patch.

[1] "Guest Hypervisor Communication Block (GHCB) Standardization",
    https://www.amd.com/en/developer/sev.html

  [Melody: Add a while(1) loop using the libvirt example with fstat and
  stat for the locking certificate blob code in open_certs_locked() to
  fix the recreation race problem suggested by Daniel P. Berrangé. And
  fix the json format and filename suggested by Markus Armbruster.]

Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Melody Wang <huibo.wang@amd.com>
---
 qapi/qom.json                 |  21 ++-
 target/i386/kvm/kvm.c         |  10 ++
 target/i386/sev-system-stub.c |   5 +
 target/i386/sev.c             | 277 ++++++++++++++++++++++++++++++++++
 target/i386/sev.h             |   2 +-
 5 files changed, 313 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/qapi/qom.json b/qapi/qom.json
index 28ce24cd8d..c6f9ad5d45 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -1034,6 +1034,23 @@ 
 #     firmware.  Set this to true to disable the use of VCEK.
 #     (default: false) (since: 9.1)
 #
+# @certs-filename: Certificate data that can be passed to guests via
+#     SNP Extended Guest Requests. File should be in the format
+#     described in the GHCB specification. (default: none)
+#     (since: 10.0)
+#
+# @certs-timeout: Maximum time in milliseconds to wait to obtain a read lock
+#     on the certificate file specified by @certs-filename. This is
+#     not a cumulative value and only affects how long QEMU waits
+#     before returning execution to the vCPU and informing the guest
+#     of the timeout, so the guest can still continue retrying for
+#     as long as it likes (which will be about 60 seconds for linux
+#     guests at the time of this writing). If the guest-side timeout
+#     is insufficient, set this higher to allow more time to fetch the
+#     certificate. If the guest-side timeout is sufficient, set this
+#     lower to reduce the likelihood of soft lockups in the guest.
+#     (default: 100) (since: 10.0)
+#
 # Since: 9.1
 ##
 { 'struct': 'SevSnpGuestProperties',
@@ -1045,7 +1062,9 @@ 
             '*id-auth': 'str',
             '*author-key-enabled': 'bool',
             '*host-data': 'str',
-            '*vcek-disabled': 'bool' } }
+            '*vcek-disabled': 'bool',
+            '*certs-filename': 'str',
+            '*certs-timeout': 'uint32' } }
 
 ##
 # @ThreadContextProperties:
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 2f66e63b88..5df272d3d9 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6122,6 +6122,16 @@  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 #endif
     case KVM_EXIT_HYPERCALL:
         ret = kvm_handle_hypercall(run);
+        break;
+    case KVM_EXIT_SNP_REQ_CERTS:
+        if (!sev_snp_enabled()) {
+            error_report("KVM: Encountered a certificate request exit for a "
+                         "non-SEV-SNP guest.");
+            ret = -1;
+        } else {
+            ret = kvm_handle_snp_req_certs(cs, run);
+        }
+
         break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c
index d5bf886e79..685c56f62c 100644
--- a/target/i386/sev-system-stub.c
+++ b/target/i386/sev-system-stub.c
@@ -71,3 +71,8 @@  void hmp_info_sev(Monitor *mon, const QDict *qdict)
 void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
 {
 }
+
+int kvm_handle_snp_req_certs(CPUState *cpu, struct kvm_run *run)
+{
+    g_assert_not_reached();
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 0e1dbb6959..b29dbc1b90 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -157,6 +157,9 @@  struct SevSnpGuestState {
     char *id_auth_base64;
     uint8_t *id_auth;
     char *host_data;
+    char *certs_filename;
+    int certs_fd;
+    uint32_t certs_timeout;
 
     struct kvm_sev_snp_launch_start kvm_start_conf;
     struct kvm_sev_snp_launch_finish kvm_finish_conf;
@@ -1355,6 +1358,243 @@  sev_snp_launch_finish(SevCommonState *sev_common)
     }
 }
 
+static int open_certs_locked(SevSnpGuestState *sev_snp_guest)
+{
+    int fd, ret;
+
+    if (sev_snp_guest->certs_fd != -1) {
+        return 0;
+    }
+
+    while (1) {
+        struct stat a, b;
+        fd = qemu_open(sev_snp_guest->certs_filename, O_RDONLY, NULL);
+        if (fd == -1) {
+            error_report("Unable to open certificate blob %s, ret %d",
+                    sev_snp_guest->certs_filename, fd);
+            return fd;
+        }
+
+        ret = fstat(fd, &b);
+        if (ret < 0) {
+            error_report("Unable to check status of certficate blob %s, ret %d",
+                    sev_snp_guest->certs_filename, ret);
+            goto out_close;
+        }
+
+        ret = qemu_lock_fd(fd, 0, 0, false);
+        if (ret == -EAGAIN || ret == -EACCES) {
+            error_report("Unable to lock certificate blob %s, ret %d",
+                    sev_snp_guest->certs_filename, ret);
+            ret = -EAGAIN;
+            goto out_close;
+        } else if (ret) {
+            goto out_close;
+        }
+
+        if (stat(sev_snp_guest->certs_filename, &a) < 0) {
+            error_report("certficate blob %s disappeared",
+                    sev_snp_guest->certs_filename);
+            close(fd);
+            continue;
+        }
+
+        if (a.st_ino == b.st_ino) {
+            break;
+        } else {
+            error_report("certificate blob %s was recreated",
+                    sev_snp_guest->certs_filename);
+            close(fd);
+            continue;
+        }
+    }
+
+    sev_snp_guest->certs_fd = fd;
+    return 0;
+out_close:
+    close(fd);
+    return ret;
+}
+
+static void close_certs(SevSnpGuestState *sev_snp_guest)
+{
+    if (sev_snp_guest->certs_fd == -1) {
+        return;
+    }
+
+    qemu_unlock_fd(sev_snp_guest->certs_fd, 0, 0);
+    close(sev_snp_guest->certs_fd);
+    sev_snp_guest->certs_fd = -1;
+}
+
+static ssize_t get_certs_size(SevSnpGuestState *sev_snp_guest)
+{
+    ssize_t size;
+
+    size = lseek(sev_snp_guest->certs_fd, 0, SEEK_END);
+
+    if (size < 0)
+        return -errno;
+
+    return size;
+}
+
+static int read_certs(SevSnpGuestState *sev_snp_guest, void *buf, size_t buf_len)
+{
+    ssize_t n, len = 0;
+
+    n = lseek(sev_snp_guest->certs_fd, 0, SEEK_SET);
+    if (n) {
+        return n;
+    }
+
+    while ((n = read(sev_snp_guest->certs_fd, buf, buf_len)) != 0) {
+        if (n < 0) {
+            if (errno == EINTR) {
+                continue;
+            } else {
+                len = -errno;
+                break;
+            }
+        }
+        len += n;
+    }
+
+    return len;
+}
+
+static void snp_release_certs_lock(void *opaque)
+{
+    SevSnpGuestState *sev_snp_guest = opaque;
+
+    close_certs(sev_snp_guest);
+}
+
+static void certs_timeout(void *opaque)
+{
+    bool *timed_out = opaque;
+
+    *timed_out = true;
+}
+
+int kvm_handle_snp_req_certs(CPUState *cpu, struct kvm_run *run)
+{
+    struct kvm_exit_snp_req_certs *req_certs = &run->snp_req_certs;
+    g_autofree gchar *contents = NULL;
+    SevSnpGuestState *sev_snp_guest;
+    MemTxAttrs attrs = { 0 };
+    bool timed_out = false;
+    QEMUTimer *timer;
+    uint32_t npages;
+    void *guest_buf;
+    hwaddr buf_sz;
+    hwaddr gpa;
+    int ret;
+
+    req_certs->ret = EIO;
+
+    if (!sev_snp_enabled()) {
+        return -EIO;
+    }
+
+    gpa = req_certs->gfn << TARGET_PAGE_BITS;
+    npages = req_certs->npages;
+
+    sev_snp_guest = SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs);
+    if (!sev_snp_guest->certs_filename) {
+        req_certs->ret = 0;
+        return 0;
+    }
+
+    /*
+     * -EAGAIN from open_certs_locked() indicates that a lock could not be
+     * obtained on the certificate file. It would be possible to set
+     * req_certs->ret = EAGAIN to immediately inform the requesting vCPU of
+     * this condition so that it can retry again later, however, linux guests
+     * are currently hard-limited to a 60 second timeout, at which point they
+     * will assume misbehavior and refuse to continue issuing attestation
+     * requests from that point forward.
+     *
+     * Allowing for this guest timeout to be configured in some way that will
+     * be easier to coordinate with on the QEMU side will make that approach
+     * more viable, but until then just busy-wait on the QEMU side, which
+     * allows more flexibility in how long QEMU can wait for things like SNP
+     * firmware updates/etc. which may be holding an exclusive lock on the
+     * certificate.
+     *
+     * The down-side to doing things this way is that too long of a busy-wait
+     * will result in soft-lockups in the guest, but the guest will otherwise
+     * continue normally afterward.
+     *
+     * If the QEMU-side timeout is reached, then just go ahead and indicate
+     * EAGAIN to the guest, at which point the above-mentioned guest-side
+     * timeout will trigger if QEMU's timeout exceeds that of the guest. But
+     * that's better than killing the vCPU, which is the only viable
+     * alternative at that point.
+     */
+    timer = timer_new_ms(QEMU_CLOCK_REALTIME, certs_timeout, &timed_out);
+    timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
+                     sev_snp_guest->certs_timeout);
+
+    do {
+        ret = open_certs_locked(sev_snp_guest);
+        g_usleep(1000);
+    } while (ret == -EAGAIN && !timed_out);
+
+    timer_del(timer);
+
+    if (ret) {
+        if (timed_out) {
+            req_certs->ret = EAGAIN;
+            return 0;
+        }
+
+        /*
+         * It's a bit ambiguous when to propagate a generic error to the guest
+         * rather than simply letting QEMU crash. The general methodology here
+         * is to let QEMU crash if there is a misconfiguration issue that the
+         * guest has absolutely no potential role in, but to otherwise notify
+         * the guest if there is some remote possibility that the issue is
+         * related to something on the guest side.
+         */
+        return ret;
+    }
+
+    buf_sz = npages * TARGET_PAGE_SIZE;
+    if (buf_sz < get_certs_size(sev_snp_guest)) {
+        req_certs->ret = ENOSPC;
+        req_certs->npages =
+            (get_certs_size(sev_snp_guest) + TARGET_PAGE_SIZE) / TARGET_PAGE_SIZE;
+        close_certs(sev_snp_guest);
+        goto out;
+    }
+
+    guest_buf = address_space_map(&address_space_memory, gpa, &buf_sz, true, attrs);
+    if (buf_sz < npages * TARGET_PAGE_SIZE) {
+        error_report_once("Unable to map entire guest buffer, mapped size %ld (expected %ld)",
+                          buf_sz, get_certs_size(sev_snp_guest));
+        close_certs(sev_snp_guest);
+        goto out_unmap;
+    }
+
+    ret = read_certs(sev_snp_guest, guest_buf, buf_sz);
+    if (ret < 0) {
+        error_report_once("Unable to read certificate data into guest buffer, ret %d",
+                          ret);
+        close_certs(sev_snp_guest);
+        goto out_unmap;
+
+    }
+
+    req_certs->ret = 0;
+
+    add_immediate_exit_callback(cpu, snp_release_certs_lock, sev_snp_guest);
+out_unmap:
+    address_space_unmap(&address_space_memory, guest_buf, buf_sz, true, buf_sz);
+
+out:
+    return ret;
+}
 
 static void
 sev_vm_state_change(void *opaque, bool running, RunState state)
@@ -1594,6 +1834,7 @@  static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 
 static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
+    SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(cgs);
     MachineState *ms = MACHINE(qdev_get_machine());
     X86MachineState *x86ms = X86_MACHINE(ms);
 
@@ -1604,6 +1845,13 @@  static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
         return -1;
     }
 
+    if (sev_snp_guest->certs_filename &&
+        kvm_vm_enable_cap(kvm_state, KVM_CAP_EXIT_SNP_REQ_CERTS, 0, 1)) {
+        error_setg(errp, "Failed to enable support for SEV-SNP "
+                         "certificate-fetching requests.");
+        return -1;
+    }
+
     return 0;
 }
 
@@ -2393,6 +2641,26 @@  sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
     memcpy(finish->host_data, blob, len);
 }
 
+static char *
+sev_snp_guest_get_certs_filename(Object *obj, Error **errp)
+{
+    SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+    return g_strdup(sev_snp_guest->certs_filename);
+}
+
+static void
+sev_snp_guest_set_certs_filename(Object *obj, const char *value, Error **errp)
+{
+    SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+    if (sev_snp_guest->certs_filename) {
+        g_free(sev_snp_guest->certs_filename);
+    }
+
+    sev_snp_guest->certs_filename = value ? g_strdup(value) : NULL;
+}
+
 static void
 sev_snp_guest_class_init(ObjectClass *oc, void *data)
 {
@@ -2428,6 +2696,9 @@  sev_snp_guest_class_init(ObjectClass *oc, void *data)
     object_class_property_add_str(oc, "host-data",
                                   sev_snp_guest_get_host_data,
                                   sev_snp_guest_set_host_data);
+    object_class_property_add_str(oc, "certs-filename",
+                                  sev_snp_guest_get_certs_filename,
+                                  sev_snp_guest_set_certs_filename);
 }
 
 static void
@@ -2440,6 +2711,12 @@  sev_snp_guest_instance_init(Object *obj)
 
     /* default init/start/finish params for kvm */
     sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
+
+    sev_snp_guest->certs_fd = -1;
+    sev_snp_guest->certs_timeout = 100;
+    object_property_add_uint32_ptr(obj, "certs-timeout",
+                                   &sev_snp_guest->certs_timeout,
+                                   OBJ_PROP_FLAG_READWRITE);
 }
 
 /* guest info specific to sev-snp */
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 373669eaac..c85c585d9d 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -70,5 +70,5 @@  void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size);
 
 uint32_t sev_get_cbit_position(void);
 uint32_t sev_get_reduced_phys_bits(void);
-
+int kvm_handle_snp_req_certs(CPUState *cpu, struct kvm_run *run);
 #endif