Message ID | 1309863815-28236-5-git-send-email-hare@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 07/05/2011 01:03 PM, Hannes Reinecke wrote: > If the serial number is not set we should mask it out in the > list of supported VPD pages and mark it as not supported. > > Signed-off-by: Hannes Reinecke<hare@suse.de> > --- > hw/scsi-disk.c | 15 ++++++++++++--- > 1 files changed, 12 insertions(+), 3 deletions(-) > > diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c > index 5804662..05d14ab 100644 > --- a/hw/scsi-disk.c > +++ b/hw/scsi-disk.c > @@ -398,7 +398,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) > "buffer size %zd\n", req->cmd.xfer); > pages = buflen++; > outbuf[buflen++] = 0x00; // list of supported pages (this page) > - outbuf[buflen++] = 0x80; // unit serial number > + if (s->serial) > + outbuf[buflen++] = 0x80; // unit serial number > outbuf[buflen++] = 0x83; // device identification > if (s->drive_kind == SCSI_HD) { > outbuf[buflen++] = 0xb0; // block limits > @@ -409,8 +410,14 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) > } > case 0x80: /* Device serial number, optional */ > { > - int l = strlen(s->serial); > + int l; > > + if (!s->serial) { > + DPRINTF("Inquiry (EVPD[Serial number] not supported\n"); > + return -1; > + } > + > + l = strlen(s->serial); > if (l> req->cmd.xfer) > l = req->cmd.xfer; > if (l> 20) > @@ -1203,7 +1210,9 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind) > if (!s->serial) { > /* try to fall back to value set with legacy -drive serial=... */ > dinfo = drive_get_by_blockdev(s->bs); > - s->serial = qemu_strdup(*dinfo->serial ? dinfo->serial : "0"); > + if (*dinfo->serial) { > + s->serial = qemu_strdup(dinfo->serial); > + } > } > > if (!s->version) { Acked-by: Paolo Bonzini <pbonzini@redhat.com> Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/05/2011 01:03 PM, Hannes Reinecke wrote: > This patch adds an emulation for the LSI Megaraid SAS 8708EM2 HBA. > > Signed-off-by: Hannes Reinecke<hare@suse.de> > --- > Makefile.objs | 1 + > default-configs/pci.mak | 1 + > hw/megasas.c | 1950 +++++++++++++++++++++++++++++++++++++++++++++++ > hw/mfi.h | 1197 +++++++++++++++++++++++++++++ > hw/pci_ids.h | 3 +- > trace-events | 71 ++ > 6 files changed, 3222 insertions(+), 1 deletions(-) > create mode 100644 hw/megasas.c > create mode 100644 hw/mfi.h > > diff --git a/Makefile.objs b/Makefile.objs > index cea15e4..6f5d113 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -258,6 +258,7 @@ hw-obj-$(CONFIG_AHCI) += ide/ich.o > > # SCSI layer > hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o > +hw-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o > hw-obj-$(CONFIG_ESP) += esp.o > > hw-obj-y += dma-helpers.o sysbus.o isa-bus.o > diff --git a/default-configs/pci.mak b/default-configs/pci.mak > index 22bd350..fabb56c 100644 > --- a/default-configs/pci.mak > +++ b/default-configs/pci.mak > @@ -9,6 +9,7 @@ CONFIG_EEPRO100_PCI=y > CONFIG_PCNET_PCI=y > CONFIG_PCNET_COMMON=y > CONFIG_LSI_SCSI_PCI=y > +CONFIG_MEGASAS_SCSI_PCI=y > CONFIG_RTL8139_PCI=y > CONFIG_E1000_PCI=y > CONFIG_IDE_CORE=y > diff --git a/hw/megasas.c b/hw/megasas.c > new file mode 100644 > index 0000000..613bb0a > --- /dev/null > +++ b/hw/megasas.c > @@ -0,0 +1,1950 @@ > +/* > + * QEMU MegaRAID SAS 8708EM2 Host Bus Adapter emulation > + * > + * Copyright (c) 2009-2011 Hannes Reinecke, SUSE Labs > + * > + * This code is licensed under the LGPL. > + */ > + > +#include "hw.h" > +#include "pci.h" > +#include "dma.h" > +#include "iov.h" > +#include "scsi.h" > +#include "scsi-defs.h" > +#include "block_int.h" > +#include "trace.h" > + > +#include "mfi.h" > + > +#ifdef DEBUG_MEGASAS > +#define DPRINTF(fmt, ...) \ > + do { \ > + printf("megasas: " fmt , ## __VA_ARGS__); \ > + } while (0) > +#else > +#define DPRINTF(fmt, ...) do {} while (0) > +#endif > + > +/* Static definitions */ > +#define MEGASAS_VERSION "1.30" > +#define MEGASAS_MAX_FRAMES 2048 /* Firmware limit at 65535 */ > +#define MEGASAS_DEFAULT_FRAMES 1000 /* Windows requires this */ > +#define MEGASAS_MAX_SGE 256 /* Firmware limit */ > +#define MEGASAS_DEFAULT_SGE 80 > +#define MEGASAS_MAX_SECTORS 0xFFFF /* No real limit */ > +#define MEGASAS_MAX_ARRAYS 128 > + > +const char *mfi_frame_desc[] = { > + "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", > + "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"}; > + > +struct megasas_cmd_t { > + int index; > + int context; > + int count; > + > + target_phys_addr_t pa; > + target_phys_addr_t pa_size; > + union mfi_frame *frame; > + SCSIRequest *req; > + struct iovec *iov; > + void *iov_buf; > + size_t iov_cnt; > + size_t iov_size; > + size_t iov_offset; > + SCSIDevice *sdev; > + struct megasas_state_t *state; > +}; > + > +typedef struct megasas_state_t { > + PCIDevice dev; > + int mmio_io_addr; > + int io_addr; > + int queue_addr; > + uint32_t frame_hi; > + > + int fw_state; > + uint32_t fw_sge; > + uint32_t fw_cmds; > + int fw_luns; > + int intr_mask; > + int doorbell; > + int busy; > + char *raid_mode_str; > + int is_jbod; > + > + int event_count; > + int shutdown_event; > + int boot_event; > + > + uint64_t reply_queue_pa; > + void *reply_queue; > + int reply_queue_len; > + int reply_queue_index; > + uint64_t consumer_pa; > + uint64_t producer_pa; > + > + struct megasas_cmd_t frames[MEGASAS_MAX_FRAMES]; > + > + SCSIBus bus; > +} MPTState; > + > +#define MEGASAS_INTR_DISABLED_MASK 0xFFFFFFFF > + > +#define MEGASAS_INTR_ENABLED(s) \ > + (((s)->intr_mask& MEGASAS_INTR_DISABLED_MASK) != \ > + MEGASAS_INTR_DISABLED_MASK) Please make this a static function > + > +#define megasas_frame_set_cmd_status(f, v) \ > + stb_phys((f) + offsetof(struct mfi_frame_header, cmd_status), v); > + > +#define megasas_frame_set_scsi_status(f, v) \ > + stb_phys((f) + offsetof(struct mfi_frame_header, scsi_status), v); > + > +#define megasas_frame_get_cmd(f) \ > + ldub_phys((f) + offsetof(struct mfi_frame_header, frame_cmd)) > + > +#define megasas_frame_get_context(f) \ > + ldl_phys(frame_addr + offsetof(struct mfi_frame_header, context)); ... these too :). > + > +static void megasas_soft_reset(MPTState *s); > + > +static int megasas_map_sgl(struct megasas_cmd_t *cmd, int pa_offset) > +{ > + int i; > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int is_sgl64 = (flags& MFI_FRAME_SGL64) ? 1 : 0; > + int is_write = (flags& MFI_FRAME_DIR_WRITE) ? 1 : 0; > + int sgl_addr_size = is_sgl64 ? sizeof(uint64_t) : sizeof(uint32_t); > + size_t iov_count = 0; > + > + cmd->iov = qemu_malloc(sizeof(struct iovec) * > + (cmd->frame->header.sge_count + 1)); Is there a maximum number of sg elements? You might want to consider making the iov array static and save yourself the malloc (which can really be a performance bottleneck). That's only an idea for future optimization though. > + cmd->iov_cnt = 0; > + for (i = 0; i< cmd->frame->header.sge_count; i++) { > + target_phys_addr_t pa, iov_pa, iov_size; > + > + pa = cmd->pa + pa_offset; > + if (is_sgl64) { > + iov_pa = ldq_phys(pa); > + } else { > + iov_pa = ldl_phys(pa); These load data from memory in target endianness. Are you sure that's what you want? I'd expect this to be defined as little endian (especially given that ldq and ldl on the same address work). > + } > + cmd->iov[i].iov_len = iov_size = ldl_phys(pa + sgl_addr_size); > + cmd->iov[i].iov_base = cpu_physical_memory_map(iov_pa,&iov_size, > + is_write); > + if (!iov_size || iov_size != cmd->iov[i].iov_len) { > + trace_megasas_iovec_map_failed(cmd->index, i, iov_size); > + goto unmap; > + } > + cmd->iov_cnt++; > + pa_offset += sgl_addr_size + sizeof(uint32_t); > + iov_count += iov_size; > + } > + cmd->iov[i].iov_base = NULL; > + cmd->iov[i].iov_len = 0; > + if (pa_offset> (cmd->count + 1) * MFI_FRAME_SIZE) { > + trace_megasas_io_sgl_overflow(cmd->context, cmd->count, > + cmd->frame->header.sge_count, pa_offset); > + } > + if (iov_count> cmd->iov_size * cmd->sdev->blocksize) { > + trace_megasas_io_list_overflow(cmd->context, iov_count, > + cmd->iov_size * cmd->sdev->blocksize); > + } > + cmd->iov_offset = 0; > + return 0; > +unmap: > + while (i>= 0) { > + cpu_physical_memory_unmap(cmd->iov[i].iov_base, > + cmd->iov[i].iov_len, 0, 0); > + i--; > + } > + qemu_free(cmd->iov); > + iov_count = cmd->frame->header.sge_count - cmd->iov_cnt; > + cmd->iov_cnt = 0; > + cmd->iov = NULL; > + return iov_count; > +} > + > +static void megasas_unmap_sgl(struct megasas_cmd_t *cmd) > +{ > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int i, is_write = (flags& MFI_FRAME_DIR_WRITE) ? 1 : 0; > + > + for (i = 0; i< cmd->frame->header.sge_count; i++) { > + cpu_physical_memory_unmap(cmd->iov[i].iov_base, cmd->iov[i].iov_len, > + is_write, cmd->iov[i].iov_len); > + } > + qemu_free(cmd->iov); > + cmd->iov = NULL; > + cmd->iov_cnt = 0; > + cmd->iov_offset = 0; > +} > + > +/* > + * passthrough sense and io sense are at the same offset > + */ > +static int megasas_build_sense(struct megasas_cmd_t *cmd, uint8_t *sense_ptr, > + uint8_t sense_len) > +{ > + target_phys_addr_t pa, pa_hi = 0, pa_lo; > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int is_sense64 = (flags& MFI_FRAME_SENSE64) ? 1 : 0; > + > + if (sense_len> cmd->frame->header.sense_len) { > + sense_len = cmd->frame->header.sense_len; > + } > + > + pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo); > + if (is_sense64) { > + pa_hi = le32_to_cpu(cmd->frame->pass.sense_addr_hi); > + } > + pa = ((uint64_t) pa_hi<< 32) | pa_lo; > + cpu_physical_memory_write(pa, sense_ptr, sense_len); > + cmd->frame->header.sense_len = sense_len; > + return sense_len; > +} > + > +static void megasas_write_sense(struct megasas_cmd_t *cmd, SCSISense sense) > +{ > + uint8_t *sense_ptr; > + uint8_t sense_len; > + > + sense_ptr = qemu_mallocz(cmd->frame->header.sense_len); > + sense_len = scsi_build_sense(sense, sense_ptr, > + cmd->frame->header.sense_len, 0); > + megasas_build_sense(cmd, sense_ptr, sense_len); > + qemu_free(sense_ptr); > +} > + > +static void megasas_copy_sense(struct megasas_cmd_t *cmd) > +{ > + uint8_t *sense_ptr; > + uint8_t sense_len; > + > + sense_ptr = qemu_mallocz(cmd->frame->header.sense_len); > + sense_len = scsi_req_get_sense(cmd->req, sense_ptr, > + cmd->frame->header.sense_len); > + megasas_build_sense(cmd, sense_ptr, sense_len); > + qemu_free(sense_ptr); > +} > + > +/* > + * Format an INQUIRY CDB > + */ > +static int megasas_setup_inquiry(uint8_t *cdb, int pg, > + uint8_t *buf, int len) > +{ > + memset(cdb, 0, 6); > + cdb[0] = INQUIRY; > + if (pg> 0) { > + cdb[1] = 0x1; > + cdb[2] = pg; > + } > + cdb[3] = (len>> 8)& 0xff; > + cdb[4] = (len& 0xff); > + return len; > +} > + > +/* > + * Encode lba and len into a READ_16/WRITE_16 CDB > + */ > +static void megasas_encode_lba(uint8_t *cdb, uint64_t lba, > + uint32_t len, int is_write) > +{ > + memset(cdb, 0x0, 16); > + if (is_write) { > + cdb[0] = WRITE_16; > + } else { > + cdb[0] = READ_16; > + } > + cdb[2] = (lba>> 56)& 0xff; > + cdb[3] = (lba>> 48)& 0xff; > + cdb[4] = (lba>> 40)& 0xff; > + cdb[5] = (lba>> 32)& 0xff; > + cdb[6] = (lba>> 24)& 0xff; > + cdb[7] = (lba>> 16)& 0xff; > + cdb[8] = (lba>> 8)& 0xff; > + cdb[9] = (lba)& 0xff; > + cdb[10] = (len>> 24)& 0xff; > + cdb[11] = (len>> 16)& 0xff; > + cdb[12] = (len>> 8)& 0xff; > + cdb[13] = (len)& 0xff; > +} > + > +/* > + * Utility functions > + */ > +static uint64_t megasas_fw_time(void) > +{ > + const time_t now = time(NULL); > + struct tm *curtime; > + uint64_t bcd_time; > + > + curtime = gmtime(&now); Please use qemu_get_timedate(). > + bcd_time = ((uint64_t)curtime->tm_sec& 0xff)<< 48 | > + ((uint64_t)curtime->tm_min& 0xff)<< 40 | > + ((uint64_t)curtime->tm_hour& 0xff)<< 32 | > + ((uint64_t)curtime->tm_mday& 0xff)<< 24 | > + ((uint64_t)curtime->tm_mon& 0xff)<< 16 | > + ((uint64_t)(curtime->tm_year + 1900)& 0xffff); > + > + return bcd_time; > +} > + > +static uint64_t megasas_gen_sas_addr(uint64_t id) > +{ > + uint64_t addr; > + > + addr = 0x5001a4aULL<< 36; > + addr |= id& 0xfffffffff; > + > + return addr; > +} > + > +/* > + * Frame handling > + */ > + > +static void megasas_dump_frame(struct megasas_cmd_t *cmd) > +{ > + DPRINTF("Frame %x: count %d pa %p context %x\n" > + "\t%016lx %016lx\n\t%016lx %016lx\n" > + "\t%016lx %016lx\n\t%016lx %016lx\n", > + cmd->index, cmd->count, cmd->frame, cmd->context, > + be64_to_cpu(cmd->frame->raw[0]), > + be64_to_cpu(cmd->frame->raw[1]), > + be64_to_cpu(cmd->frame->raw[2]), > + be64_to_cpu(cmd->frame->raw[3]), > + be64_to_cpu(cmd->frame->raw[4]), > + be64_to_cpu(cmd->frame->raw[5]), > + be64_to_cpu(cmd->frame->raw[6]), > + be64_to_cpu(cmd->frame->raw[7])); > +} > + > +static inline int megasas_next_index(MPTState *s, int index) Please let the compiler decide the inlining :). Just remove the "inline" from all functions in the .c file. > +{ > + index++; > + if (index == s->fw_cmds) { > + index = 0; > + } > + return index; > +} > + > +static inline struct megasas_cmd_t *megasas_lookup_frame(MPTState *s, > + target_phys_addr_t frame) > +{ > + struct megasas_cmd_t *cmd = NULL; > + int num = 0, index; > + > + index = s->reply_queue_index; > + > + while (num< s->fw_cmds) { > + if (s->frames[index].pa&& s->frames[index].pa == frame) { > + cmd =&s->frames[index]; > + break; > + } > + index = megasas_next_index(s, index); > + num++; > + } > + > + return cmd; > +} > + > +static inline struct megasas_cmd_t *megasas_next_frame(MPTState *s, > + target_phys_addr_t frame) > +{ > + struct megasas_cmd_t *cmd = NULL; > + int num = 0, index; > + > + cmd = megasas_lookup_frame(s, frame); > + if (cmd) { > + trace_megasas_qf_found(cmd->index, cmd->pa); > + return cmd; > + } > + index = s->reply_queue_index; > + num = 0; > + while (num< s->fw_cmds) { > + if (!s->frames[index].pa) { > + cmd =&s->frames[index]; > + break; > + } > + index = megasas_next_index(s, index); > + num++; > + } > + if (!cmd) { > + trace_megasas_qf_failed(frame); > + } > + trace_megasas_qf_new(index, cmd); > + return cmd; > +} > + > +static struct megasas_cmd_t * > +megasas_enqueue_frame(MPTState *s, target_phys_addr_t frame, int count) > +{ > + struct megasas_cmd_t *cmd = NULL; > + uint8_t frame_size = MFI_FRAME_SIZE; > + target_phys_addr_t frame_size_p = frame_size; > + > + cmd = megasas_next_frame(s, frame); > + /* All frames busy */ > + if (!cmd) { > + return NULL; > + } > + if (!cmd->pa) { > + cmd->pa = frame; > + cmd->frame = cpu_physical_memory_map(frame,&frame_size_p, 0); > + if (frame_size_p != frame_size) { > + trace_megasas_frame_map_failed(cmd->index,(unsigned long)frame); > + if (cmd->frame) { > + cpu_physical_memory_unmap(cmd->frame, frame_size_p, 0, 0); > + cmd->frame = NULL; > + cmd->pa = 0; > + } > + s->event_count++; > + return NULL; > + } > + cmd->pa_size = frame_size_p; > + cmd->context = le32_to_cpu(cmd->frame->header.context); > + } > + cmd->count = count; > + s->busy++; > + > + trace_megasas_qf_enqueue(cmd->index, cmd->count, cmd->context, > + s->reply_queue_index, s->busy); > + > + return cmd; > +} > + > +static void megasas_complete_frame(MPTState *s, int context) > +{ > + int tail; > + > + /* Decrement busy count */ > + s->busy--; > + > + if (!MEGASAS_INTR_ENABLED(s)) { > + trace_megasas_qf_complete_noirq(context); > + return; > + } > + > + /* Put command on the reply queue */ > + tail = s->reply_queue_index; > + stl_phys(s->reply_queue_pa + tail * sizeof(uint32_t), context); > + > + s->reply_queue_index = megasas_next_index(s, tail); > + trace_megasas_qf_complete(context, tail, s->busy, s->doorbell); > + > + /* Notify HBA */ > + s->doorbell++; > + if (s->doorbell == 1) { > + qemu_irq_raise(s->dev.irq[0]); > + } > +} > + > +static void megasas_reset_frames(MPTState *s) > +{ > + int i; > + struct megasas_cmd_t *cmd; > + > + for (i = 0; i< s->fw_cmds; i++) { > + cmd =&s->frames[i]; > + if (cmd->pa) { > + cpu_physical_memory_unmap(cmd->frame, cmd->pa_size, 0, 0); > + cmd->frame = NULL; > + cmd->pa = 0; > + } > + } > +} > + > +static void megasas_abort_command(struct megasas_cmd_t *cmd) > +{ > + if (cmd->req) { > + scsi_req_unref(cmd->req); > + cmd->req = NULL; > + } > +} > + > +static int megasas_init_firmware(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + target_phys_addr_t iq_pa, pa_hi, pa_lo, iq_pl, initq_size; > + struct mfi_init_qinfo *initq; > + > + iq_pl = le32_to_cpu(cmd->frame->init.header.data_len); > + pa_lo = le32_to_cpu(cmd->frame->init.qinfo_new_addr_lo); > + pa_hi = le32_to_cpu(cmd->frame->init.qinfo_new_addr_hi); > + iq_pa = (((uint64_t) pa_hi<< 32) | pa_lo); > + trace_megasas_init_firmware((int)iq_pl, (unsigned long)iq_pa); > + initq_size = sizeof(*initq); > + initq = cpu_physical_memory_map(iq_pa,&initq_size, 0); > + if (initq_size != sizeof(*initq)) { > + trace_megasas_init_queue_map_failed(cmd->index); > + if (initq) { > + cpu_physical_memory_unmap(initq, initq_size, 0, 0); > + } > + s->fw_state = MFI_FWSTATE_FAULT; > + s->event_count++; > + goto out; > + } > + s->reply_queue_len = le32_to_cpu(initq->rq_entries); > + pa_lo = le32_to_cpu(initq->rq_addr_lo); > + pa_hi = le32_to_cpu(initq->rq_addr_hi); > + s->reply_queue_pa = ((uint64_t) pa_hi<< 32) | pa_lo; > + pa_lo = le32_to_cpu(initq->ci_addr_lo); > + pa_hi = le32_to_cpu(initq->ci_addr_hi); > + s->consumer_pa = ((uint64_t) pa_hi<< 32) | pa_lo; > + pa_lo = le32_to_cpu(initq->pi_addr_lo); > + pa_hi = le32_to_cpu(initq->pi_addr_hi); > + s->producer_pa = ((uint64_t) pa_hi<< 32) | pa_lo; > + trace_megasas_init_queue((unsigned long)s->reply_queue_pa, > + s->reply_queue_len, (unsigned long)s->producer_pa, > + (unsigned long)s->consumer_pa); > + s->reply_queue_index = ldl_phys(s->producer_pa); Possible endianness problem? > + megasas_reset_frames(s); > + s->fw_state = MFI_FWSTATE_OPERATIONAL; > +out: > + cpu_physical_memory_unmap(initq, initq_size, 0, 0); > + return 0; > +} > + > +static int megasas_map_dcmd(struct megasas_cmd_t *cmd) > +{ > + target_phys_addr_t iov_pa, iov_size_p; > + uint32_t iov_size = 0; > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int is_sgl64 = (flags& MFI_FRAME_SGL64) ? 1 : 0; > + > + if (!cmd->frame->header.sge_count) { > + trace_megasas_dcmd_zero_sge(cmd->index); > + cmd->iov_size = 0; > + cmd->iov_buf = NULL; > + return 0; > + } else if (cmd->frame->header.sge_count> 1) { > + trace_megasas_dcmd_invalid_sge(cmd->index, > + cmd->frame->header.sge_count); > + return -1; > + } > + if (is_sgl64) { > + iov_pa = le64_to_cpu(cmd->frame->dcmd.sgl.sg64[0].addr); > + iov_size = le32_to_cpu(cmd->frame->dcmd.sgl.sg64[0].len); > + } else { > + iov_pa = le32_to_cpu(cmd->frame->dcmd.sgl.sg32[0].addr); > + iov_size = le32_to_cpu(cmd->frame->dcmd.sgl.sg32[0].len); > + } > + iov_size_p = iov_size; > + cmd->iov_buf = cpu_physical_memory_map(iov_pa,&iov_size_p, 1); > + if (iov_size_p != iov_size) { > + trace_megasas_dcmd_map_failed(cmd->index); > + if (cmd->iov_buf) { > + cpu_physical_memory_unmap(cmd->iov_buf, iov_size, 1, 0); > + cmd->iov_buf = NULL; > + cmd->iov_size = 0; > + } > + return -1; > + } > + cmd->iov_size = iov_size; > + return iov_size; > +} > + > +static int megasas_finish_dcmd(struct megasas_cmd_t *cmd, uint32_t size) > +{ > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int is_sgl64 = (flags& MFI_FRAME_SGL64) ? 1 : 0; > + int sgl_addr_size = is_sgl64 ? sizeof(uint64_t) : sizeof(uint32_t); > + > + if (size> cmd->iov_size) { > + trace_megasas_dcmd_buffer_overflow(cmd->index, size, cmd->iov_size); > + size = cmd->iov_size; > + } > + if (!cmd->iov_buf) { > + return 0; > + } > + > + cpu_physical_memory_unmap(cmd->iov_buf, cmd->iov_size, 1, size); > + if (cmd->iov_size> size) { > + stl_phys(cmd->pa + offsetof(struct mfi_dcmd_frame, sgl) > + + sgl_addr_size, size); Endianness? > + } > + return size; > +} > + > +static int megasas_ctrl_get_info(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + struct mfi_ctrl_info *info = cmd->iov_buf; > + int n, num_ld_disks = 0; > + > + for (n = 0; n< s->fw_luns; n++) { > + if (s->bus.devs[n]) { > + num_ld_disks++; > + } > + } > + > + memset(cmd->iov_buf, 0x0, cmd->iov_size); > + if (cmd->iov_size != sizeof(struct mfi_ctrl_info)) { > + trace_megasas_dcmd_invalid_xfer_len(cmd->index, "Ctrl Get Info", > + cmd->iov_size); > + return MFI_STAT_INVALID_PARAMETER; > + } > + > + trace_megasas_dcmd_enter(cmd->index,"MFI DCMD get controller info"); > + info->pci.vendor = PCI_VENDOR_ID_LSI_LOGIC; > + info->pci.device = PCI_DEVICE_ID_LSI_SAS1078; > + info->pci.subvendor = PCI_VENDOR_ID_LSI_LOGIC; > + info->pci.subdevice = 0x1013; Endianness issues in this whole function :) > + > + info->host.type = MFI_INFO_HOST_PCIX; > + info->device.type = MFI_INFO_DEV_SAS3G; > + info->device.port_count = 2; > + info->device.port_addr[0] = megasas_gen_sas_addr((unsigned long)s); > + > + memcpy(info->product_name, "MegaRAID SAS 8708EM2", 20); > + snprintf(info->serial_number, 32, "QEMU%08lx", > + (unsigned long)s& 0xFFFFFFFF); > + snprintf(info->package_version, 0x60, "%s-QEMU", QEMU_VERSION); > + memcpy(info->image_component[0].name, "APP", 8); > + memcpy(info->image_component[0].version, MEGASAS_VERSION "-QEMU", 32); > + memcpy(info->image_component[0].build_date, __DATE__, 16); > + memcpy(info->image_component[0].build_time, __TIME__, 16); > + info->image_component_count = 1; > + info->current_fw_time = megasas_fw_time(); > + info->max_arms = 32; > + info->max_spans = 8; > + info->max_arrays = MEGASAS_MAX_ARRAYS; > + info->max_lds = s->fw_luns; > + info->max_cmds = s->fw_cmds; > + info->max_sg_elements = s->fw_sge; > + info->max_request_size = MEGASAS_MAX_SECTORS; > + info->lds_present = num_ld_disks; > + info->pd_present = num_ld_disks + 1; > + info->pd_disks_present = num_ld_disks; > + info->hw_present = MFI_INFO_HW_NVRAM | MFI_INFO_HW_MEM | MFI_INFO_HW_FLASH; > + info->memory_size = 512; > + info->nvram_size = 32; > + info->flash_size = 16; > + info->raid_levels = MFI_INFO_RAID_0; > + info->adapter_ops = MFI_INFO_AOPS_RBLD_RATE | > + MFI_INFO_AOPS_SELF_DIAGNOSTIC | > + MFI_INFO_AOPS_MIXED_ARRAY; > + info->ld_ops = MFI_INFO_LDOPS_DISK_CACHE_POLICY | > + MFI_INFO_LDOPS_ACCESS_POLICY | > + MFI_INFO_LDOPS_IO_POLICY | > + MFI_INFO_LDOPS_WRITE_POLICY | > + MFI_INFO_LDOPS_READ_POLICY; > + info->max_strips_per_io = s->fw_sge; > + info->stripe_sz_ops.min = 3; > + info->stripe_sz_ops.max = ffs(MEGASAS_MAX_SECTORS + 1) - 1; > + info->properties.pred_fail_poll_interval = 300; > + info->properties.intr_throttle_cnt = 16; > + info->properties.intr_throttle_timeout = 50; > + info->properties.rebuild_rate = 30; > + info->properties.patrol_read_rate = 30; > + info->properties.bgi_rate = 30; > + info->properties.cc_rate = 30; > + info->properties.recon_rate = 30; > + info->properties.cache_flush_interval = 4; > + info->properties.spinup_drv_cnt = 2; > + info->properties.spinup_delay = 6; > + info->properties.ecc_bucket_size = 15; > + info->properties.ecc_bucket_leak_rate = 1440; > + info->properties.expose_encl_devices = 1; > + info->pd_ops = MFI_INFO_PDOPS_FORCE_ONLINE | MFI_INFO_PDOPS_FORCE_OFFLINE; > + info->pd_mix_support = MFI_INFO_PDMIX_SAS | > + MFI_INFO_PDMIX_SATA | > + MFI_INFO_PDMIX_LD; > + > + return MFI_STAT_OK; > +} > + > +static int megasas_mfc_get_defaults(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + struct mfi_defaults *info = cmd->iov_buf; > + > + memset(cmd->iov_buf, 0x0, cmd->iov_size); > + if (cmd->iov_size != sizeof(struct mfi_defaults)) { > + trace_megasas_dcmd_invalid_xfer_len(cmd->index,"MFC Get defaults", > + cmd->iov_size); > + return MFI_STAT_INVALID_PARAMETER; > + } > + > + info->stripe_size = 3; ... and more endianness > + info->flush_time = 4; > + info->background_rate = 30; > + info->allow_mix_in_enclosure = 1; > + info->allow_mix_in_ld = 1; > + info->direct_pd_mapping = 1; > + info->bios_enumerate_lds = 1; > + info->disable_ctrl_r = 1; > + info->expose_enclosure_devices = 1; > + info->disable_preboot_cli = 1; > + info->cluster_disable = 1; > + > + return MFI_STAT_OK; > +} > + > +static int megasas_dcmd_get_bios_info(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + struct mfi_bios_data *info = cmd->iov_buf; > + > + memset(cmd->iov_buf, 0x0, cmd->iov_size); > + if (cmd->iov_size != sizeof(struct mfi_bios_data)) { > + trace_megasas_dcmd_invalid_xfer_len(cmd->index,"Get BIOS info", > + cmd->iov_size); > + return MFI_STAT_INVALID_PARAMETER; > + } > + info->continue_on_error = 1; > + > + return MFI_STAT_OK; > +} > + > +static int megasas_dcmd_get_fw_time(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + uint64_t fw_time; > + > + fw_time = megasas_fw_time(); > + > + memcpy(cmd->iov_buf,&fw_time, sizeof(fw_time)); > + return MFI_STAT_OK; > +} > + > +static int megasas_dcmd_set_fw_time(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + uint64_t fw_time; > + > + memset(cmd->iov_buf, 0x0, cmd->iov_size); > + memcpy(&fw_time, cmd->frame->dcmd.mbox, sizeof(fw_time)); > + > + trace_megasas_dcmd_set_fw_time(cmd->index, fw_time); > + fw_time = megasas_fw_time(); > + memcpy(cmd->iov_buf,&fw_time, sizeof(fw_time)); > + return MFI_STAT_OK; > +} > + > +static int megasas_event_info(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + struct mfi_evt_log_state *info = cmd->iov_buf; > + > + memset(info, 0, cmd->iov_size); > + info->newest_seq_num = s->event_count; Endianness > + info->shutdown_seq_num = s->shutdown_event; > + info->boot_seq_num = s->boot_event; > + > + return MFI_STAT_OK; > +} > + > +static int megasas_dcmd_pd_get_list(MPTState *s, struct megasas_cmd_t *cmd) > +{ > + struct mfi_pd_list *info = cmd->iov_buf; > + uint32_t offset, num_pd_disks = 0, max_luns; > + uint16_t dev_id; > + > + memset(cmd->iov_buf, 0, cmd->iov_size); > + offset = 8; > + if (cmd->iov_size< (offset + sizeof(struct mfi_pd_address))) { > + trace_megasas_dcmd_invalid_xfer_len(cmd->index,"PD get list", > + cmd->iov_size); > + return MFI_STAT_INVALID_PARAMETER; > + } > + > + max_luns = (cmd->iov_size - offset) / sizeof(struct mfi_pd_address); > + if (max_luns> s->fw_luns) { > + max_luns = s->fw_luns; > + } > + trace_megasas_dcmd_enter(cmd->index, "PD get list"); > + > + for (dev_id = 0; dev_id< max_luns; dev_id++) { > + SCSIDevice *sdev; > + > + sdev = s->bus.devs[dev_id]; > + if (sdev) { > + info->addr[num_pd_disks].device_id = dev_id; Here too info is guest visible, so needs to be endian converted. I'll stop reading here. Please check all functions for possible endianness problems :) Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/05/2011 03:38 PM, Alexander Graf wrote: >> >> + if (is_sgl64) { >> + iov_pa = ldq_phys(pa); >> + } else { >> + iov_pa = ldl_phys(pa); > > These load data from memory in target endianness. Are you sure that's > what you want? I'd expect this to be defined as little endian > (especially given that ldq and ldl on the same address work). Seems to be target endianness from the corresponding Linux code: if (sge_count) { scsi_for_each_sg(scp, os_sgl, sge_count, i) { mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); } } ... if (sge_count) { scsi_for_each_sg(scp, os_sgl, sge_count, i) { mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); } } Note that this is _either_ a ldq or a ldl depending on what the driver told the device. It is not accessing a 64-bit value as 32-bit. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/05/2011 03:59 PM, Paolo Bonzini wrote: > On 07/05/2011 03:38 PM, Alexander Graf wrote: >>> + if (is_sgl64) { >>> + iov_pa = ldq_phys(pa); >>> + } else { >>> + iov_pa = ldl_phys(pa); >> These load data from memory in target endianness. Are you sure that's >> what you want? I'd expect this to be defined as little endian >> (especially given that ldq and ldl on the same address work). > Seems to be target endianness from the corresponding Linux code: > > if (sge_count) { > scsi_for_each_sg(scp, os_sgl, sge_count, i) { > mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); > mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); > } > } > > ... > > if (sge_count) { > scsi_for_each_sg(scp, os_sgl, sge_count, i) { > mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); > mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); > } > } > > Note that this is _either_ a ldq or a ldl depending on what the driver told > the device. It is not accessing a 64-bit value as 32-bit. So how would the device know which endianness the target is then? This looks like broken Linux code to me then. Christoph, is the above correct for big endian systems? Btw, reading through the Qemu sources, there is ldl_le_p() to read little endian values from memory. I haven't quite found one for _phys though. We can just add that however. Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 5, 2011 at 12:03 PM, Hannes Reinecke <hare@suse.de> wrote: > +static void megasas_unmap_sgl(struct megasas_cmd_t *cmd) > +{ > + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); > + int i, is_write = (flags & MFI_FRAME_DIR_WRITE) ? 1 : 0; > + > + for (i = 0; i < cmd->frame->header.sge_count; i++) { > + cpu_physical_memory_unmap(cmd->iov[i].iov_base, cmd->iov[i].iov_len, > + is_write, cmd->iov[i].iov_len); > + } We cannot map control structures from guest memory and treating them as valid request state later on. A malicious guest can issue the request, then change the fields the control structure while QEMU is processing the I/O, and then this function will execute with is_write/sge_count no longer the same as when the request started. Good practice would be to copy in any request state needed instead of reaching into guest memory at later points of the request lifecycle. This way a malicious guest can never cause QEMU to crash or do something due to inconsistent state. The particular problem I see here is starting the request with sge_count=1 and then setting it to sge_count=255. We will perform invalid iov[] accesses. Stefan -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 07/05/2011 05:21 PM, Stefan Hajnoczi wrote: > On Tue, Jul 5, 2011 at 12:03 PM, Hannes Reinecke<hare@suse.de> wrote: >> +static void megasas_unmap_sgl(struct megasas_cmd_t *cmd) >> +{ >> + uint16_t flags = le16_to_cpu(cmd->frame->header.flags); >> + int i, is_write = (flags& MFI_FRAME_DIR_WRITE) ? 1 : 0; >> + >> + for (i = 0; i< cmd->frame->header.sge_count; i++) { >> + cpu_physical_memory_unmap(cmd->iov[i].iov_base, cmd->iov[i].iov_len, >> + is_write, cmd->iov[i].iov_len); >> + } > > We cannot map control structures from guest memory and treating them > as valid request state later on. > Yes, I've been working on that one already. What I'll be doing is to read in the sge count during 'map_sgl' and store this value internally (in ->iov_cnt). And during unmap I'll be using this value instead of the frame-provided one. That way we'll be checking the sge_count field only once when we slurp in the entire frame. > A malicious guest can issue the request, then change the fields the > control structure while QEMU is processing the I/O, and then this > function will execute with is_write/sge_count no longer the same as > when the request started. > > Good practice would be to copy in any request state needed instead of > reaching into guest memory at later points of the request lifecycle. > This way a malicious guest can never cause QEMU to crash or do > something due to inconsistent state. > See above, that's what I'll be doing. > The particular problem I see here is starting the request with > sge_count=1 and then setting it to sge_count=255. We will perform > invalid iov[] accesses. > Thanks for the hint. Will be fixing it up. Cheers, Hannes
On 07/06/2011 08:20 AM, Hannes Reinecke wrote: >> >> We cannot map control structures from guest memory and treating them >> as valid request state later on. >> > Yes, I've been working on that one already. > What I'll be doing is to read in the sge count during 'map_sgl' and > store this value internally (in ->iov_cnt). And during unmap I'll be > using this value instead of the frame-provided one. > > That way we'll be checking the sge_count field only once when we slurp > in the entire frame. Note the flags too. Perhaps it's easier to simply copy the entire frame header... Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 5804662..05d14ab 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -398,7 +398,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) "buffer size %zd\n", req->cmd.xfer); pages = buflen++; outbuf[buflen++] = 0x00; // list of supported pages (this page) - outbuf[buflen++] = 0x80; // unit serial number + if (s->serial) + outbuf[buflen++] = 0x80; // unit serial number outbuf[buflen++] = 0x83; // device identification if (s->drive_kind == SCSI_HD) { outbuf[buflen++] = 0xb0; // block limits @@ -409,8 +410,14 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) } case 0x80: /* Device serial number, optional */ { - int l = strlen(s->serial); + int l; + if (!s->serial) { + DPRINTF("Inquiry (EVPD[Serial number] not supported\n"); + return -1; + } + + l = strlen(s->serial); if (l > req->cmd.xfer) l = req->cmd.xfer; if (l > 20) @@ -1203,7 +1210,9 @@ static int scsi_initfn(SCSIDevice *dev, SCSIDriveKind kind) if (!s->serial) { /* try to fall back to value set with legacy -drive serial=... */ dinfo = drive_get_by_blockdev(s->bs); - s->serial = qemu_strdup(*dinfo->serial ? dinfo->serial : "0"); + if (*dinfo->serial) { + s->serial = qemu_strdup(dinfo->serial); + } } if (!s->version) {
If the serial number is not set we should mask it out in the list of supported VPD pages and mark it as not supported. Signed-off-by: Hannes Reinecke <hare@suse.de> --- hw/scsi-disk.c | 15 ++++++++++++--- 1 files changed, 12 insertions(+), 3 deletions(-)