diff mbox

[RFC,v2] tcg: Optimize fence instructions

Message ID 20160809231246.4537-1-bobby.prani@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pranith Kumar Aug. 9, 2016, 11:12 p.m. UTC
This commit optimizes fence instructions. Two optimizations are
currently implemented. These are:

1. Unnecessary duplicate fence instructions

   If the same fence instruction is detected consecutively, we remove
   one instance of it.

   ex: mb; mb => mb, strl; strl => strl

2. Merging weaker fence with subsequent/previous stronger fence

   load-acquire/store-release fence can be combined with a full fence
   without relaxing the ordering constraint.

   ex: a) ld; ldaq; mb => ld; mb
       b) mb; strl; st => mb; st

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
v2:
  - Properly remove current op
  - Reset only when you encounter memory operations or end of block
  - Review comments from v1

 tcg/optimize.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg.c      |  4 +++
 tcg/tcg.h      |  1 +
 3 files changed, 89 insertions(+)

Comments

no-reply@patchew.org Aug. 9, 2016, 11:18 p.m. UTC | #1
Hi,

Your series failed automatic build test. Please find the testing commands and
their output below. If you have docker installed, you can probably reproduce it
locally.

Message-id: 20160809231246.4537-1-bobby.prani@gmail.com
Type: series
Subject: [Qemu-devel] [RFC v2 PATCH] tcg: Optimize fence instructions

=== TEST SCRIPT BEGIN ===
#!/bin/bash
set -e
git submodule update --init dtc
make J=8 docker-test-quick@centos6

# we need CURL DPRINTF patch
# http://patchew.org/QEMU/1470027888-24381-1-git-send-email-famz%40redhat.com/
#make J=8 docker-test-mingw@fedora
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]         patchew/20160809231246.4537-1-bobby.prani@gmail.com -> patchew/20160809231246.4537-1-bobby.prani@gmail.com
Switched to a new branch 'test'
7f383b9 tcg: Optimize fence instructions

=== OUTPUT BEGIN ===
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '65cc4d2748a2c2e6f27f1cf39e07a5dbabd80ebf'
  BUILD centos6
  ARCHIVE qemu.tgz
  ARCHIVE dtc.tgz
  COPY RUNNER
  RUN test-quick in centos6
No C++ compiler available; disabling C++ specific optional code
Install prefix    /tmp/qemu-test/src/tests/docker/install
BIOS directory    /tmp/qemu-test/src/tests/docker/install/share/qemu
binary directory  /tmp/qemu-test/src/tests/docker/install/bin
library directory /tmp/qemu-test/src/tests/docker/install/lib
module directory  /tmp/qemu-test/src/tests/docker/install/lib/qemu
libexec directory /tmp/qemu-test/src/tests/docker/install/libexec
include directory /tmp/qemu-test/src/tests/docker/install/include
config directory  /tmp/qemu-test/src/tests/docker/install/etc
local state directory   /tmp/qemu-test/src/tests/docker/install/var
Manual directory  /tmp/qemu-test/src/tests/docker/install/share/man
ELF interp prefix /usr/gnemul/qemu-%M
Source path       /tmp/qemu-test/src
C compiler        cc
Host C compiler   cc
C++ compiler      
Objective-C compiler cc
ARFLAGS           rv
CFLAGS            -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -pthread -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include   -g 
QEMU_CFLAGS       -I/usr/include/pixman-1    -fPIE -DPIE -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common  -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -fstack-protector-all
LDFLAGS           -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g 
make              make
install           install
python            python -B
smbd              /usr/sbin/smbd
module support    no
host CPU          x86_64
host big endian   no
target list       x86_64-softmmu aarch64-softmmu
tcg debug enabled no
gprof enabled     no
sparse enabled    no
strip binaries    yes
profiler          no
static build      no
pixman            system
SDL support       yes (1.2.14)
GTK support       no 
GTK GL support    no
VTE support       no 
TLS priority      NORMAL
GNUTLS support    no
GNUTLS rnd        no
libgcrypt         no
libgcrypt kdf     no
nettle            no 
nettle kdf        no
libtasn1          no
curses support    no
virgl support     no
curl support      no
mingw32 support   no
Audio drivers     oss
Block whitelist (rw) 
Block whitelist (ro) 
VirtFS support    no
VNC support       yes
VNC SASL support  no
VNC JPEG support  no
VNC PNG support   no
xen support       no
brlapi support    no
bluez  support    no
Documentation     no
PIE               yes
vde support       no
netmap support    no
Linux AIO support no
ATTR/XATTR support yes
Install blobs     yes
KVM support       yes
RDMA support      no
TCG interpreter   no
fdt support       yes
preadv support    yes
fdatasync         yes
madvise           yes
posix_madvise     yes
uuid support      no
libcap-ng support no
vhost-net support yes
vhost-scsi support yes
Trace backends    log
spice support     no 
rbd support       no
xfsctl support    no
smartcard support no
libusb            no
usb net redir     no
OpenGL support    no
OpenGL dmabufs    no
libiscsi support  no
libnfs support    no
build guest agent yes
QGA VSS support   no
QGA w32 disk info no
QGA MSI support   no
seccomp support   no
coroutine backend ucontext
coroutine pool    yes
GlusterFS support no
Archipelago support no
gcov              gcov
gcov enabled      no
TPM support       yes
libssh2 support   no
TPM passthrough   yes
QOM debugging     yes
vhdx              no
lzo support       no
snappy support    no
bzip2 support     no
NUMA host support no
tcmalloc support  no
jemalloc support  no
avx2 optimization no
  GEN   aarch64-softmmu/config-devices.mak.tmp
  GEN   x86_64-softmmu/config-devices.mak.tmp
  GEN   config-host.h
  GEN   qemu-options.def
  GEN   qmp-commands.h
  GEN   qapi-types.h
  GEN   qapi-visit.h
  GEN   qapi-event.h
  GEN   x86_64-softmmu/config-devices.mak
  GEN   aarch64-softmmu/config-devices.mak
  GEN   qmp-introspect.h
  GEN   tests/test-qapi-types.h
  GEN   tests/test-qapi-visit.h
  GEN   tests/test-qmp-commands.h
  GEN   tests/test-qapi-event.h
  GEN   tests/test-qmp-introspect.h
  GEN   config-all-devices.mak
  GEN   trace/generated-events.h
  GEN   trace/generated-tracers.h
  GEN   trace/generated-tcg-tracers.h
  GEN   trace/generated-helpers-wrappers.h
  GEN   trace/generated-helpers.h
  CC    tests/qemu-iotests/socket_scm_helper.o
  GEN   qga/qapi-generated/qga-qapi-types.h
  GEN   qga/qapi-generated/qga-qapi-visit.h
  GEN   qga/qapi-generated/qga-qmp-commands.h
  GEN   qga/qapi-generated/qga-qapi-types.c
  GEN   qga/qapi-generated/qga-qapi-visit.c
  GEN   qga/qapi-generated/qga-qmp-marshal.c
  GEN   qmp-introspect.c
  GEN   qapi-types.c
  GEN   qapi-visit.c
  GEN   qapi-event.c
  CC    qapi/qapi-visit-core.o
  CC    qapi/qapi-dealloc-visitor.o
  CC    qapi/qmp-input-visitor.o
  CC    qapi/qmp-output-visitor.o
  CC    qapi/qmp-registry.o
  CC    qapi/qmp-dispatch.o
  CC    qapi/string-input-visitor.o
  CC    qapi/string-output-visitor.o
  CC    qapi/opts-visitor.o
  CC    qapi/qapi-clone-visitor.o
  CC    qapi/qmp-event.o
  CC    qapi/qapi-util.o
  CC    qobject/qnull.o
  CC    qobject/qint.o
  CC    qobject/qstring.o
  CC    qobject/qdict.o
  CC    qobject/qlist.o
  CC    qobject/qfloat.o
  CC    qobject/qbool.o
  CC    qobject/qjson.o
  CC    qobject/qobject.o
  CC    qobject/json-lexer.o
  CC    qobject/json-streamer.o
  CC    qobject/json-parser.o
  GEN   trace/generated-events.c
  CC    trace/control.o
  CC    trace/qmp.o
  CC    util/osdep.o
  CC    util/cutils.o
  CC    util/unicode.o
  CC    util/qemu-timer-common.o
  CC    util/compatfd.o
  CC    util/event_notifier-posix.o
  CC    util/mmap-alloc.o
  CC    util/oslib-posix.o
  CC    util/qemu-openpty.o
  CC    util/qemu-thread-posix.o
  CC    util/memfd.o
  CC    util/envlist.o
  CC    util/path.o
  CC    util/module.o
  CC    util/bitmap.o
  CC    util/bitops.o
  CC    util/hbitmap.o
  CC    util/fifo8.o
  CC    util/acl.o
  CC    util/error.o
  CC    util/id.o
  CC    util/qemu-error.o
  CC    util/iov.o
  CC    util/qemu-config.o
  CC    util/qemu-sockets.o
  CC    util/uri.o
  CC    util/notify.o
  CC    util/qemu-option.o
  CC    util/qemu-progress.o
  CC    util/hexdump.o
  CC    util/crc32c.o
  CC    util/throttle.o
  CC    util/getauxval.o
  CC    util/readline.o
  CC    util/rfifolock.o
  CC    util/rcu.o
  CC    util/qemu-coroutine.o
  CC    util/qemu-coroutine-lock.o
  CC    util/qemu-coroutine-io.o
  CC    util/qemu-coroutine-sleep.o
  CC    util/coroutine-ucontext.o
  CC    util/buffer.o
  CC    util/timed-average.o
  CC    util/base64.o
  CC    util/log.o
  CC    util/qdist.o
  CC    util/qht.o
  CC    util/range.o
  CC    crypto/pbkdf-stub.o
  CC    stubs/arch-query-cpu-def.o
  CC    stubs/bdrv-next-monitor-owned.o
  CC    stubs/blk-commit-all.o
/tmp/qemu-test/src/util/qht.c: In function ‘qht_reset_size’:
/tmp/qemu-test/src/util/qht.c:413: warning: ‘new’ may be used uninitialized in this function
  CC    stubs/blockdev-close-all-bdrv-states.o
  CC    stubs/clock-warp.o
  CC    stubs/cpu-get-clock.o
  CC    stubs/cpu-get-icount.o
  CC    stubs/dump.o
  CC    stubs/fdset-add-fd.o
  CC    stubs/fdset-find-fd.o
  CC    stubs/fdset-get-fd.o
  CC    stubs/fdset-remove-fd.o
  CC    stubs/gdbstub.o
  CC    stubs/get-fd.o
  CC    stubs/get-next-serial.o
  CC    stubs/get-vm-name.o
  CC    stubs/iothread-lock.o
  CC    stubs/is-daemonized.o
  CC    stubs/machine-init-done.o
  CC    stubs/migr-blocker.o
  CC    stubs/mon-is-qmp.o
  CC    stubs/mon-printf.o
  CC    stubs/monitor-init.o
  CC    stubs/notify-event.o
  CC    stubs/qtest.o
  CC    stubs/replay.o
  CC    stubs/replay-user.o
  CC    stubs/reset.o
  CC    stubs/runstate-check.o
  CC    stubs/set-fd-handler.o
  CC    stubs/slirp.o
  CC    stubs/sysbus.o
  CC    stubs/trace-control.o
  CC    stubs/uuid.o
  CC    stubs/vm-stop.o
  CC    stubs/vmstate.o
  CC    stubs/cpus.o
  CC    stubs/kvm.o
  CC    stubs/qmp_pc_dimm_device_list.o
  CC    stubs/target-monitor-defs.o
  CC    stubs/target-get-monitor-def.o
  CC    stubs/vhost.o
  CC    stubs/iohandler.o
  CC    stubs/smbios_type_38.o
  CC    stubs/ipmi.o
  CC    stubs/pc_madt_cpu_entry.o
  CC    contrib/ivshmem-client/ivshmem-client.o
  CC    contrib/ivshmem-client/main.o
  CC    contrib/ivshmem-server/ivshmem-server.o
  CC    contrib/ivshmem-server/main.o
  CC    qemu-nbd.o
  CC    async.o
  CC    thread-pool.o
  CC    block.o
  CC    blockjob.o
  CC    main-loop.o
  CC    iohandler.o
  CC    qemu-timer.o
  CC    aio-posix.o
  CC    qemu-io-cmds.o
  CC    block/raw_bsd.o
  CC    block/qcow.o
  CC    block/vdi.o
  CC    block/vmdk.o
  CC    block/cloop.o
  CC    block/bochs.o
  CC    block/vpc.o
  CC    block/vvfat.o
  CC    block/qcow2.o
  CC    block/qcow2-refcount.o
  CC    block/qcow2-cluster.o
  CC    block/qcow2-snapshot.o
  CC    block/qcow2-cache.o
  CC    block/qed.o
  CC    block/qed-gencb.o
  CC    block/qed-l2-cache.o
  CC    block/qed-table.o
  CC    block/qed-cluster.o
  CC    block/qed-check.o
  CC    block/quorum.o
  CC    block/parallels.o
  CC    block/blkdebug.o
  CC    block/blkverify.o
  CC    block/blkreplay.o
  CC    block/block-backend.o
  CC    block/snapshot.o
  CC    block/qapi.o
  CC    block/raw-posix.o
  CC    block/null.o
  CC    block/mirror.o
  CC    block/commit.o
  CC    block/io.o
  CC    block/throttle-groups.o
  CC    block/nbd.o
  CC    block/nbd-client.o
  CC    block/sheepdog.o
  CC    block/accounting.o
  CC    block/dirty-bitmap.o
  CC    block/write-threshold.o
  CC    block/crypto.o
  CC    nbd/server.o
  CC    nbd/client.o
  CC    nbd/common.o
  CC    block/dmg.o
  CC    crypto/init.o
  CC    crypto/hash.o
  CC    crypto/hash-glib.o
  CC    crypto/aes.o
  CC    crypto/desrfb.o
  CC    crypto/cipher.o
  CC    crypto/tlscreds.o
  CC    crypto/tlscredsanon.o
  CC    crypto/tlscredsx509.o
  CC    crypto/tlssession.o
  CC    crypto/secret.o
  CC    crypto/random-platform.o
  CC    crypto/pbkdf.o
  CC    crypto/ivgen.o
  CC    crypto/ivgen-essiv.o
  CC    crypto/ivgen-plain.o
  CC    crypto/ivgen-plain64.o
  CC    crypto/afsplit.o
  CC    crypto/xts.o
  CC    crypto/block.o
  CC    crypto/block-qcow.o
  CC    crypto/block-luks.o
  CC    io/channel.o
  CC    io/channel-buffer.o
  CC    io/channel-command.o
  CC    io/channel-file.o
  CC    io/channel-socket.o
  CC    io/channel-tls.o
  CC    io/channel-watch.o
  CC    io/channel-websock.o
  CC    io/channel-util.o
  CC    io/task.o
  CC    qom/object.o
  CC    qom/container.o
  CC    qom/qom-qobject.o
  CC    qom/object_interfaces.o
  GEN   qemu-img-cmds.h
  CC    qemu-io.o
  CC    qemu-bridge-helper.o
  CC    blockdev.o
  CC    blockdev-nbd.o
  CC    iothread.o
  CC    qdev-monitor.o
  CC    device-hotplug.o
  CC    os-posix.o
  CC    qemu-char.o
  CC    page_cache.o
  CC    accel.o
  CC    bt-host.o
  CC    bt-vhci.o
  CC    dma-helpers.o
  CC    vl.o
  CC    tpm.o
  CC    device_tree.o
  GEN   qmp-marshal.c
  CC    qmp.o
  CC    hmp.o
  CC    tcg-runtime.o
  CC    audio/audio.o
  CC    audio/noaudio.o
  CC    audio/wavaudio.o
  CC    audio/mixeng.o
  CC    audio/sdlaudio.o
  CC    audio/ossaudio.o
  CC    audio/wavcapture.o
  CC    backends/rng.o
  CC    backends/rng-egd.o
  CC    backends/rng-random.o
  CC    backends/msmouse.o
  CC    backends/testdev.o
  CC    backends/tpm.o
  CC    backends/hostmem.o
  CC    backends/hostmem-ram.o
  CC    backends/hostmem-file.o
  CC    block/stream.o
  CC    block/backup.o
  CC    disas/arm.o
  CC    disas/i386.o
  CC    fsdev/qemu-fsdev-dummy.o
  CC    fsdev/qemu-fsdev-opts.o
  CC    hw/acpi/core.o
  CC    hw/acpi/piix4.o
  CC    hw/acpi/pcihp.o
  CC    hw/acpi/ich9.o
  CC    hw/acpi/tco.o
  CC    hw/acpi/cpu_hotplug.o
  CC    hw/acpi/memory_hotplug.o
  CC    hw/acpi/memory_hotplug_acpi_table.o
  CC    hw/acpi/cpu.o
  CC    hw/acpi/acpi_interface.o
  CC    hw/acpi/bios-linker-loader.o
  CC    hw/acpi/aml-build.o
  CC    hw/acpi/ipmi.o
  CC    hw/audio/sb16.o
  CC    hw/audio/es1370.o
  CC    hw/audio/ac97.o
  CC    hw/audio/fmopl.o
  CC    hw/audio/adlib.o
  CC    hw/audio/gus.o
  CC    hw/audio/gusemu_hal.o
  CC    hw/audio/gusemu_mixer.o
  CC    hw/audio/cs4231a.o
  CC    hw/audio/intel-hda.o
  CC    hw/audio/hda-codec.o
  CC    hw/audio/pcspk.o
  CC    hw/audio/wm8750.o
  CC    hw/audio/pl041.o
  CC    hw/audio/lm4549.o
  CC    hw/audio/marvell_88w8618.o
  CC    hw/block/block.o
  CC    hw/block/cdrom.o
  CC    hw/block/hd-geometry.o
  CC    hw/block/fdc.o
  CC    hw/block/m25p80.o
  CC    hw/block/nand.o
  CC    hw/block/pflash_cfi01.o
  CC    hw/block/pflash_cfi02.o
  CC    hw/block/ecc.o
  CC    hw/block/onenand.o
  CC    hw/block/nvme.o
  CC    hw/bt/core.o
  CC    hw/bt/l2cap.o
  CC    hw/bt/sdp.o
  CC    hw/bt/hci.o
  CC    hw/bt/hid.o
  CC    hw/bt/hci-csr.o
  CC    hw/char/ipoctal232.o
  CC    hw/char/parallel.o
  CC    hw/char/pl011.o
  CC    hw/char/serial.o
  CC    hw/char/serial-isa.o
  CC    hw/char/serial-pci.o
  CC    hw/char/virtio-console.o
  CC    hw/char/cadence_uart.o
  CC    hw/char/debugcon.o
  CC    hw/char/imx_serial.o
  CC    hw/core/qdev.o
  CC    hw/core/qdev-properties.o
  CC    hw/core/bus.o
  CC    hw/core/fw-path-provider.o
  CC    hw/core/irq.o
  CC    hw/core/hotplug.o
  CC    hw/core/ptimer.o
  CC    hw/core/sysbus.o
  CC    hw/core/machine.o
  CC    hw/core/null-machine.o
  CC    hw/core/loader.o
  CC    hw/core/qdev-properties-system.o
  CC    hw/core/register.o
  CC    hw/core/platform-bus.o
  CC    hw/display/ads7846.o
  CC    hw/display/cirrus_vga.o
  CC    hw/display/pl110.o
  CC    hw/display/ssd0303.o
  CC    hw/display/ssd0323.o
  CC    hw/display/vga-pci.o
  CC    hw/display/vga-isa.o
  CC    hw/display/vmware_vga.o
  CC    hw/display/blizzard.o
  CC    hw/display/exynos4210_fimd.o
  CC    hw/display/framebuffer.o
  CC    hw/display/tc6393xb.o
  CC    hw/dma/pl080.o
  CC    hw/dma/pl330.o
  CC    hw/dma/i8257.o
  CC    hw/dma/xlnx-zynq-devcfg.o
  CC    hw/gpio/max7310.o
  CC    hw/gpio/pl061.o
  CC    hw/gpio/zaurus.o
  CC    hw/gpio/gpio_key.o
  CC    hw/i2c/core.o
  CC    hw/i2c/smbus.o
  CC    hw/i2c/smbus_eeprom.o
  CC    hw/i2c/i2c-ddc.o
  CC    hw/i2c/versatile_i2c.o
  CC    hw/i2c/smbus_ich9.o
  CC    hw/i2c/pm_smbus.o
  CC    hw/i2c/bitbang_i2c.o
  CC    hw/i2c/exynos4210_i2c.o
  CC    hw/i2c/imx_i2c.o
  CC    hw/i2c/aspeed_i2c.o
  CC    hw/ide/core.o
  CC    hw/ide/atapi.o
  CC    hw/ide/qdev.o
  CC    hw/ide/pci.o
  CC    hw/ide/isa.o
  CC    hw/ide/piix.o
  CC    hw/ide/microdrive.o
  CC    hw/ide/ahci.o
  CC    hw/ide/ich.o
  CC    hw/input/hid.o
  CC    hw/input/lm832x.o
  CC    hw/input/pckbd.o
  CC    hw/input/pl050.o
  CC    hw/input/ps2.o
  CC    hw/input/stellaris_input.o
  CC    hw/input/tsc2005.o
  CC    hw/input/vmmouse.o
  CC    hw/input/virtio-input.o
  CC    hw/input/virtio-input-hid.o
  CC    hw/input/virtio-input-host.o
  CC    hw/intc/i8259_common.o
  CC    hw/intc/i8259.o
  CC    hw/intc/pl190.o
  CC    hw/intc/imx_avic.o
  CC    hw/intc/realview_gic.o
  CC    hw/intc/ioapic_common.o
  CC    hw/intc/arm_gic_common.o
  CC    hw/intc/arm_gic.o
  CC    hw/intc/arm_gicv2m.o
  CC    hw/intc/arm_gicv3_common.o
  CC    hw/intc/arm_gicv3.o
  CC    hw/intc/arm_gicv3_dist.o
  CC    hw/intc/arm_gicv3_redist.o
  CC    hw/ipack/ipack.o
  CC    hw/ipack/tpci200.o
  CC    hw/ipmi/ipmi.o
  CC    hw/ipmi/ipmi_bmc_sim.o
  CC    hw/ipmi/ipmi_bmc_extern.o
  CC    hw/ipmi/isa_ipmi_kcs.o
  CC    hw/ipmi/isa_ipmi_bt.o
  CC    hw/isa/isa-bus.o
  CC    hw/isa/apm.o
  CC    hw/mem/pc-dimm.o
  CC    hw/mem/nvdimm.o
  CC    hw/misc/applesmc.o
  CC    hw/misc/max111x.o
  CC    hw/misc/tmp105.o
  CC    hw/misc/debugexit.o
  CC    hw/misc/sga.o
  CC    hw/misc/pc-testdev.o
  CC    hw/misc/pci-testdev.o
  CC    hw/misc/arm_l2x0.o
  CC    hw/misc/arm_integrator_debug.o
  CC    hw/misc/a9scu.o
  CC    hw/misc/arm11scu.o
  CC    hw/net/ne2000.o
  CC    hw/net/eepro100.o
  CC    hw/net/pcnet-pci.o
  CC    hw/net/pcnet.o
  CC    hw/net/e1000.o
  CC    hw/net/e1000x_common.o
  CC    hw/net/net_tx_pkt.o
  CC    hw/net/net_rx_pkt.o
  CC    hw/net/e1000e.o
  CC    hw/net/e1000e_core.o
  CC    hw/net/rtl8139.o
  CC    hw/net/vmxnet3.o
  CC    hw/net/smc91c111.o
  CC    hw/net/lan9118.o
  CC    hw/net/ne2000-isa.o
  CC    hw/net/xgmac.o
  CC    hw/net/allwinner_emac.o
  CC    hw/net/imx_fec.o
  CC    hw/net/cadence_gem.o
  CC    hw/net/stellaris_enet.o
  CC    hw/net/rocker/rocker.o
  CC    hw/net/rocker/rocker_fp.o
  CC    hw/net/rocker/rocker_desc.o
  CC    hw/net/rocker/rocker_world.o
  CC    hw/net/rocker/rocker_of_dpa.o
  CC    hw/nvram/eeprom93xx.o
  CC    hw/nvram/fw_cfg.o
  CC    hw/pci-bridge/pci_bridge_dev.o
  CC    hw/pci-bridge/pci_expander_bridge.o
  CC    hw/pci-bridge/xio3130_upstream.o
  CC    hw/pci-bridge/xio3130_downstream.o
  CC    hw/pci-bridge/ioh3420.o
  CC    hw/pci-bridge/i82801b11.o
  CC    hw/pci-host/pam.o
  CC    hw/pci-host/versatile.o
  CC    hw/pci-host/piix.o
/tmp/qemu-test/src/hw/nvram/fw_cfg.c: In function ‘fw_cfg_dma_transfer’:
/tmp/qemu-test/src/hw/nvram/fw_cfg.c:330: warning: ‘read’ may be used uninitialized in this function
  CC    hw/pci-host/q35.o
  CC    hw/pci-host/gpex.o
  CC    hw/pci/pci.o
  CC    hw/pci/pci_bridge.o
  CC    hw/pci/msix.o
  CC    hw/pci/msi.o
  CC    hw/pci/shpc.o
  CC    hw/pci/slotid_cap.o
  CC    hw/pci/pci_host.o
  CC    hw/pci/pcie_host.o
  CC    hw/pci/pcie.o
  CC    hw/pci/pcie_aer.o
  CC    hw/pci/pcie_port.o
  CC    hw/pci/pci-stub.o
  CC    hw/pcmcia/pcmcia.o
  CC    hw/scsi/scsi-disk.o
  CC    hw/scsi/scsi-generic.o
  CC    hw/scsi/scsi-bus.o
  CC    hw/scsi/lsi53c895a.o
  CC    hw/scsi/mptsas.o
  CC    hw/scsi/mptconfig.o
  CC    hw/scsi/mptendian.o
  CC    hw/scsi/megasas.o
  CC    hw/scsi/vmw_pvscsi.o
  CC    hw/scsi/esp.o
  CC    hw/scsi/esp-pci.o
  CC    hw/sd/pl181.o
  CC    hw/sd/ssi-sd.o
  CC    hw/sd/sd.o
  CC    hw/sd/core.o
  CC    hw/sd/sdhci.o
  CC    hw/smbios/smbios.o
  CC    hw/smbios/smbios_type_38.o
  CC    hw/ssi/pl022.o
  CC    hw/ssi/ssi.o
  CC    hw/ssi/xilinx_spips.o
  CC    hw/ssi/aspeed_smc.o
  CC    hw/timer/arm_timer.o
  CC    hw/timer/arm_mptimer.o
  CC    hw/timer/a9gtimer.o
  CC    hw/timer/cadence_ttc.o
  CC    hw/timer/ds1338.o
  CC    hw/timer/hpet.o
  CC    hw/timer/i8254_common.o
  CC    hw/timer/i8254.o
  CC    hw/timer/pl031.o
  CC    hw/timer/twl92230.o
  CC    hw/timer/imx_epit.o
  CC    hw/timer/imx_gpt.o
  CC    hw/timer/stm32f2xx_timer.o
  CC    hw/timer/aspeed_timer.o
  CC    hw/tpm/tpm_tis.o
  CC    hw/tpm/tpm_passthrough.o
  CC    hw/tpm/tpm_util.o
  CC    hw/usb/core.o
  CC    hw/usb/combined-packet.o
  CC    hw/usb/bus.o
  CC    hw/usb/libhw.o
  CC    hw/usb/desc.o
  CC    hw/usb/desc-msos.o
  CC    hw/usb/hcd-uhci.o
  CC    hw/usb/hcd-ohci.o
  CC    hw/usb/hcd-ehci.o
  CC    hw/usb/hcd-ehci-pci.o
  CC    hw/usb/hcd-ehci-sysbus.o
  CC    hw/usb/hcd-xhci.o
  CC    hw/usb/hcd-musb.o
  CC    hw/usb/dev-hub.o
  CC    hw/usb/dev-hid.o
  CC    hw/usb/dev-wacom.o
  CC    hw/usb/dev-storage.o
  CC    hw/usb/dev-uas.o
  CC    hw/usb/dev-audio.o
  CC    hw/usb/dev-serial.o
  CC    hw/usb/dev-network.o
  CC    hw/usb/dev-bluetooth.o
  CC    hw/usb/dev-smartcard-reader.o
  CC    hw/usb/dev-mtp.o
  CC    hw/usb/host-stub.o
  CC    hw/virtio/virtio-rng.o
  CC    hw/virtio/virtio-pci.o
  CC    hw/virtio/virtio-bus.o
  CC    hw/virtio/virtio-mmio.o
  CC    hw/watchdog/watchdog.o
  CC    hw/watchdog/wdt_i6300esb.o
  CC    hw/watchdog/wdt_ib700.o
  CC    migration/migration.o
  CC    migration/socket.o
  CC    migration/fd.o
  CC    migration/exec.o
  CC    migration/tls.o
  CC    migration/vmstate.o
  CC    migration/qemu-file.o
  CC    migration/qemu-file-channel.o
  CC    migration/xbzrle.o
  CC    migration/postcopy-ram.o
  CC    migration/qjson.o
  CC    migration/block.o
  CC    net/net.o
  CC    net/queue.o
  CC    net/checksum.o
  CC    net/util.o
  CC    net/hub.o
  CC    net/socket.o
  CC    net/dump.o
  CC    net/eth.o
  CC    net/l2tpv3.o
  CC    net/tap.o
  CC    net/vhost-user.o
  CC    net/tap-linux.o
  CC    net/slirp.o
  CC    net/filter.o
  CC    net/filter-buffer.o
  CC    net/filter-mirror.o
  CC    qom/cpu.o
  CC    replay/replay.o
  CC    replay/replay-internal.o
  CC    replay/replay-events.o
  CC    replay/replay-time.o
/tmp/qemu-test/src/replay/replay-internal.c: In function ‘replay_put_array’:
/tmp/qemu-test/src/replay/replay-internal.c:68: warning: ignoring return value of ‘fwrite’, declared with attribute warn_unused_result
  CC    replay/replay-input.o
  CC    replay/replay-char.o
  CC    slirp/cksum.o
  CC    slirp/if.o
  CC    slirp/ip_icmp.o
  CC    slirp/ip6_icmp.o
  CC    slirp/ip6_input.o
  CC    slirp/ip6_output.o
  CC    slirp/ip_input.o
  CC    slirp/ip_output.o
  CC    slirp/dnssearch.o
  CC    slirp/dhcpv6.o
  CC    slirp/slirp.o
  CC    slirp/mbuf.o
  CC    slirp/misc.o
  CC    slirp/sbuf.o
  CC    slirp/socket.o
  CC    slirp/tcp_input.o
  CC    slirp/tcp_output.o
/tmp/qemu-test/src/slirp/tcp_input.c: In function ‘tcp_input’:
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_p’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_len’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_tos’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_id’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_off’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_ttl’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_sum’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_src.s_addr’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:219: warning: ‘save_ip.ip_dst.s_addr’ may be used uninitialized in this function
/tmp/qemu-test/src/slirp/tcp_input.c:220: warning: ‘save_ip6.ip_nh’ may be used uninitialized in this function
  CC    slirp/tcp_subr.o
  CC    slirp/tcp_timer.o
  CC    slirp/udp.o
  CC    slirp/udp6.o
  CC    slirp/bootp.o
  CC    slirp/tftp.o
  CC    slirp/arp_table.o
  CC    slirp/ndp_table.o
  CC    ui/keymaps.o
  CC    ui/console.o
  CC    ui/cursor.o
  CC    ui/qemu-pixman.o
  CC    ui/input.o
  CC    ui/input-keymap.o
  CC    ui/input-legacy.o
  CC    ui/input-linux.o
  CC    ui/sdl.o
  CC    ui/sdl_zoom.o
  CC    ui/x_keymap.o
  CC    ui/vnc.o
  CC    ui/vnc-enc-zlib.o
  CC    ui/vnc-enc-hextile.o
  CC    ui/vnc-enc-tight.o
  CC    ui/vnc-palette.o
  CC    ui/vnc-enc-zrle.o
  CC    ui/vnc-auth-vencrypt.o
  CC    ui/vnc-ws.o
  CC    ui/vnc-jobs.o
  LINK  tests/qemu-iotests/socket_scm_helper
  CC    qga/commands.o
  CC    qga/guest-agent-command-state.o
  CC    qga/main.o
  CC    qga/commands-posix.o
  CC    qga/channel-posix.o
  CC    qga/qapi-generated/qga-qapi-types.o
  CC    qga/qapi-generated/qga-qapi-visit.o
  CC    qga/qapi-generated/qga-qmp-marshal.o
  CC    qmp-introspect.o
  CC    qapi-types.o
  CC    qapi-visit.o
  CC    qapi-event.o
  AR    libqemustub.a
  CC    qemu-img.o
  AS    optionrom/multiboot.o
  CC    qmp-marshal.o
  AS    optionrom/linuxboot.o
  CC    optionrom/linuxboot_dma.o
  CC    trace/generated-events.o
cc: unrecognized option '-no-integrated-as'
cc: unrecognized option '-no-integrated-as'
  AS    optionrom/kvmvapic.o
  Building optionrom/multiboot.img
  Building optionrom/linuxboot.img
  Building optionrom/linuxboot_dma.img
  Building optionrom/kvmvapic.img
  Building optionrom/multiboot.raw
  Building optionrom/linuxboot.raw
  Building optionrom/linuxboot_dma.raw
  Building optionrom/kvmvapic.raw
  AR    libqemuutil.a
  Signing optionrom/linuxboot.bin
  Signing optionrom/linuxboot_dma.bin
  Signing optionrom/kvmvapic.bin
  LINK  qemu-ga
  LINK  ivshmem-client
  Signing optionrom/multiboot.bin
  LINK  ivshmem-server
  LINK  qemu-nbd
  LINK  qemu-img
  LINK  qemu-io
  LINK  qemu-bridge-helper
  GEN   x86_64-softmmu/hmp-commands.h
  GEN   x86_64-softmmu/hmp-commands-info.h
  GEN   x86_64-softmmu/qmp-commands-old.h
  GEN   x86_64-softmmu/config-target.h
  GEN   aarch64-softmmu/hmp-commands.h
  GEN   aarch64-softmmu/hmp-commands-info.h
  GEN   aarch64-softmmu/qmp-commands-old.h
  GEN   aarch64-softmmu/config-target.h
  CC    x86_64-softmmu/exec.o
  CC    x86_64-softmmu/translate-all.o
  CC    x86_64-softmmu/cpu-exec.o
  CC    x86_64-softmmu/translate-common.o
  CC    x86_64-softmmu/cpu-exec-common.o
  CC    x86_64-softmmu/tcg/tcg.o
  CC    x86_64-softmmu/tcg/tcg-op.o
  CC    x86_64-softmmu/tcg/optimize.o
  CC    x86_64-softmmu/tcg/tcg-common.o
  CC    aarch64-softmmu/exec.o
  CC    x86_64-softmmu/fpu/softfloat.o
/tmp/qemu-test/src/tcg/optimize.c: In function ‘tcg_optimize_mb’:
/tmp/qemu-test/src/tcg/optimize.c:554: error: ‘INDEX_op_mb’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:554: error: (Each undeclared identifier is reported only once
/tmp/qemu-test/src/tcg/optimize.c:554: error: for each function it appears in.)
/tmp/qemu-test/src/tcg/optimize.c:556: error: ‘TCGBar’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:556: error: expected ‘;’ before ‘curr_mb_type’
/tmp/qemu-test/src/tcg/optimize.c:557: error: expected ‘;’ before ‘prev_mb_type’
/tmp/qemu-test/src/tcg/optimize.c:559: error: ‘curr_mb_type’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:559: error: ‘prev_mb_type’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:560: error: ‘TCG_BAR_STRL’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:560: error: ‘TCG_BAR_SC’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:569: error: ‘TCG_BAR_LDAQ’ undeclared (first use in this function)
make[1]: *** [tcg/optimize.o] Error 1
make[1]: *** Waiting for unfinished jobs....
  CC    aarch64-softmmu/translate-all.o
  CC    aarch64-softmmu/cpu-exec.o
  CC    aarch64-softmmu/translate-common.o
  CC    aarch64-softmmu/cpu-exec-common.o
  CC    aarch64-softmmu/tcg/tcg.o
  CC    aarch64-softmmu/tcg/tcg-op.o
  CC    aarch64-softmmu/tcg/optimize.o
  CC    aarch64-softmmu/tcg/tcg-common.o
  CC    aarch64-softmmu/fpu/softfloat.o
  CC    aarch64-softmmu/disas.o
  GEN   aarch64-softmmu/gdbstub-xml.c
/tmp/qemu-test/src/tcg/optimize.c: In function ‘tcg_optimize_mb’:
/tmp/qemu-test/src/tcg/optimize.c:554: error: ‘INDEX_op_mb’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:554: error: (Each undeclared identifier is reported only once
/tmp/qemu-test/src/tcg/optimize.c:554: error: for each function it appears in.)
/tmp/qemu-test/src/tcg/optimize.c:556: error: ‘TCGBar’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:556: error: expected ‘;’ before ‘curr_mb_type’
/tmp/qemu-test/src/tcg/optimize.c:557: error: expected ‘;’ before ‘prev_mb_type’
/tmp/qemu-test/src/tcg/optimize.c:559: error: ‘curr_mb_type’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:559: error: ‘prev_mb_type’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:560: error: ‘TCG_BAR_STRL’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:560: error: ‘TCG_BAR_SC’ undeclared (first use in this function)
/tmp/qemu-test/src/tcg/optimize.c:569: error: ‘TCG_BAR_LDAQ’ undeclared (first use in this function)
make[1]: *** [tcg/optimize.o] Error 1
make[1]: *** Waiting for unfinished jobs....
make: *** [subdir-x86_64-softmmu] Error 2
make: *** Waiting for unfinished jobs....
make: *** [subdir-aarch64-softmmu] Error 2
tests/docker/Makefile.include:104: recipe for target 'docker-run-test-quick@centos6' failed
make: *** [docker-run-test-quick@centos6] Error 1
=== OUTPUT END ===

Test command exited with code: 2


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@freelists.org
Pranith Kumar Aug. 10, 2016, 5:58 p.m. UTC | #2
Hi Richard,

On Wed, Aug 10, 2016 at 6:44 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 08/10/2016 04:42 AM, Pranith Kumar wrote:
>>
>> +/* Eliminate duplicate and unnecessary fence instructions */
>> +void tcg_optimize_mb(TCGContext *s)
>> +{
>> +    int oi, oi_next;
>> +    TCGArg prev_op_mb = -1;
>> +    TCGOp *prev_op = NULL;
>> +
>> +    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
>> +        TCGOp *op = &s->gen_op_buf[oi];
>> +        TCGArg *args = &s->gen_opparam_buf[op->args];
>> +        TCGOpcode opc = op->opc;
>> +
>> +        switch (opc) {
>> +        case INDEX_op_mb:
>> +        {
>> +            TCGBar curr_mb_type = args[0] & 0xF0;
>> +            TCGBar prev_mb_type = prev_op_mb & 0xF0;
>> +
>> +            if (curr_mb_type == prev_mb_type ||
>> +                (curr_mb_type == TCG_BAR_STRL && prev_mb_type ==
>> TCG_BAR_SC)) {
>
>
> As I said before, I don't think you should be doing any of this if
> prev_op_mb == -1.  I think that's the first thing you should test within
> INDEX_op_mb.

If prev_op_mb is -1, none of the conditions evaluate to true and we
will fall through. Did you want me to check this as an optimization or
am I missing your point?

>
>> +                /* Remove the current weaker barrier op. The previous
>> +                 * barrier is stronger and sufficient.
>> +                 * mb; strl => mb; st
>> +                 */
>> +                tcg_op_remove(s, op);
>> +                op = prev_op;
>
>
> As I said before, surely you have to merge the low 4 bits as well.

Since we are removing the weaker barrier and leaving the stronger
barrier, I think just updating args[0] to have the stronger barrier
semantics should be enough.

                tcg_op_remove(s, op);
                op = prev_op;
                args[0] = prev_op_mb;
                break;

Is there something I am missing when you say merge?

>
>> +        case INDEX_op_ld8u_i32:
>> +        case INDEX_op_ld8u_i64:
>> +        case INDEX_op_ld8s_i32:
>> +        case INDEX_op_ld8s_i64:
>> +        case INDEX_op_ld16u_i32:
>> +        case INDEX_op_ld16u_i64:
>> +        case INDEX_op_ld16s_i32:
>> +        case INDEX_op_ld16s_i64:
>> +        case INDEX_op_ld_i32:
>> +        case INDEX_op_ld32u_i64:
>> +        case INDEX_op_ld32s_i64:
>> +        case INDEX_op_ld_i64:
>> +        case INDEX_op_st8_i32:
>> +        case INDEX_op_st8_i64:
>> +        case INDEX_op_st16_i32:
>> +        case INDEX_op_st16_i64:
>> +        case INDEX_op_st_i32:
>> +        case INDEX_op_st32_i64:
>> +        case INDEX_op_st_i64:
>
>
> None of these are guest memory operations.  These are host access to the ENV
> structure, primarily for cpu registers that aren't common enough to warrant
> a TCG temporary.
>
> You wanted to be checking for
>
>   INDEX_op_qemu_ld_i32
>   INDEX_op_qemu_ld_i64
>   INDEX_op_qemu_st_i32
>   INDEX_op_qemu_st_i64
>
> which are the guest memory operations.
>

Yes, I got confused. I will fix this.


>> +        case INDEX_op_call:
>> +            prev_op_mb = -1;
>> +            prev_op = NULL;
>> +            break;
>
>
> Probably you could skip the calls to "pure" helpers.  I.e. those with
> TCG_CALL_NO_RWG_SE.  But it's probably not that important and we can leave
> it for another day.
>
>
> r~
diff mbox

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index cffe89b..5963a39 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -538,6 +538,90 @@  static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
     return false;
 }
 
+/* Eliminate duplicate and unnecessary fence instructions */
+void tcg_optimize_mb(TCGContext *s)
+{
+    int oi, oi_next;
+    TCGArg prev_op_mb = -1;
+    TCGOp *prev_op = NULL;
+
+    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
+        TCGOp *op = &s->gen_op_buf[oi];
+        TCGArg *args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+
+        switch (opc) {
+        case INDEX_op_mb:
+        {
+            TCGBar curr_mb_type = args[0] & 0xF0;
+            TCGBar prev_mb_type = prev_op_mb & 0xF0;
+
+            if (curr_mb_type == prev_mb_type ||
+                (curr_mb_type == TCG_BAR_STRL && prev_mb_type == TCG_BAR_SC)) {
+                /* Remove the current weaker barrier op. The previous
+                 * barrier is stronger and sufficient.
+                 * mb; strl => mb; st
+                 */
+                tcg_op_remove(s, op);
+                op = prev_op;
+                break;
+            } else if (curr_mb_type == TCG_BAR_SC &&
+                       prev_mb_type == TCG_BAR_LDAQ) {
+                /* Remove the previous weaker barrier op. The current
+                 * barrier is stronger and sufficient.
+                 * ldaq; mb => ld; mb
+                 */
+                tcg_op_remove(s, prev_op);
+            } else if (curr_mb_type == TCG_BAR_STRL &&
+                       prev_mb_type == TCG_BAR_LDAQ) {
+                /* Consecutive load-acquire and store-release barriers
+                 * can be merged into one stronger SC barrier
+                 * ldaq; strl => ld; mb; st
+                 */
+                args[0] = (args[0] & 0x0F) | TCG_BAR_SC;
+                tcg_op_remove(s, prev_op);
+            }
+            prev_op_mb = args[0];
+            prev_op = op;
+            break;
+        }
+        case INDEX_op_insn_start:
+            break;
+        case INDEX_op_ld8u_i32:
+        case INDEX_op_ld8u_i64:
+        case INDEX_op_ld8s_i32:
+        case INDEX_op_ld8s_i64:
+        case INDEX_op_ld16u_i32:
+        case INDEX_op_ld16u_i64:
+        case INDEX_op_ld16s_i32:
+        case INDEX_op_ld16s_i64:
+        case INDEX_op_ld_i32:
+        case INDEX_op_ld32u_i64:
+        case INDEX_op_ld32s_i64:
+        case INDEX_op_ld_i64:
+        case INDEX_op_st8_i32:
+        case INDEX_op_st8_i64:
+        case INDEX_op_st16_i32:
+        case INDEX_op_st16_i64:
+        case INDEX_op_st_i32:
+        case INDEX_op_st32_i64:
+        case INDEX_op_st_i64:
+        case INDEX_op_call:
+            prev_op_mb = -1;
+            prev_op = NULL;
+            break;
+        default:
+            if (tcg_op_defs[opc].flags & TCG_OPF_BB_END) {
+                prev_op_mb = -1;
+                prev_op = NULL;
+            }
+            break;
+        }
+
+        oi_next = op->next;
+    }
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 void tcg_optimize(TCGContext *s)
 {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 42417bd..1db319e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2587,6 +2587,10 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         }
     }
 
+#ifdef USE_TCG_OPTIMIZATIONS
+    tcg_optimize_mb(s);
+#endif
+
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
 #endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9ed78dc..79bb5bb 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -921,6 +921,7 @@  void tcg_op_remove(TCGContext *s, TCGOp *op);
 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
 
+void tcg_optimize_mb(TCGContext *s);
 void tcg_optimize(TCGContext *s);
 
 /* only used for debugging purposes */