diff mbox

[01/50] scripts: add script to build QEMU and analyze inclusions

Message ID 1460147350-7601-2-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini April 8, 2016, 8:28 p.m. UTC
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 scripts/analyze-inclusions | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 scripts/analyze-inclusions

Comments

Markus Armbruster April 18, 2016, 1:10 p.m. UTC | #1
Paolo Bonzini <pbonzini@redhat.com> writes:

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  scripts/analyze-inclusions | 89 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 89 insertions(+)
>  create mode 100644 scripts/analyze-inclusions
>
> diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions
> new file mode 100644
> index 0000000..e241bd4
> --- /dev/null
> +++ b/scripts/analyze-inclusions
> @@ -0,0 +1,89 @@
> +#! /bin/sh
> +#
> +# Copyright (C) 2016 Red Hat, Inc.
> +#
> +# Author: Paolo Bonzini <pbonzini@redhat.com>
> +#
> +# Print statistics about header file inclusions.
> +# The script configures and builds QEMU itself in a "+build"
> +# subdirectory which is left around when the script exits.
> +# To run the statistics on a pre-existing "+build" directory,
> +# pass "--no-build" as the first argument on the command line.
> +# Any other command line is passed directly to "make" (so
> +# you can for example pass a "-j" argument suitable for your
> +# system).
> +#
> +# Inspired by a post by Markus Armbruster.
> +
> +mkdir -p +build
> +cd +build
> +if test "x$1" != "x--no-build"; then
> +  test -f Makefile && make distclean
> +  ../configure
> +  make "$@"
> +fi

Instead of hardcoding +build, I'd require the user to run this in a
build tree of his choice.

Hardcoding +build is harder to misuse, though, since you get a clean
build by default.

Your choice.

Unfortunate: "mkdir -p +build" clobbers an existing symbolic link from
+build to the build tree of my choice.

> +
> +QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
> +QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
> +    sed 's/$(SRC_PATH)/../g' )
> +CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
> +
> +grep_include() {
> +  find . -name "*.d" | xargs grep -l "$@" | wc -l

More robust against funny names would be:

     find . -name "*.d" -exec grep -l {} + | wc -l

Also slightly more efficient.  Neither matters much here, though.

> +}
> +
> +echo Found $(find . -name "*.d" | wc -l) object files
> +echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
> +echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
> +echo $(grep_include 'target-[a-z0-9]*/cpu\.h') files include cpu.h
> +echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
> +echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h
> +echo $(grep_include -F 'qapi/error.h') files include qapi/error.h
> +echo $(grep_include -F 'qom/object.h') files include qom/object.h
> +echo $(grep_include -F 'block/aio.h') files include block/aio.h
> +echo $(grep_include -F 'exec/memory.h') files include exec/memory.h
> +echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h
> +echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h
> +echo

How did you select these headers?

Instead of hardcoding a few well-known headers, we could count
everything.  We'd probably want to suppress the long tail by default.

Here's the ad hoc bash hackery I used for that (assumes source tree in
..):

    # For every header named in a *.d dependency file, count how many .d
    # files mention it and weight that count by the header's size in bytes.
    # Requires bash (associative arrays, read -d).
    #
    # find -print0 / read -d '' keeps file names containing whitespace
    # intact, which the word-split output of a command substitution cannot.
    find . -name '*.d' -print0 |
    while IFS= read -r -d '' i
    do
        # Lines of the form "path/to/foo.h:" name a header; list each
        # header at most once per .d file.
        sed -n 's/\.h:$/.h/p' "$i" | sort -u
    done | sort | uniq -c | {
        declare -A h
        while read -r n f
        do
            # Names may be spelled relative to different directories:
            # try as-is, then without a leading "../", then inside the
            # x86_64-softmmu target directory.
            [ -e "$f" ] || f=${f#../}
            [ -e "$f" ] || f=x86_64-softmmu/$f
            # Several spellings can resolve to the same file, so sum them.
            h[$f]=$(( ${h[$f]:-0} + n ))
        done
        for f in "${!h[@]}"
        do
            n=${h[$f]}
            # A header that still cannot be found yields an empty size,
            # which the arithmetic and the quoted printf argument treat
            # as 0 instead of shifting the output columns.
            s=$(wc -c <"$f")
            printf "%9d %7d %6d %s\n" $((s*n)) "$s" "$n" "$f"
        done
    } | sort -nr

Prints four columns: header size * number of inclusions, header size,
number of inclusions, header file name.

> +
> +awk1='
> +    /^# / { file = $3;next }
> +    NR>1 { bytes[file]+=length; lines[file]++ }

Your #bytes is off by one, because AWK chops off the newlines.  I think
you want length() + 1.

From the gawk docs:

          NOTE: In older versions of 'awk', the 'length()' function
          could be called without any parentheses.  Doing so is
          considered poor practice, although the 2008 POSIX standard
          explicitly allows it, to support historical practice.  For
          programs to be maximally portable, always supply the
          parentheses.

> +    END { for(i in lines) print i,lines[i],bytes[i] }'
> +
> +awk2='
> +    {tot_l+=$2;tot_b+=$3;tot_f++}
> +    /\/usr.*\/glib/ {glib_l+=$2;glib_b+=$3;glib_f++;next}
> +    /\/usr/ {sys_l+=$2;sys_b+=$3;sys_f++;next}
> +    {qemu_l+=$2;qemu_b+=$3;qemu_f++;next}
> +    END {
> +      printf "%s\t %s\t %s\t %s\n", "lines", "bytes", "files", "source"
> +      printf "%s\t %s\t %s\t %s\n", qemu_l, qemu_b, qemu_f, "QEMU"
> +      printf "%s\t %s\t %s\t %s\n", sys_l, sys_b, sys_f, "system"
> +      printf "%s\t %s\t %s\t %s\n", glib_l, glib_b, glib_f, "glib"
> +      printf "%s\t %s\t %s\t %s\n", tot_l, tot_b, tot_f, "total"
> +    }'

For comparison, here's how I hacked this up:

# stats BASENAME [TITLE...]
#
# Summarize the preprocessor output stored in BASENAME.i.
#
# Writes a per-file table (line count, byte count, file name, sorted on
# the name column) to BASENAME.out, then prints to stdout: a blank line,
# TITLE, a column header, and one row each for QEMU, system (/usr),
# GLib, the "#" linemarker lines themselves, and the grand total.
# Byte counts include the newline of every line and are shown in KiB.
stats()
{
    # local: do not clobber a caller's $n
    local n=$1
    shift
    # Pass 1: attribute every line to the file named by the most recent
    # "# <line> <file>" marker; marker lines are booked under "#".
    # The last record printed is the overall total; its third column is
    # the (numeric) number of entries.
    awk '
        /^#/ { c["#"]++; s["#"] += length($0) + 1; f = $3; next }
        { c[f]++; s[f] += length($0) + 1 }
        END {
            for (i in c) {
                printf "%8d %7d %s\n", c[i], s[i], i; n++
                ct += c[i]; st += s[i]
            }
            printf "%8d %7d %d\n", ct, st, n
        }' "$n.i" | sort -k 3 >"$n.out"
    echo
    # printf rather than echo so a title such as "-n" is printed verbatim
    printf '%s\n' "$*"
    echo "  #lines  KiBytes  #files  source"
    # Pass 2: bucket the per-file records.  The numeric-name test keeps
    # the total record written by pass 1 out of the QEMU bucket; the
    # total row is recomputed from the four buckets instead.
    awk '
        function pr(c, s, n, t) { printf "%8d %7d %7d   %s\n", c, s/1024, n, t }
        $3 ~ /\/glib-2.0\// { gc += $1; gs += $2; gn++; next }
        $3 ~ /^"\/usr\// { uc += $1; us += $2; un++; next }
        $3 == "#" { hc += $1; hs += $2; hn++; next }
        $3 !~ /^[0-9]/ { qc += $1; qs += $2; qn++ }
        END {
            pr(qc, qs, qn, "QEMU")
            pr(uc, us, un, "system")
            pr(gc, gs, gn, "GLib")
            pr(hc, hs, hn, "# lines")
            pr(qc+uc+gc+hc, qs+us+gs+hs, qn+un+gn+hn, "total")
        }' "$n.out"
}

> +
> +analyze() {
> +  cc $QEMU_CFLAGS $QEMU_INCLUDES $CFLAGS  -E -o - "$@" | \
> +    awk "$awk1" | awk "$awk2"
> +  echo
> +}
> +
> +echo osdep.h:
> +analyze ../include/qemu/osdep.h
> +
> +echo qemu-common.h:
> +analyze  -include ../include/qemu/osdep.h ../include/qemu-common.h
> +
> +echo hw/hw.h:
> +analyze -include ../include/qemu/osdep.h ../include/hw/hw.h
> +
> +echo trace/generated-tracers.h:
> +analyze -include ../include/qemu/osdep.h trace/generated-tracers.h
> +
> +echo target-i386/cpu.h:
> +analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../target-i386/cpu.h
> +
> +echo hw/hw.h + NEED_CPU_H:
> +analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../include/hw/hw.h

We want to watch commonly included big headers, especially the ones that
are prone to indirect inclusion.  These will change as we go.

Output of my header counting bash script (first 64 lines appended)
provides possible additional initial candidates.


479379846  124806   3841 qapi-types.h
199662236   55756   3581 /work/armbru/qemu/include/qom/object.h
187691645   53857   3485 /work/armbru/qemu/include/exec/memory.h
118894840   30643   3880 /work/armbru/qemu/include/fpu/softfloat.h
109943680  124936    880 trace/generated-events.h
 88524072   27022   3276 /work/armbru/qemu/include/qom/cpu.h
 82757301   46519   1779 /work/armbru/qemu/include/migration/vmstate.h
 82340280   21510   3828 /work/armbru/qemu/include/qemu/queue.h
 63362110   19259   3290 /work/armbru/qemu/include/disas/bfd.h
 62800785   26667   2355 /work/armbru/qemu/include/qemu/timer.h
 52975068   13828   3831 /work/armbru/qemu/include/qemu/atomic.h
 51315482   16442   3121 /work/armbru/qemu/include/exec/exec-all.h
 48329904   13944   3466 /work/armbru/qemu/include/hw/qdev-core.h
 47768052   12508   3819 /work/armbru/qemu/include/qemu/host-utils.h
 45446418   12603   3606 /work/armbru/qemu/include/qemu/bitops.h
 44024926  102146    431 /work/armbru/qemu/target-ppc/cpu.h
 38564586   79351    486 /work/armbru/qemu/target-arm/cpu.h
 37448181    9459   3959 /work/armbru/qemu/include/qemu/osdep.h
 35412449    9263   3823 /work/armbru/qemu/include/qemu/bswap.h
 34768410   84801    410 /work/armbru/qemu/linux-user/syscall_defs.h
 32384620    8180   3959 /work/armbru/qemu/include/glib-compat.h
 30675274    8722   3517 /work/armbru/qemu/include/qemu/bitmap.h
 29553480   14487   2040 /work/armbru/qemu/include/block/aio.h
 28660968   49077    584 /work/armbru/qemu/include/standard-headers/linux/pci_regs.h
 26771938    8578   3121 /work/armbru/qemu/include/exec/cpu-all.h
 26076375   23179   1125 /work/armbru/qemu/include/block/block.h
 23502126    6154   3819 /work/armbru/qemu/include/qemu/option.h
 21961012    5758   3814 /work/armbru/qemu/include/qemu-common.h
 20414844   17814   1146 /work/armbru/qemu/include/sysemu/kvm.h
 20357714   34043    598 /work/armbru/qemu/tcg/tcg.h
 17830273    5713   3121 /work/armbru/qemu/include/exec/cpu-defs.h
 17747303   34867    509 /work/armbru/qemu/target-mips/cpu.h
 17693696   59776    296 /work/armbru/qemu/include/elf.h
 17242542   10107   1706 /work/armbru/qemu/include/migration/qemu-file.h
 16867400   11275   1496 /work/armbru/qemu/include/hw/qdev-properties.h
 16589807    4709   3523 /work/armbru/qemu/include/exec/cpu-common.h
 15574706    3934   3959 /work/armbru/qemu/include/qemu/typedefs.h
 15539459    4979   3121 /work/armbru/qemu/tcg/i386/tcg-target.h
 14808486   10241   1446 /work/armbru/qemu/include/qemu/main-loop.h
 14653088    4201   3488 /work/armbru/qemu/include/qemu/rcu.h
 14620587    3693   3959 config-host.h
 14311504   24506    584 /work/armbru/qemu/include/hw/pci/pci.h
 13875582   45794    303 /work/armbru/qemu/target-i386/cpu.h
 13025073   10027   1299 /work/armbru/qemu/include/qapi/error.h
 12931152    3657   3536 /work/armbru/qemu/include/qemu/log.h
 11520690    2910   3959 /work/armbru/qemu/include/qemu/compiler.h
 10893750    8750   1245 /work/armbru/qemu/include/sysemu/sysemu.h
 10214680    2674   3820 /work/armbru/qemu/include/qapi/qmp/qdict.h
 10129776    2649   3824 /work/armbru/qemu/include/qapi/qmp/qobject.h
  9114743    2629   3467 /work/armbru/qemu/include/hw/hotplug.h
  8747820   38034    230 /work/armbru/qemu/tcg/tcg-op.h
  8147622    2058   3959 /work/armbru/qemu/include/sysemu/os-posix.h
  8082800    6680   1210 /work/armbru/qemu/include/qemu/iov.h
  7691673    2013   3821 /work/armbru/qemu/include/qemu/module.h
  7522880   23509    320 qmp-commands.h
  7284094    6469   1126 /work/armbru/qemu/include/qemu/hbitmap.h
  7214376    1976   3651 /work/armbru/qemu/include/qemu/thread.h
  6756243    2721   2483 /work/armbru/qemu/include/qemu/int128.h
  6706616    5842   1148 /work/armbru/qemu/include/qemu/coroutine.h
  6601240   10445    632 /work/armbru/qemu/include/exec/cpu_ldst.h
  6306784    1804   3496 /work/armbru/qemu/include/hw/irq.h
  6285708   15444    407 /work/armbru/qemu/linux-user/qemu.h
  6109779    1599   3821 /work/armbru/qemu/include/qapi/qmp/qlist.h
  5626144    1613   3488 /work/armbru/qemu/include/exec/memattrs.h
Alex Bennée April 20, 2016, 7:47 p.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  scripts/analyze-inclusions | 89 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 89 insertions(+)
>  create mode 100644 scripts/analyze-inclusions
>
> diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions

.sh extension to make it clear what it is?

> new file mode 100644

OK the script directory is inconsistent but if we want to use it for
automated testing it should be executable in the checkout.

> index 0000000..e241bd4
> --- /dev/null
> +++ b/scripts/analyze-inclusions
> @@ -0,0 +1,89 @@
> +#! /bin/sh
> +#
> +# Copyright (C) 2016 Red Hat, Inc.
> +#
> +# Author: Paolo Bonzini <pbonzini@redhat.com>
> +#
> +# Print statistics about header file inclusions.
> +# The script configures and builds QEMU itself in a "+build"
> +# subdirectory which is left around when the script exits.
> +# To run the statistics on a pre-existing "+build" directory,
> +# pass "--no-build" as the first argument on the command line.
> +# Any other command line is passed directly to "make" (so
> +# you can for example pass a "-j" argument suitable for your
> +# system).
> +#
> +# Inspired by a post by Markus Armbruster.
> +
> +mkdir -p +build
> +cd +build
> +if test "x$1" != "x--no-build"; then
> +  test -f Makefile && make distclean
> +  ../configure
> +  make "$@"
> +fi
> +
> +QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
> +QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
> +    sed 's/$(SRC_PATH)/../g' )
> +CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
> +
> +grep_include() {
> +  find . -name "*.d" | xargs grep -l "$@" | wc -l
> +}
> +
> +echo Found $(find . -name "*.d" | wc -l) object files
> +echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
> +echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
> +echo $(grep_include 'target-[a-z0-9]*/cpu\.h') files include cpu.h
> +echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
> +echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h
> +echo $(grep_include -F 'qapi/error.h') files include qapi/error.h
> +echo $(grep_include -F 'qom/object.h') files include qom/object.h
> +echo $(grep_include -F 'block/aio.h') files include block/aio.h
> +echo $(grep_include -F 'exec/memory.h') files include exec/memory.h
> +echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h
> +echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h
> +echo
> +
> +awk1='
> +    /^# / { file = $3;next }
> +    NR>1 { bytes[file]+=length; lines[file]++ }
> +    END { for(i in lines) print i,lines[i],bytes[i] }'
> +
> +awk2='
> +    {tot_l+=$2;tot_b+=$3;tot_f++}
> +    /\/usr.*\/glib/ {glib_l+=$2;glib_b+=$3;glib_f++;next}
> +    /\/usr/ {sys_l+=$2;sys_b+=$3;sys_f++;next}
> +    {qemu_l+=$2;qemu_b+=$3;qemu_f++;next}
> +    END {
> +      printf "%s\t %s\t %s\t %s\n", "lines", "bytes", "files", "source"
> +      printf "%s\t %s\t %s\t %s\n", qemu_l, qemu_b, qemu_f, "QEMU"
> +      printf "%s\t %s\t %s\t %s\n", sys_l, sys_b, sys_f, "system"
> +      printf "%s\t %s\t %s\t %s\n", glib_l, glib_b, glib_f, "glib"
> +      printf "%s\t %s\t %s\t %s\n", tot_l, tot_b, tot_f, "total"
> +    }'
> +
> +analyze() {
> +  cc $QEMU_CFLAGS $QEMU_INCLUDES $CFLAGS  -E -o - "$@" | \
> +    awk "$awk1" | awk "$awk2"
> +  echo
> +}
> +
> +echo osdep.h:
> +analyze ../include/qemu/osdep.h
> +
> +echo qemu-common.h:
> +analyze  -include ../include/qemu/osdep.h ../include/qemu-common.h
> +
> +echo hw/hw.h:
> +analyze -include ../include/qemu/osdep.h ../include/hw/hw.h
> +
> +echo trace/generated-tracers.h:
> +analyze -include ../include/qemu/osdep.h trace/generated-tracers.h
> +
> +echo target-i386/cpu.h:
> +analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../target-i386/cpu.h
> +
> +echo hw/hw.h + NEED_CPU_H:
> +analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../include/hw/hw.h

If we get to be include clean we want this script to have a non-zero
exit for automated testing.

--
Alex Bennée
Paolo Bonzini May 9, 2016, 9:39 a.m. UTC | #3
On 20/04/2016 21:47, Alex Bennée wrote:
> OK the script directory is inconsistent but if we want to use it for
> automated testing it should be executable in the checkout.

This is not a testing script, it's just for analysis.

Paolo
Paolo Bonzini May 9, 2016, 10:07 a.m. UTC | #4
On 18/04/2016 15:10, Markus Armbruster wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  scripts/analyze-inclusions | 89 ++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 89 insertions(+)
>>  create mode 100644 scripts/analyze-inclusions
>>
>> diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions
>> new file mode 100644
>> index 0000000..e241bd4
>> --- /dev/null
>> +++ b/scripts/analyze-inclusions
>> @@ -0,0 +1,89 @@
>> +#! /bin/sh
>> +#
>> +# Copyright (C) 2016 Red Hat, Inc.
>> +#
>> +# Author: Paolo Bonzini <pbonzini@redhat.com>
>> +#
>> +# Print statistics about header file inclusions.
>> +# The script configures and builds QEMU itself in a "+build"
>> +# subdirectory which is left around when the script exits.
>> +# To run the statistics on a pre-existing "+build" directory,
>> +# pass "--no-build" as the first argument on the command line.
>> +# Any other command line is passed directly to "make" (so
>> +# you can for example pass a "-j" argument suitable for your
>> +# system).
>> +#
>> +# Inspired by a post by Markus Armbruster.
>> +
>> +mkdir -p +build
>> +cd +build
>> +if test "x$1" != "x--no-build"; then
>> +  test -f Makefile && make distclean
>> +  ../configure
>> +  make "$@"
>> +fi
> 
> Unfortunate: "mkdir -p +build" clobbers an existing symbolic link from
> +build to the build tree of my choice.

Changed to this:

# The script has two modes of execution:
#
# 1) if invoked with a path on the command line (possibly
# preceded by a "--" argument), it will run the analysis on
# an existing build directory
#
# 2) otherwise, it will configure and build QEMU itself in a
# "+build" subdirectory which is left around when the script
# exits.  In this case the command line is passed directly to
# "make" (typically used for a "-j" argument suitable for your
# system).
#
# Inspired by a post by Markus Armbruster.

# Pick the mode of execution from the first argument.  The selector is
# prefixed with "x" so that an empty or dash-leading argument is matched
# safely; every pattern that inspects $1 must therefore carry the same
# "x" prefix.
case "x$1" in
x--)
  # Explicit "--": the next argument is an existing build directory.
  shift
  cd "$1" || exit $?
  ;;
x-* | x)
  # No path given (nothing at all, or only options): do a clean build
  # in "+build" and hand the whole command line to make.
  mkdir -p +build
  cd +build || exit $?
  test -f Makefile && make distclean
  ../configure
  make "$@"
  ;;
*)
  # A plain path: analyze that existing build directory.
  cd "$1" || exit $?
  ;;
esac

>> +
>> +QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
>> +QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
>> +    sed 's/$(SRC_PATH)/../g' )
>> +CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
>> +
>> +grep_include() {
>> +  find . -name "*.d" | xargs grep -l "$@" | wc -l
> 
> More robust against funny names would be:
> 
>      find . -name "*.d" -exec grep -l {} + | wc -l

Missing a "$@", otherwise adopted your version.

>> +echo Found $(find . -name "*.d" | wc -l) object files
>> +echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
>> +echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
>> +echo $(grep_include 'target-[a-z0-9]*/cpu\.h') files include cpu.h
>> +echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
>> +echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h
>> +echo $(grep_include -F 'qapi/error.h') files include qapi/error.h
>> +echo $(grep_include -F 'qom/object.h') files include qom/object.h
>> +echo $(grep_include -F 'block/aio.h') files include block/aio.h
>> +echo $(grep_include -F 'exec/memory.h') files include exec/memory.h
>> +echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h
>> +echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h
>> +echo
> 
> How did you select these headers?

From your post, mostly.  A lot of these are files that we are planning
to tackle one way or another, or that have a lot of indirect inclusions.

>> +
>> +awk1='
>> +    /^# / { file = $3;next }
>> +    NR>1 { bytes[file]+=length; lines[file]++ }
> 
> Your #bytes is off by one, because AWK chops off the newlines.  I think
> you want length() + 1.

Fixed.

> We want to watch commonly included big headers, especially the ones that
> are prone to indirect inclusion.  These will change as we go.

That's valuable, but actually I wanted to check for something else.  I'm
looking at:

- files that include the world: these are hw/hw.h, cpu.h, etc.

- files included from anywhere, that probably shouldn't be included
anywhere: these are the ones I cherry-picked in the first part of the
script, such as block/aio.h, qemu/bswap.h, fpu/softfloat.h.

> Output of my header counting bash script (first 64 lines appended)
> provides possible additional initial candidates.
> 
> 479379846  124806   3841 qapi-types.h
> 199662236   55756   3581 /work/armbru/qemu/include/qom/object.h
> 187691645   53857   3485 /work/armbru/qemu/include/exec/memory.h
> 118894840   30643   3880 /work/armbru/qemu/include/fpu/softfloat.h
> 109943680  124936    880 trace/generated-events.h

These are examples of the second case.

>  88524072   27022   3276 /work/armbru/qemu/include/qom/cpu.h

This needs to be included by all target-*/cpu.h (which are in my list),
so I'm tracking one of those instead.

>  82757301   46519   1779 /work/armbru/qemu/include/migration/vmstate.h

Almost always included through hw/hw.h, tracking that instead.  The
problem (if it is a problem) here is too many inclusions of hw/hw.h, and
hw/hw.h including the kitchen sink.

>  82340280   21510   3828 /work/armbru/qemu/include/qemu/queue.h

Probably unavoidable, might as well move it to qemu/osdep.h?!?

>  63362110   19259   3290 /work/armbru/qemu/include/disas/bfd.h

For disas/bfd.h and (before this series) exec/exec-all.h, the problem is
not too many inclusions of the header, but possibly unnecessary
inclusions from heavily used headers.  In particular I'm not sure why
qom/cpu.h needs disas/bfd.h.

Anyhow, my point is that the generic counting script tends to count
things twice, which is why I went for a limited hand-written list based
on your message and thus on your script.  The obvious disadvantage is
that the hand-written list may become obsolete.

>  62800785   26667   2355 /work/armbru/qemu/include/qemu/timer.h
>  52975068   13828   3831 /work/armbru/qemu/include/qemu/atomic.h
>  51315482   16442   3121 /work/armbru/qemu/include/exec/exec-all.h

Happy to say my patches fix this one. :)

Paolo
diff mbox

Patch

diff --git a/scripts/analyze-inclusions b/scripts/analyze-inclusions
new file mode 100644
index 0000000..e241bd4
--- /dev/null
+++ b/scripts/analyze-inclusions
@@ -0,0 +1,89 @@ 
+#! /bin/sh
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# Author: Paolo Bonzini <pbonzini@redhat.com>
+#
+# Print statistics about header file inclusions.
+# The script configures and builds QEMU itself in a "+build"
+# subdirectory which is left around when the script exits.
+# To run the statistics on a pre-existing "+build" directory,
+# pass "--no-build" as the first argument on the command line.
+# Any other command line is passed directly to "make" (so
+# you can for example pass a "-j" argument suitable for your
+# system).
+#
+# Inspired by a post by Markus Armbruster.
+
+mkdir -p +build
+cd +build
+if test "x$1" != "x--no-build"; then
+  test -f Makefile && make distclean
+  ../configure
+  make "$@"
+fi
+
+QEMU_CFLAGS=$(sed -n s/^QEMU_CFLAGS=//p config-host.mak)
+QEMU_INCLUDES=$(sed -n s/^QEMU_INCLUDES=//p config-host.mak | \
+    sed 's/$(SRC_PATH)/../g' )
+CFLAGS=$(sed -n s/^CFLAGS=//p config-host.mak)
+
+grep_include() {
+  find . -name "*.d" | xargs grep -l "$@" | wc -l
+}
+
+echo Found $(find . -name "*.d" | wc -l) object files
+echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
+echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
+echo $(grep_include 'target-[a-z0-9]*/cpu\.h') files include cpu.h
+echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
+echo $(grep_include -F 'trace/generated-tracers.h') files include generated-tracers.h
+echo $(grep_include -F 'qapi/error.h') files include qapi/error.h
+echo $(grep_include -F 'qom/object.h') files include qom/object.h
+echo $(grep_include -F 'block/aio.h') files include block/aio.h
+echo $(grep_include -F 'exec/memory.h') files include exec/memory.h
+echo $(grep_include -F 'fpu/softfloat.h') files include fpu/softfloat.h
+echo $(grep_include -F 'qemu/bswap.h') files include qemu/bswap.h
+echo
+
+awk1='
+    /^# / { file = $3;next }
+    NR>1 { bytes[file]+=length; lines[file]++ }
+    END { for(i in lines) print i,lines[i],bytes[i] }'
+
+awk2='
+    {tot_l+=$2;tot_b+=$3;tot_f++}
+    /\/usr.*\/glib/ {glib_l+=$2;glib_b+=$3;glib_f++;next}
+    /\/usr/ {sys_l+=$2;sys_b+=$3;sys_f++;next}
+    {qemu_l+=$2;qemu_b+=$3;qemu_f++;next}
+    END {
+      printf "%s\t %s\t %s\t %s\n", "lines", "bytes", "files", "source"
+      printf "%s\t %s\t %s\t %s\n", qemu_l, qemu_b, qemu_f, "QEMU"
+      printf "%s\t %s\t %s\t %s\n", sys_l, sys_b, sys_f, "system"
+      printf "%s\t %s\t %s\t %s\n", glib_l, glib_b, glib_f, "glib"
+      printf "%s\t %s\t %s\t %s\n", tot_l, tot_b, tot_f, "total"
+    }'
+
+analyze() {
+  cc $QEMU_CFLAGS $QEMU_INCLUDES $CFLAGS  -E -o - "$@" | \
+    awk "$awk1" | awk "$awk2"
+  echo
+}
+
+echo osdep.h:
+analyze ../include/qemu/osdep.h
+
+echo qemu-common.h:
+analyze  -include ../include/qemu/osdep.h ../include/qemu-common.h
+
+echo hw/hw.h:
+analyze -include ../include/qemu/osdep.h ../include/hw/hw.h
+
+echo trace/generated-tracers.h:
+analyze -include ../include/qemu/osdep.h trace/generated-tracers.h
+
+echo target-i386/cpu.h:
+analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../target-i386/cpu.h
+
+echo hw/hw.h + NEED_CPU_H:
+analyze -DNEED_CPU_H -I../target-i386 -Ii386-softmmu -include ../include/qemu/osdep.h ../include/hw/hw.h