diff mbox series

[v3,11/16] scripts/oss-fuzz: Add general-fuzzer build script

Message ID 20200921022506.873303-12-alxndr@bu.edu (mailing list archive)
State New, archived
Headers show
Series Add a General Virtual Device Fuzzer | expand

Commit Message

Alexander Bulekov Sept. 21, 2020, 2:25 a.m. UTC
This parses a yaml file containing general-fuzzer configs and builds a
separate oss-fuzz wrapper binary for each one, changing some
preprocessor macros for each configuration. To avoid dealing with
escaping and stringifying, convert each string into a byte-array
representation

Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
---
 scripts/oss-fuzz/build_general_fuzzers.py | 69 +++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100755 scripts/oss-fuzz/build_general_fuzzers.py

Comments

Darren Kenny Oct. 1, 2020, 3:40 p.m. UTC | #1
Hi Alex,

On Sunday, 2020-09-20 at 22:25:01 -04, Alexander Bulekov wrote:
> This parses a yaml file containing general-fuzzer configs and builds a
> separate oss-fuzz wrapper binary for each one, changing some
> preprocessor macros for each configuration. To avoid dealing with
> escaping and stringifying, convert each string into a byte-array
> representation
>
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  scripts/oss-fuzz/build_general_fuzzers.py | 69 +++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
>  create mode 100755 scripts/oss-fuzz/build_general_fuzzers.py
>
> diff --git a/scripts/oss-fuzz/build_general_fuzzers.py b/scripts/oss-fuzz/build_general_fuzzers.py
> new file mode 100755
> index 0000000000..918f1143a5
> --- /dev/null
> +++ b/scripts/oss-fuzz/build_general_fuzzers.py
> @@ -0,0 +1,69 @@
> +#!/usr/bin/env python3
> +# -*- coding: utf-8 -*-
> +
> +"""
> +This script creates wrapper binaries that invoke the general-device-fuzzer with
> +configurations specified in a yaml config file.
> +"""
> +import sys
> +import os
> +import yaml
> +import tempfile
> +
> +CC = ""
> +TEMPLATE_FILENAME = "target_template.c"
> +TEMPLATE_PATH = ""
> +
> +
> +def usage():
> +    print("Usage: CC=COMPILER {} CONFIG_PATH \
> +OUTPUT_PATH_PREFIX".format(sys.argv[0]))

The indentation of this seems off.

Python will concatenate two or more adjacent string literals, so it
might be cleaner to write them like:

    print("Usage: CC=COMPILER {} CONFIG_PATH "
          "OUTPUT_PATH_PREFIX".format(sys.argv[0]))

There is no need for the backslash at the end due to the use of the
parentheses '()' here.


> +    sys.exit(0)
> +
> +
> +def str_to_c_byte_array(s):
> +    """
> +    Convert strings to byte-arrays so we don't worry about formatting
> +    strings to play nicely with cc -DQEMU_FUZZARGS etc
> +    """
> +    return ','.join('0x{:02x}'.format(ord(x)) for x in s)
> +
> +
> +def compile_wrapper(cfg, path):
> +    os.system('$CC -DQEMU_FUZZ_ARGS="{fuzz_args}" \
> +               -DQEMU_FUZZ_OBJECTS="{fuzz_objs}" \
> +               {wrapper_template} -o {output_bin}'.format(
> +                   fuzz_args=str_to_c_byte_array(cfg["args"].replace("\n", " ")),
> +                   fuzz_objs=str_to_c_byte_array(cfg["objects"].replace("\n", " ")),
> +                   wrapper_template=TEMPLATE_PATH,
> +                   output_bin=path))
> +

Similarly here, it might look better as:

    os.system('$CC -DQEMU_FUZZ_ARGS="{fuzz_args}" '
              '-DQEMU_FUZZ_OBJECTS="{fuzz_objs}" '
              '{wrapper_template} -o {output_bin}'.format(
                   fuzz_args=str_to_c_byte_array(cfg["args"].replace("\n", " ")),
                   fuzz_objs=str_to_c_byte_array(cfg["objects"].replace("\n", " ")),
                   wrapper_template=TEMPLATE_PATH,
                   output_bin=path))

> +
> +
> +def main():
> +    global CC
> +    global TEMPLATE_PATH
> +    global OUTPUT_BIN_NAME
> +
> +    if len(sys.argv) != 3:
> +        usage()
> +
> +    cfg_path = sys.argv[1]
> +    out_path = sys.argv[2]
> +
> +    CC = os.getenv("CC", default="cc")
> +    TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), TEMPLATE_FILENAME)
> +    if not os.path.exists(TEMPLATE_PATH):
> +        print("Error {} doesn't exist".format(TEMPLATE_PATH))
> +        sys.exit(1)
> +
> +    with open(cfg_path, "r") as f:
> +        configs = yaml.load(f)["configs"]

Just in case, the use of .get("configs", []) might work better here.

But also check if yaml.load() could possibly throw an exception if the
file, despite existing, isn't able to be parsed.

Thanks,

Darren.
Paolo Bonzini Oct. 8, 2020, 7:35 a.m. UTC | #2
On 21/09/20 04:25, Alexander Bulekov wrote:
> This parses a yaml file containing general-fuzzer configs and builds a
> separate oss-fuzz wrapper binary for each one, changing some
> preprocessor macros for each configuration. To avoid dealing with
> escaping and stringifying, convert each string into a byte-array
> representation
> 
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
>  scripts/oss-fuzz/build_general_fuzzers.py | 69 +++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
>  create mode 100755 scripts/oss-fuzz/build_general_fuzzers.py
> 
> diff --git a/scripts/oss-fuzz/build_general_fuzzers.py b/scripts/oss-fuzz/build_general_fuzzers.py
> new file mode 100755
> index 0000000000..918f1143a5
> --- /dev/null
> +++ b/scripts/oss-fuzz/build_general_fuzzers.py
> @@ -0,0 +1,69 @@
> +#!/usr/bin/env python3
> +# -*- coding: utf-8 -*-
> +
> +"""
> +This script creates wrapper binaries that invoke the general-device-fuzzer with
> +configurations specified in a yaml config file.
> +"""
> +import sys
> +import os
> +import yaml
> +import tempfile
> +
> +CC = ""
> +TEMPLATE_FILENAME = "target_template.c"
> +TEMPLATE_PATH = ""
> +
> +
> +def usage():
> +    print("Usage: CC=COMPILER {} CONFIG_PATH \
> +OUTPUT_PATH_PREFIX".format(sys.argv[0]))
> +    sys.exit(0)
> +
> +
> +def str_to_c_byte_array(s):
> +    """
> +    Convert strings to byte-arrays so we don't worry about formatting
> +    strings to play nicely with cc -DQEMU_FUZZARGS etc
> +    """
> +    return ','.join('0x{:02x}'.format(ord(x)) for x in s)
> +
> +
> +def compile_wrapper(cfg, path):
> +    os.system('$CC -DQEMU_FUZZ_ARGS="{fuzz_args}" \
> +               -DQEMU_FUZZ_OBJECTS="{fuzz_objs}" \
> +               {wrapper_template} -o {output_bin}'.format(
> +                   fuzz_args=str_to_c_byte_array(cfg["args"].replace("\n", " ")),
> +                   fuzz_objs=str_to_c_byte_array(cfg["objects"].replace("\n", " ")),
> +                   wrapper_template=TEMPLATE_PATH,
> +                   output_bin=path))
> +
> +
> +def main():
> +    global CC
> +    global TEMPLATE_PATH
> +    global OUTPUT_BIN_NAME
> +
> +    if len(sys.argv) != 3:
> +        usage()
> +
> +    cfg_path = sys.argv[1]
> +    out_path = sys.argv[2]
> +
> +    CC = os.getenv("CC", default="cc")
> +    TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), TEMPLATE_FILENAME)
> +    if not os.path.exists(TEMPLATE_PATH):
> +        print("Error {} doesn't exist".format(TEMPLATE_PATH))
> +        sys.exit(1)
> +
> +    with open(cfg_path, "r") as f:
> +        configs = yaml.load(f)["configs"]
> +    for cfg in configs:
> +        assert "name" in cfg
> +        assert "args" in cfg
> +        assert "objects" in cfg
> +        compile_wrapper(cfg, out_path + cfg["name"])
> +
> +
> +if __name__ == '__main__':
> +    main()
> 

Can you instead write an array of

struct {
    const char *name, *args, *objects;
}

and use it in the normal argv0-based selection?  The advantage would be
that you can do the whole build within tests/qtest/fuzz/meson.build
instead of having yet another undocumented shell script (cue all the
mess I made when trying to modify scripts/oss-fuzz/build.sh).

Paolo
Alexander Bulekov Oct. 15, 2020, 1:46 p.m. UTC | #3
On 201008 0935, Paolo Bonzini wrote:
> On 21/09/20 04:25, Alexander Bulekov wrote:
> > This parses a yaml file containing general-fuzzer configs and builds a
> > separate oss-fuzz wrapper binary for each one, changing some
> > preprocessor macros for each configuration. To avoid dealing with
> > escaping and stringifying, convert each string into a byte-array
> > representation
> > 
> > Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> > ---
> >  scripts/oss-fuzz/build_general_fuzzers.py | 69 +++++++++++++++++++++++
> >  1 file changed, 69 insertions(+)
> >  create mode 100755 scripts/oss-fuzz/build_general_fuzzers.py
> > 
> > diff --git a/scripts/oss-fuzz/build_general_fuzzers.py b/scripts/oss-fuzz/build_general_fuzzers.py
> > new file mode 100755
> > index 0000000000..918f1143a5
> > --- /dev/null
> > +++ b/scripts/oss-fuzz/build_general_fuzzers.py
> > @@ -0,0 +1,69 @@
> > +#!/usr/bin/env python3
> > +# -*- coding: utf-8 -*-
> > +
> > +"""
> > +This script creates wrapper binaries that invoke the general-device-fuzzer with
> > +configurations specified in a yaml config file.
> > +"""
> > +import sys
> > +import os
> > +import yaml
> > +import tempfile
> > +
> > +CC = ""
> > +TEMPLATE_FILENAME = "target_template.c"
> > +TEMPLATE_PATH = ""
> > +
> > +
> > +def usage():
> > +    print("Usage: CC=COMPILER {} CONFIG_PATH \
> > +OUTPUT_PATH_PREFIX".format(sys.argv[0]))
> > +    sys.exit(0)
> > +
> > +
> > +def str_to_c_byte_array(s):
> > +    """
> > +    Convert strings to byte-arrays so we don't worry about formatting
> > +    strings to play nicely with cc -DQEMU_FUZZARGS etc
> > +    """
> > +    return ','.join('0x{:02x}'.format(ord(x)) for x in s)
> > +
> > +
> > +def compile_wrapper(cfg, path):
> > +    os.system('$CC -DQEMU_FUZZ_ARGS="{fuzz_args}" \
> > +               -DQEMU_FUZZ_OBJECTS="{fuzz_objs}" \
> > +               {wrapper_template} -o {output_bin}'.format(
> > +                   fuzz_args=str_to_c_byte_array(cfg["args"].replace("\n", " ")),
> > +                   fuzz_objs=str_to_c_byte_array(cfg["objects"].replace("\n", " ")),
> > +                   wrapper_template=TEMPLATE_PATH,
> > +                   output_bin=path))
> > +
> > +
> > +def main():
> > +    global CC
> > +    global TEMPLATE_PATH
> > +    global OUTPUT_BIN_NAME
> > +
> > +    if len(sys.argv) != 3:
> > +        usage()
> > +
> > +    cfg_path = sys.argv[1]
> > +    out_path = sys.argv[2]
> > +
> > +    CC = os.getenv("CC", default="cc")
> > +    TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), TEMPLATE_FILENAME)
> > +    if not os.path.exists(TEMPLATE_PATH):
> > +        print("Error {} doesn't exist".format(TEMPLATE_PATH))
> > +        sys.exit(1)
> > +
> > +    with open(cfg_path, "r") as f:
> > +        configs = yaml.load(f)["configs"]
> > +    for cfg in configs:
> > +        assert "name" in cfg
> > +        assert "args" in cfg
> > +        assert "objects" in cfg
> > +        compile_wrapper(cfg, out_path + cfg["name"])
> > +
> > +
> > +if __name__ == '__main__':
> > +    main()
> > 
> 
> Can you instead write an array of
> 
> struct {
>     const char *name, *args, *objects;
> }
> 
> and use it in the normal argv0-based selection?  The advantage would be
> that you can do the whole build within tests/qtest/fuzz/meson.build
> instead of having yet another undocumented shell script (cue all the
> mess I made when trying to modify scripts/oss-fuzz/build.sh).
> 
> Paolo

Thanks for the suggestion. I did this in v4, and I think it is much
nicer. No more Python script, C template, and preprocessor hacking. I
don't think the way I defined the configs is ideal; however, I think it
is already a better solution.
-Alex

>
diff mbox series

Patch

diff --git a/scripts/oss-fuzz/build_general_fuzzers.py b/scripts/oss-fuzz/build_general_fuzzers.py
new file mode 100755
index 0000000000..918f1143a5
--- /dev/null
+++ b/scripts/oss-fuzz/build_general_fuzzers.py
@@ -0,0 +1,69 @@ 
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+This script creates wrapper binaries that invoke the general-device-fuzzer with
+configurations specified in a yaml config file.
+"""
+import sys
+import os
+import yaml
+import tempfile
+
+CC = ""
+TEMPLATE_FILENAME = "target_template.c"
+TEMPLATE_PATH = ""
+
+
+def usage():
+    print("Usage: CC=COMPILER {} CONFIG_PATH \
+OUTPUT_PATH_PREFIX".format(sys.argv[0]))
+    sys.exit(0)
+
+
+def str_to_c_byte_array(s):
+    """
+    Convert strings to byte-arrays so we don't worry about formatting
+    strings to play nicely with cc -DQEMU_FUZZARGS etc
+    """
+    return ','.join('0x{:02x}'.format(ord(x)) for x in s)
+
+
+def compile_wrapper(cfg, path):
+    os.system('$CC -DQEMU_FUZZ_ARGS="{fuzz_args}" \
+               -DQEMU_FUZZ_OBJECTS="{fuzz_objs}" \
+               {wrapper_template} -o {output_bin}'.format(
+                   fuzz_args=str_to_c_byte_array(cfg["args"].replace("\n", " ")),
+                   fuzz_objs=str_to_c_byte_array(cfg["objects"].replace("\n", " ")),
+                   wrapper_template=TEMPLATE_PATH,
+                   output_bin=path))
+
+
+def main():
+    global CC
+    global TEMPLATE_PATH
+    global OUTPUT_BIN_NAME
+
+    if len(sys.argv) != 3:
+        usage()
+
+    cfg_path = sys.argv[1]
+    out_path = sys.argv[2]
+
+    CC = os.getenv("CC", default="cc")
+    TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), TEMPLATE_FILENAME)
+    if not os.path.exists(TEMPLATE_PATH):
+        print("Error {} doesn't exist".format(TEMPLATE_PATH))
+        sys.exit(1)
+
+    with open(cfg_path, "r") as f:
+        configs = yaml.load(f)["configs"]
+    for cfg in configs:
+        assert "name" in cfg
+        assert "args" in cfg
+        assert "objects" in cfg
+        compile_wrapper(cfg, out_path + cfg["name"])
+
+
+if __name__ == '__main__':
+    main()