[OPW,kernel,v2] scripts: Compile out syscalls given a specific userspace
diff mbox

Message ID 1425823227-17077-1-git-send-email-iulia.manda21@gmail.com
State New, archived
Headers show

Commit Message

Iulia Manda March 8, 2015, 2 p.m. UTC
This patch suggests which syscalls can be compiled out in the kernel given a
specific userspace, by mapping each syscall to its corresponding symbol(s)
and deciding which of them can be disabled.

This is implemented in two steps, as follows:

A. Parse all C files and Makefiles in the kernel source code in order to map
each syscall with the symbols that compile it out:
- we need a stack in order to know between which ifdef and endif a syscall is
defined;
- we keep a dictionary where the key is the syscall and the values are all the
symbols that it depends on and the conditionals between them;

B. Read the object file and the data file obtained in the previous step and
output a config snippet, as follows:
1. Get the list of syscalls a userspace uses (nm) - this will give us more
symbols than those that match syscalls, but the next step will filter them
out;
2. Intersect that list with the list of all optional syscalls (check-syscalls
script that finds what syscalls can be compiled out in kernel/sys_ni.c) => we
will obtain a list containing all the optional syscalls that we can compile
out;
3. The output will be a list of symbols that can be disabled, and the
corresponding list of those symbols that need to be enabled in order for the
application to work.

In case of uncertainty (e.g. compound conditionals), it chooses to enable all
the symbols that the syscall depends on.

In short, it provides correct solutions, though not necessarily optimal ones
yet (for example, in case of a disjunction, both symbols are set
to True, even though only one is needed in order for the syscall to be
compiled in).

You can run the scripts as follows:

./parse_sys_symbols.py `find linux/ -name "*.c"`
./optional_sys_symbols.py object_file syscalls-optional sys_dict

Signed-off-by: Iulia Manda <iulia.manda21@gmail.com>
---
Changes since v1:
        - separate parsing logic
        - solve python issues
        - add 'ifeq' checks
Uncertainties:
        - names of the scripts;

 scripts/optional_sys_symbols.py | 101 +++++++++++++++++++++++++++++
 scripts/parse_sys_symbols.py    | 139 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 240 insertions(+)
 create mode 100755 scripts/optional_sys_symbols.py
 create mode 100755 scripts/parse_sys_symbols.py

Patch
diff mbox

diff --git a/scripts/optional_sys_symbols.py b/scripts/optional_sys_symbols.py
new file mode 100755
index 0000000..0ac8c6b
--- /dev/null
+++ b/scripts/optional_sys_symbols.py
@@ -0,0 +1,101 @@ 
+#!/usr/bin/python
+
+import sys, re, os, json
+from subprocess import STDOUT, PIPE, Popen
+
+
+# Find what syscalls a userspace uses
+def get_symbols(file):
+    sym = []
+    cmd = "nm --undefined-only " + file
+    p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, \
+                close_fds=True)
+    content = p.stdout.read()
+    if "no symbols" not in content:
+        for l in content.splitlines():
+            if '@@GLIBC' not in l:
+                continue
+            t,n = l.split()
+            if t == 'U':
+                sym.append(re.split("@@GLIBC", n)[0])
+    else:
+        for l in os.popen("nm -D " + file):
+            if len(l.split()) == 3:
+                sym.append(l.split()[2])
+    return sym
+
+
+def get_optional_syscalls(file):
+    sym = []
+    with open(file) as f:
+        lines = f.read().splitlines()
+        for l in lines:
+            sym.append(l)
+    return sym
+
+
+# Find which syscalls from userspace can be optionally compiled in the
+# kernel
+def merge_lists(args):
+    sym = []
+    us = get_symbols(args[1])
+    opt = get_optional_syscalls(args[2])
+    for u in us:
+        if "sys_" + u in opt:
+            sym.append(u)
+    return sym
+
+
+def main(args):
+    if len(args) < 4:
+        sys.stderr.write("usage: %s object_file syscalls-optional data_file\n"
+                     % args[0])
+        return 1
+
+    bool_dict = {}
+    map_sys = {}
+    with open(args[3], 'r') as f:
+        map_sys = json.load(f)
+
+
+    # At first, we set all symbols to False (no symbol is enabled)
+    for k,v in map_sys.items():
+        for e in v:
+            bool_dict[e] = False
+
+    def enable_symbol():
+        cnf = merge_lists(args)
+        for e in cnf:
+            if e not in map_sys:
+                continue
+            for sym in map_sys[e]:
+                if re.search('^!', sym):
+                    bool_dict[sym] = "!True"
+                else:
+                    bool_dict[sym] = True
+
+    enable_symbol()
+
+    print "You can disable the following symbols:"
+    for k,v in bool_dict.iteritems():
+        if v is False:
+            if re.search('^!', k):
+                print k[1:]
+            else:
+                print k
+    print '\n'
+
+    print "The following symbols have to be enabled:"
+    for k,v in bool_dict.iteritems():
+        if v is True:
+            print k
+    print '\n'
+
+    print "The following symbols need to be set to 'n':"
+    for k,v in bool_dict.iteritems():
+        if v == "!True":
+            print k[1:]
+    print '\n'
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/scripts/parse_sys_symbols.py b/scripts/parse_sys_symbols.py
new file mode 100755
index 0000000..7a85bfe
--- /dev/null
+++ b/scripts/parse_sys_symbols.py
@@ -0,0 +1,139 @@ 
+#!/usr/bin/python
+
+import re, sys, os, json
+
+def c_to_o(file):
+    f = os.path.basename(file)
+    name, ext = os.path.splitext(f)
+    return name + ".o"
+
+
+def get_syscall_name(line):
+    name = re.split('[(,)]', line)[1]
+    return name
+
+
+def get_ifdef_symbols(line):
+    name = line.split()
+    prefix = "" if name[0] == "ifdef" else "!"
+    return prefix + name[1]
+
+
+def get_ifeq_symbols(line):
+    string = re.split('[$()]', line)
+    name = string[3]
+
+    prefix = ""
+    if string[0] == "ifeq" and string[4] != ",y":
+        prefix = "!"
+
+    if string[0] == "ifneq" and string[4] == ",y":
+        prefix = "!"
+
+    return prefix + name
+
+
+def get_defined_symbols(line):
+    delim = ['#if', 'defined ', 'defined(', ')', '&&', '||', '>=']
+    for d in delim:
+        line = line.replace(d, '')
+    line = line.split()[0]
+    return line
+
+
+class Parser(object):
+    def __init__(self):
+        self.stack = []
+        self.map_sys = {}
+
+# Check in the Makefile in order to see if a file containing a syscall
+# is compiled out as a whole
+    def parse_makefile(self, file):
+        sys_list = []
+        with open(file) as f:
+            lines = f.read().splitlines()
+            for l in lines:
+                if re.search("^SYSCALL_DEFINE", l) or \
+                    re.search("^COMPAT_SYSCALL_DEFINE", l):
+                    sys_list.append(get_syscall_name(l))
+        if sys_list == []:
+            return
+        search_for = c_to_o(file)
+        makefile_path = os.path.join(os.path.dirname(file),
+                                     "Makefile")
+        if not os.path.exists(makefile_path):
+            return
+
+        with open(makefile_path) as f:
+            lines = f.read().replace('\\\n', '').splitlines()
+            yes = '\n'.join([l for l in lines if search_for in l])
+            if re.search('.*-\$\(CONFIG.*\)', yes):
+                value = re.split('[$()]', yes)[2]
+                for e in sys_list:
+                    self.map_sys.setdefault(e, []).append(value)
+            # Check if a file is compiled under ifdefs
+            for l in lines:
+                if re.search('^(ifdef|ifndef)', l):
+                    name = get_ifdef_symbols(l)
+                    self.stack.append(name)
+                if re.search('^(ifeq|ifneq)', l):
+                    name = get_ifeq_symbols(l)
+                    self.stack.append(name)
+                elif search_for in l:
+                    if self.stack:
+                        for e in sys_list:
+                            self.map_sys.setdefault(e, []) \
+                                   .append(self.stack[-1])
+                elif re.search('^endif', l):
+                    if self.stack:
+                        self.stack.pop()
+
+
+    def parse_line(self, line):
+        if re.search("^(#ifdef|#ifndef)",line):
+            name = get_ifdef_symbols(line)
+            self.stack.append(name)
+        elif (re.search('^#if', line)) and ('defined' not in line):
+            name = ""
+            self.stack.append(name)
+        elif re.search("^#if defined", line):
+            name = get_defined_symbols(line)
+            self.stack.append(name)
+        elif re.search("^SYSCALL_DEFINE", line) or \
+                re.search("^COMPAT_SYSCALL_DEFINE", line):
+            syscall_name = get_syscall_name(line)
+            if self.stack and self.stack[-1] != "":
+                self.map_sys.setdefault(syscall_name, []) \
+                                .append(self.stack[-1])
+        elif re.search('^#endif', line):
+            if self.stack:
+                self.stack.pop()
+
+
+    def run(self, args):
+        for n in args:
+            with open(n) as f:
+                # need to compact lines that contain the same info
+                lines = f.read().replace('\\\n', '').splitlines()
+                for l in lines:
+                    self.parse_line(l)
+            self.parse_makefile(n)
+        return self.map_sys
+
+# One can use pprint in order to see the intermediate output
+# more human-readable :)
+# import pprint
+# pprint.pprint(map_sys)
+# print "\n"
+
+def main(args):
+    if len(args) < 2:
+        sys.stderr.write("usage: %s source_files\n" % args[0])
+        return 1
+
+    map_sys = Parser().run(args[1:])
+    with open('sys_dict', 'w') as f:
+        json.dump(map_sys, f)
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))