diff mbox

[KVM-AUTOTEST,2/2] Add KSM test

Message ID 4A9CE03A.4010706@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lukáš Doktor Sept. 1, 2009, 8:50 a.m. UTC
I'm sorry but thunderbird apparently crippled the path. Resending as the 
attachment.
diff mbox

Patch

diff --git a/client/tests/kvm/kvm.py b/client/tests/kvm/kvm.py
index 4930e80..b9839df 100644
--- a/client/tests/kvm/kvm.py
+++ b/client/tests/kvm/kvm.py
@@ -53,6 +53,8 @@  class kvm(test.test):
                 "yum_update":   test_routine("kvm_tests", "run_yum_update"),
                 "autotest":     test_routine("kvm_tests", "run_autotest"),
                 "kvm_install":  test_routine("kvm_install", "run_kvm_install"),
+                "ksm":
+                                test_routine("kvm_tests", "run_ksm"),
                 "linux_s3":     test_routine("kvm_tests", "run_linux_s3"),
                 "stress_boot":  test_routine("kvm_tests", "run_stress_boot"),
                 "timedrift":    test_routine("kvm_tests", "run_timedrift"),
diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample
index a83ef9b..f4a41b9 100644
--- a/client/tests/kvm/kvm_tests.cfg.sample
+++ b/client/tests/kvm/kvm_tests.cfg.sample
@@ -100,6 +100,23 @@  variants:
                 test_name = disktest
                 test_control_file = disktest.control
 
+    - ksm:
+        # Don't preprocess any vms as we need to change it's params
+        vms = ''
+        image_snapshot = yes
+        kill_vm_gracefully = no
+        type = ksm
+        variants:
+            - ratio_3:
+                ksm_ratio = 3
+            - ratio_10:
+                ksm_ratio = 10
+        variants:
+            - serial 
+                ksm_test_size = "serial"
+            - paralel
+                ksm_test_size = "paralel"
+
     - linux_s3:     install setup
         type = linux_s3
 
diff --git a/client/tests/kvm/kvm_tests.py b/client/tests/kvm/kvm_tests.py
index b100269..ada4c6b 100644
--- a/client/tests/kvm/kvm_tests.py
+++ b/client/tests/kvm/kvm_tests.py
@@ -462,6 +462,554 @@  def run_yum_update(test, params, env):
 
     session.close()
 
+def run_ksm(test, params, env):
+    """
+    Test how KSM (Kernel Shared Memory) act with more than physical memory is
+    used. In second part is also tested, how KVM can handle the situation,
+    when the host runs out of memory (expected is to pause the guest system,
+    wait until some process returns the memory and bring the guest back to life)
+
+    @param test: kvm test object.
+    @param params: Dictionary with test parameters.
+    @param env: Dictionary with the test wnvironment.
+    """
+    # We are going to create the main VM so we use kvm_preprocess functions
+    # FIXME: not a nice thing
+    import kvm_preprocessing
+    import random
+    import socket
+    import select
+    import math
+
+    class allocator_com:
+        """
+        This class is used for communication with the allocator
+        """
+        def __init__(self, vm, _port, _host='127.0.0.1'):
+            self.vm = vm
+            self.PORT = _port
+            self.HOST = _host
+            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            self.isConnect = False
+
+        def __str__(self):
+            return self.vm + ":" + self.HOST + ":" + str(self.PORT)
+
+        def connect(self):
+            print self
+            logging.debug("ALLOC: connect to %s", self.vm)
+            try:
+                self.socket.connect((self.HOST, self.PORT))
+            except:
+                raise error.TestFail("ALLOC: Could not establish the "\
+                                     "communication with %s" % (self.vm))
+            self.isConnect = True
+        
+        def isConnected(self):
+            return self.isConnect;
+
+        def readsize(self):
+            read,write,error = select.select([self.socket.fileno()],[],[],0.5)
+            size = 0
+            if (self.socket.fileno() in read):
+                data = self.socket.recv(1);
+                size = "";
+                while data[0] != ':':
+                    size = size + data[0]
+                    data = self.socket.recv(1)
+            return int(size)
+
+        def _recv(self):
+            msg = ""
+            read, write, error = select.select([self.socket.fileno()],\
+                                               [], [], 0.5)
+            if (self.socket.fileno() in read):
+                size = self.readsize()
+                msg = self.socket.recv(size)
+                if (len(msg) < size):
+                    raise error.TestFail("ALLOC: Could not recive the message")
+            
+            logging.debug("ALLOC: output '%s' from %s" % (msg, self.vm))
+            return msg
+
+        def recv(self, wait=1, loops=20):
+            out = ""
+            log = ""
+            while not out.startswith("PASS") and not out.startswith("FAIL"):
+                logging.debug("Sleep(%d)" % (wait))
+                time.sleep(wait)
+                log += out
+                out = self._recv()
+
+                if loops == 0:
+                    logging.error(repr(out))
+                    raise error.TestFail("Command wasn't finished until DL")
+                loops = loops - 1
+
+            if not out.startswith("PASS"):
+                logging.error("Allocator failed on guest %s\nAttaching the"\
+                              "recent log"  % (self.vm))
+                raise error.TestFail(log)
+
+            return out
+            
+
+        def send(self, command, data=""):
+            msg = str(len(command) + len(data) + 3)
+            msg += ":" + command + ":" + data + ";"
+            logging.debug("ALLOC: execute %s on %s" %(repr(msg), self.vm))
+            try:
+                self.socket.sendall(msg)
+            except:
+                raise error.TestFail("ALLOC: Could not send the message")
+
+        def disconnect(self):
+            logging.debug("ALLOC: disconnect")
+            self.send("exit")
+            self.recv()
+            time.sleep(5)
+            self.socket.close()
+            self.isConnect = False
+
+    def get_stat(lvms):
+        """
+        Get statistics in format:
+        Host: memfree = XXXM; Guests memsh = {XXX,XXX,...}
+
+        @params lvms: List of VMs
+        """
+        if not isinstance(lvms, list):
+            raise error.TestError("get_stat: parameter have to be proper list")
+
+        try:
+            stat = "Host: memfree = "
+            stat += str(int(os.popen("cat /proc/meminfo | grep MemFree")\
+                                     .readline().split()[1]) / 1024) + "M; "
+            stat += "swapfree = "
+            stat += str(int(os.popen("cat /proc/meminfo | grep SwapFree")\
+                                     .readline().split()[1]) / 1024) + "M; "
+        except:
+            raise error.TestFail("Could not fetch free memory info")
+
+
+        stat += "Guests memsh = {"
+        for vm in lvms:
+            try:
+                cmd = "cat /proc/%d/statm" % vm.pid
+                shm = int(os.popen(cmd).readline().split()[2])
+                # statm stores informations in pages, recalculate to MB
+                shm = shm * 4 / 1024
+                stat += "%dM; " % (shm)
+            except:
+                raise error.TestError("Could not fetch shmem info from proc")
+        stat = stat[0:-2] + "}"
+        return stat
+    
+    
+    
+
+
+    logging.info("Starting phase 0: Initialization")
+    # host_reserve: mem reserve keept for the host system to run
+    host_reserve = 256
+    # guest_reserve: mem reserve which is not used by allocator on the guests
+    guest_reserve = 256
+    max_alloc = 10
+    max_vms = params.get("max_vms")
+    if max_vms:
+        max_vms = int(max_vms)
+    else:
+        max_vms = 2
+    overcommit = params.get("ksm_overcommit_ratio")
+    if overcommit:
+        overcommit = float(overcommit)
+    else:
+        overcommit = 2.0
+    # vmsc: count of all used VMs
+    vmsc = int(overcommit) + 1
+    vmsc = max(vmsc, max_vms)
+    
+    if (params['ksm_test_size'] == "paralel") :
+        host_mem = (int(os.popen("grep MemTotal: /proc/meminfo")\
+                       .readline().split()[1]) / 1024 - host_reserve)
+        vmsc = 1
+        overcommit = 1
+        mem = host_mem
+        # 32bit system adjustment
+        if not params['image_name'].endswith("64"):
+            logging.debug("Probably i386 guest architecture, "\
+                          "max allocator mem = 2G")
+            # Guest can have more than 2G but kvm mem + 1MB (allocator itself) can't
+            if (host_mem > 2048):
+                mem = 2047 
+                
+                
+        if os.popen("uname -i").readline().startswith("i386"):
+            logging.debug("Host is i386 architecture, max guest mem is 2G")
+            # Guest system with qemu overhead (64M) can't have more than 2G
+            if mem > 2048 - 64:
+                mem = 2048 - 64
+        
+    else:
+        host_mem = (int(os.popen("grep MemTotal: /proc/meminfo")\
+                       .readline().split()[1]) / 1024 - host_reserve)
+        # mem: Memory of the guest systems. Maximum must be less than amount of the 
+        # host's physical ram
+        mem = int(overcommit * host_mem / vmsc)
+    
+        # 32bit system adjustment
+        if not params['image_name'].endswith("64"):
+            logging.debug("Probably i386 guest architecture, "\
+                          "max allocator mem = 2G")
+            # Guest can have more than 2G but kvm mem + 1MB (allocator itself) can't
+            if mem-guest_reserve-1 > 2048:
+                vmsc = int(math.ceil((host_mem*overcommit)/(2048.0+guest_reserve))) 
+                mem = int(math.floor(host_mem*overcommit/vmsc))
+                
+        if os.popen("uname -i").readline().startswith("i386"):
+            logging.debug("Host is i386 architecture, max guest mem is 2G")
+            # Guest system with qemu overhead (64M) can't have more than 2G
+            if mem > 2048 - 64:
+                vmsc = int(math.ceil((host_mem*overcommit)/(2048 - 64.0))) 
+                mem = int(math.floor(host_mem*overcommit/vmsc))
+    
+    
+    logging.info("overcommit = %f" % (overcommit))
+    logging.info("true overcommit = %f " % (float(vmsc*mem) / float(host_mem)))
+    logging.info("host mem = %dM" % (host_mem))
+    logging.info("mem = %dM" % (mem))
+    logging.info("swap = %dM" %\
+                 (int(os.popen("cat /proc/meminfo | grep SwapTotal")\
+                              .readline().split()[1]) / 1024))
+    logging.info("max_vms = %d" % (max_vms))
+    logging.info("vmsc = %d" % (vmsc))
+
+    # Generate unique keys for random series
+    skeys = []
+    dkeys = []
+    for i in range(0, max(vmsc, max_alloc)):
+        key = "%03s" % (random.randrange(0,999))
+        while key in skeys:
+            key = "%03s" % (random.randrange(0,999))
+        skeys.append(key)
+
+        key = "%03s" % (random.randrange(0,999))
+        while key in dkeys:
+            key = "%03s" % (random.randrange(0,999))
+        dkeys.append(key)
+    
+    lvms = []
+    lsessions = []
+    lallocators = []
+    alloc_port = 31284
+
+    # As we don't know the number and memory amount of VMs in advance, we need 
+    # to specify and create them here (FIXME: not a nice thing)
+    params['mem'] = mem
+    params['vms'] = params.get("main_vm")
+    # ksm_size: amount of memory used by allocator
+    ksm_size = mem - guest_reserve
+    logging.info("ksm_size = %dM" % (ksm_size))
+
+    
+    params['redirs'] += ' alloc0'
+    params['guest_port_alloc0'] = str(alloc_port)
+    
+    if (params['ksm_test_size'] == "paralel") :
+        for j in range(1, max_alloc):
+            params['redirs'] += ' alloc' + str(j)
+            params['guest_port_alloc' + str(j)] = str(alloc_port + j)
+
+    # Creating of the first guest
+    kvm_preprocessing.preprocess_vm(test, params, env, params['vms'])
+    lvms.append(kvm_utils.env_get_vm(env, params.get("main_vm")))
+    if not lvms[0]:
+        raise error.TestError("VM object not found in environment")
+    if not lvms[0].is_alive():
+        raise error.TestError("VM seems to be dead; Test requires a living VM")
+
+    logging.info("Booting the first guest %s" % lvms[0].name)
+
+    lsessions.append(kvm_utils.wait_for(lvms[0].ssh_login, 360, 0, 2))
+    if not lsessions[0]:
+        raise error.TestFail("Could not log into first guest")
+    
+    
+    lallocators.append(allocator_com(lvms[0].name, lvms[0].redirs[alloc_port]))
+    if not lallocators[0]:
+        raise error.TestFail("Could not create allocator_com class for vm1")
+    
+
+    
+    # Creating of other guest systems
+    for i in range(1, vmsc):
+        vm_name = "vm" + str(i + 1)
+        # Last VM is later used to run more allocators simultaneously
+        """for j in range(1, max_alloc):
+            params['redirs'] += ' alloc' + str(j)
+            params['guest_port_alloc' + str(j)] = str(alloc_port + j)"""
+        
+        lvms.append(lvms[0].clone(vm_name, params))
+        kvm_utils.env_register_vm(env, vm_name, lvms[i])
+        params['vms'] += " " + vm_name
+
+        logging.info("Booting guest %s" % lvms[i].name)
+        if not lvms[i].create():
+            raise error.TestFail("Cannot create VM %s" % lvms[i].name)
+        if not lvms[i].is_alive():
+            raise error.TestError("VM %s seems to be dead; Test requires a"\
+                                  "living VM" % lvms[i].name)
+
+        lsessions.append(kvm_utils.wait_for(lvms[i].ssh_login, 360, 0, 2))
+        if not lsessions[i]:
+            raise error.TestFail("Could not log into guest %s" % lvms[i].name)
+
+        lallocators.append(allocator_com(lvms[i].name,\
+                                         lvms[i].redirs[alloc_port]))
+        if not lallocators[i]:
+            raise error.TestFail("Could not create allocator_com class for %s"\
+                                 % (lvms[i].name))
+        
+
+    # Let systems take a rest :-)
+    time.sleep(vmsc * 2)
+    logging.info(get_stat(lvms))
+
+    # Copy the allocator.c into guests
+    pwd = os.path.join(os.environ['AUTODIR'],'tests/kvm')
+    vksmd_src = os.path.join(pwd, "allocator.c")
+    dst_dir = "/tmp"
+    for vm in lvms:
+        if not vm.scp_to_remote(vksmd_src, dst_dir):
+            raise error.TestFail("Remote scp failed %s" % (vm.name))
+    logging.info("Phase 0 => passed")
+
+    def phase_1():
+        """ Inicialize virtual machine """
+        logging.info("Starting phase 1: filling with 0")
+        logging.info("Preparing the guests and fill in pages by zero")
+        for session in lsessions:
+            vm = lvms[lsessions.index(session)]
+            allocator = lallocators[lsessions.index(session)]
+            # Build the test suite
+            ret = session.get_command_status("gcc -o /tmp/allocator "\
+                                             "/tmp/allocator.c",\
+                                             timeout=300)
+            if ret == None or ret:
+                raise error.TestFail("Failed to build vksmd in the %s"\
+                                     % (vm.name))
+    
+            # Start the daemon
+            ret = session.get_command_status("/tmp/allocator %d %d" % (ksm_size,\
+                                                                      alloc_port))
+            if ret == None:
+                raise error.TestFail("Could not run vksmd in guest %s"\
+                                     % (vm.name))
+            if ret:
+                raise error.TestFail("Could not run vksmd in %s errno: %d"\
+                                      % (vm.name, ret))
+    
+            ret = session.get_command_status("iptables -F;"\
+                                             "iptables -P INPUT ACCEPT;")
+    
+            allocator.connect()
+            allocator.recv((ksm_size / 200), 100)
+            
+            # Let kksmd works (until shared mem rich expected value)
+            shm = 0
+            i = 0
+            cmd = "cat /proc/%d/statm" % vm.pid
+            while shm < ksm_size:
+                if i > 64:
+                    logging.info(get_stat(lvms))
+                    raise error.TestError("SHM didn't merged the memory until "\
+					  "the DL")
+                logging.debug("Sleep(%d)" % (ksm_size / 200))
+                time.sleep(ksm_size / 200)
+                try:
+                    shm = int(os.popen(cmd).readline().split()[2])
+                    shm = shm * 4 / 1024
+                    i = i + 1
+                except:
+                    raise error.TestError("Could not fetch shmem info from "
+					  "the /proc")
+    
+        # Keep some reserve
+        time.sleep(ksm_size / 200)
+    
+        # Set allocator keys
+        for i in range(0, vmsc):
+            lallocators[i].send("init", "%s%s" % (skeys[i], dkeys[i]))
+            lallocators[i].recv(1, 10)
+        logging.info(get_stat(lvms))
+        logging.info("Phase 1 => passed")
+
+    def phase_2():
+        """ Separate first guest memory by generate a special random series """
+        logging.info("Starting phase 2: Split the pages on the first guest")
+    
+        lallocators[0].send("srandom")
+        out = lallocators[0].recv(ksm_size / 500, 50)
+        out = int(out.split()[4])
+        logging.info("PERFORMANCE: %dMB * 1000 / %dms = %dMB/s"\
+                     % (ksm_size, out, (ksm_size * 1000 / out)))
+        logging.info(get_stat(lvms))
+        logging.info("Phase 2 => passed")
+
+    def phase_3():
+        """ Sequentional split of pages on guests up to memory limit """
+        logging.info("Starting phase 3a: Sequentional split of pages on guests up "\
+                     "to memory limit")
+        last_vm = 0
+        for i in range(1, vmsc):
+            vm = lvms[i]
+            session = lsessions[i]
+            allocator = lallocators[i]
+    
+            allocator.send("srandom")
+            out = ""
+            while not out.startswith("PASS") and not out.startswith("FAIL"):
+                free_mem = int(os.popen("grep MemFree /proc/meminfo")\
+                             .readline().split()[1])
+                logging.debug("FreeMem = %d" % (free_mem))
+                # We need to keep some memory for python to run.
+                if free_mem < 32000:
+                    logging.debug("Only %s free memory, killing 0 - %d hosts"\
+                                  % (free_mem, (i-1)))
+                    for j in range(0, i):
+                        lvms[j].destroy(gracefully = False)
+                    last_vm = i
+                    break
+                out = allocator._recv()
+            if last_vm != 0:
+                break
+    
+        allocator.recv(mem / 500, 50)
+        logging.info("Memory filled by the guest %s" % (vm.name))
+        logging.info("Phase 3a => passed")
+        
+        """ Check if memory in max loading guest is allright"""
+        logging.info("Starting phase 3b")
+        allocator.send("srverify")
+        allocator.recv(mem / 200, 50)
+        allocator.disconnect()
+        # We are going to use the last VM later
+        if i != (vmsc):
+            session.close()
+            vm.destroy(gracefully = False)
+            for i in range(last_vm + 1, vmsc):
+                lallocators[i].send("verify")
+                lallocators[i].recv(mem / 200, 50)
+                lallocators[i].disconnect()
+                # We are going to use the last VM later
+                if i != (vmsc - 1):
+                    lsessions[i].close()
+                    lvms[i].destroy(gracefully = False)
+        logging.info(get_stat([lvms[i]]))
+        logging.info("Phase 3b => passed")
+
+    def phase_4():
+        """ Paralel page spliting """
+        logging.info("Phase 4: Paralel page spliting")
+        # We have to wait until allocator is finished (it waits 5 seconds to clean
+        # the socket
+        
+        session = lsessions[0]
+        vm = lvms[0]
+        
+        ret = session.get_command_status("gcc -o /tmp/allocator "\
+                                             "/tmp/allocator.c",\
+                                             timeout=300)
+        if ret == None or ret:
+            raise error.TestFail("Failed to build vksmd in the %s"\
+                                  % (vm.name))
+                                  
+        for all in lallocators:
+            if all.isConnected():
+                all.disconnect()
+        
+        del lallocators[:]
+        ret = session.get_command_status("iptables -F;"\
+                                    "iptables -P INPUT ACCEPT;")
+         
+        for i in range(0, max_alloc):
+            ret = session.get_command_status("/tmp/allocator %d %d" 
+                                   % (ksm_size / max_alloc, alloc_port + i))
+            if ret == None:
+                raise error.TestFail("Could not run vksmd in guest %s"\
+                                     % (vm.name))
+            if ret:
+                raise error.TestFail("Could not run allocator in %s errno: %d"\
+                                     % (vm.name, ret))
+        
+            lallocators.append(allocator_com(vm.name,\
+                                             vm.redirs[alloc_port + i]))
+            if not lallocators[i]:
+                raise error.TestFail("Could not create allocator_com class for"\
+				     " %s" % (vm.name))
+        
+        logging.info("Phase 4a: Simultaneous merging")
+        for i in range(0, max_alloc):
+            lallocators[i].connect()
+        
+        for i in range(0, max_alloc):
+            lallocators[i].recv((ksm_size / 200), 100)
+        # Wait until kksmd merges the pages (3 x ksm_size / 3)
+        shm = 0
+        i = 0
+        cmd = "cat /proc/%d/statm" % vm.pid
+        while shm < ksm_size:
+            if i > 64:
+                logging.info(get_stat(lvms))
+                raise error.TestError("SHM didn't merged the memory until DL")
+            logging.debug("Sleep(%d)" % (ksm_size / 200))
+            time.sleep(ksm_size / 200)
+            try:
+                shm = int(os.popen(cmd).readline().split()[2])
+                shm = shm * 4 / 1024
+            except:
+                raise error.TestError("Could not fetch shmem info from proc")
+        logging.info(get_stat([vm]))
+    
+    
+        logging.info("Phases 4b: Simultaneous spliting")
+        # Set keys
+        for i in range(0, max_alloc):
+            lallocators[i].send("init", "%s%s" % (skeys[i], dkeys[i]))
+            lallocators[i].recv(1, 10)
+    
+        # Actual splitting
+        for i in range(0, max_alloc):
+            lallocators[i].send("srandom")
+        
+        for i in range(0, max_alloc):
+            out = lallocators[i].recv(ksm_size / 500, 50)
+            out = int(out.split()[4])
+            logging.info("PERFORMANCE: %dMB * 1000 / %dms = %dMB/s"\
+                         % (ksm_size, out, (ksm_size * 1000 / out / max_alloc)))
+        logging.info(get_stat([vm]))
+    
+        logging.info("Phase 4c: Simultaneous verification")
+        for i in range(0, max_alloc):
+            lallocators[i].send("srverify")
+        for i in range(0, max_alloc):
+            lallocators[i].recv(mem / 200, 50)
+        logging.info(get_stat([vm]))
+    
+        logging.info("Phase 4 => passed")
+        # Clean-up
+        for i in range(0, max_alloc):
+            lallocators[i].disconnect()
+        session.close()
+        vm.destroy(gracefully = False)
+        
+    if params['ksm_test_size'] == "paralel":
+        phase_4()
+    elif params['ksm_test_size'] == "serial":
+        phase_1()
+        phase_2()
+        phase_3()
+        
 
 def run_linux_s3(test, params, env):
     """