diff mbox

[KVM-AUTOTEST] timedrift support

Message ID 4A010BCD.8060307@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bear Yang May 6, 2009, 4:02 a.m. UTC
Hello everyone,

I would like to submit a patch that adds a new 'time drift check' test for
guests running on KVM.

The TimeDrift design logic is below:
1. Set the host as the NTP server
2. The guest syncs its clock with the host only *once*, when it has booted up.
* If the offset value reported by ntpdate is larger than 1 sec, the guest will
sync its clock with the host.
* If the offset value reported by ntpdate is less than 1 sec, the guest doesn't
need to sync its clock with the host.

3. Then the CPU stress test is run on the guest.
* A C program puts a real load on the guest CPUs.
4. When the CPU stress test has finished, the command line <ntpdate
-q host-ip> is run 20 times in total on the guest to query the time from the
host and judge whether the guest clock has drifted or not.

The details of my patch are attached.

thanks.

Bear.

Comments

Marcelo Tosatti May 6, 2009, 1:02 p.m. UTC | #1
Bear,

Some comments below.

On Wed, May 06, 2009 at 12:02:21PM +0800, Bear Yang wrote:
> Hello everyone,
>
> I would like to submit a patch that adds a new 'time drift check' test for
> guests running on KVM.
>
> The TimeDrift design logic is below:
> 1. Set the host as the NTP server
> 2. The guest syncs its clock with the host only *once*, when it has booted up.
> * If the offset value reported by ntpdate is larger than 1 sec, the guest will
> sync its clock with the host.
> * If the offset value reported by ntpdate is less than 1 sec, the guest doesn't
> need to sync its clock with the host.
>
> 3. Then the CPU stress test is run on the guest.
> * A C program puts a real load on the guest CPUs.
> 4. When the CPU stress test has finished, the command line <ntpdate
> -q host-ip> is run 20 times in total on the guest to query the time from the
> host and judge whether the guest clock has drifted or not.
>
> The details of my patch are attached.
>
> thanks.
>
> Bear.

> diff -urN kvm_runtest_2.bak/cpu_stress.c kvm_runtest_2/cpu_stress.c
> --- kvm_runtest_2.bak/cpu_stress.c	1969-12-31 19:00:00.000000000 -0500
> +++ kvm_runtest_2/cpu_stress.c	2009-05-05 22:35:34.000000000 -0400
> @@ -0,0 +1,61 @@
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <pthread.h>
> +#include <sched.h>
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <math.h>
> +#include <unistd.h>
> +
> +#define MAX_CPUS 256
> +#define BUFFSIZE 1024
> +
> +
> +void worker_child(int cpu)
> +{
> +	int cur_freq;
> +	int min_freq;
> +	int max_freq;
> +	int last_freq;
> +	cpu_set_t mask;
> +	int i;
> +	double x;
> +        int d = 0;
> +	/*
> +	 * bind this thread to the specified cpu 
> +	 */
> +	CPU_ZERO(&mask);
> +	CPU_SET(cpu, &mask);
> +	sched_setaffinity(0, CPU_SETSIZE, &mask);
> +
> +	while (d++ != 500000) {
> +			for (i=0; i<100000; i++)
> +				x = sqrt(x);
> +	}
> +
> +	_exit(0);
> +
> +}
> +
> +
> +main() {
> +	cpu_set_t mask;
> +	int i;
> +	int code;
> +
> +	if (sched_getaffinity(0, CPU_SETSIZE, &mask) < 0){
> +		perror ("sched_getaffinity");
> +		exit(1);
> +	}
> +
> +	for (i=0; i<CPU_SETSIZE; i++)
> +		if (CPU_ISSET(i, &mask)){
> +			printf ("CPU%d\n",i);
> +			if (fork() == 0)
> +				worker_child(i);
> +		}
> +
> +
> +	wait(&code);
> +	exit (WEXITSTATUS(code));
> +}
> diff -urN kvm_runtest_2.bak/kvm_runtest_2.py kvm_runtest_2/kvm_runtest_2.py
> --- kvm_runtest_2.bak/kvm_runtest_2.py	2009-04-29 06:17:29.000000000 -0400
> +++ kvm_runtest_2/kvm_runtest_2.py	2009-04-29 08:06:32.000000000 -0400
> @@ -36,6 +36,8 @@
>                  "autotest":     test_routine("kvm_tests",           "run_autotest"),
>                  "kvm_install":  test_routine("kvm_install",         "run_kvm_install"),
>                  "linux_s3":     test_routine("kvm_tests",           "run_linux_s3"),
> +                "ntp_server_setup": test_routine("kvm_tests",       "run_ntp_server_setup"),
> +                "timedrift":    test_routine("kvm_tests",           "run_timedrift"),
>                  }
>  
>          # Make it possible to import modules from the test's bindir
> diff -urN kvm_runtest_2.bak/kvm_tests.cfg.sample kvm_runtest_2/kvm_tests.cfg.sample
> --- kvm_runtest_2.bak/kvm_tests.cfg.sample	2009-04-29 06:17:29.000000000 -0400
> +++ kvm_runtest_2/kvm_tests.cfg.sample	2009-04-29 08:09:36.000000000 -0400
> @@ -81,6 +81,10 @@
>      - linux_s3:      install setup
>          type = linux_s3
>  
> +    - ntp_server_setup:
> +        type = ntp_server_setup
> +    - timedrift:      ntp_server_setup
> +        type = timedrift
>  # NICs
>  variants:
>      - @rtl8139:
> diff -urN kvm_runtest_2.bak/kvm_tests.py kvm_runtest_2/kvm_tests.py
> --- kvm_runtest_2.bak/kvm_tests.py	2009-04-29 06:17:29.000000000 -0400
> +++ kvm_runtest_2/kvm_tests.py	2009-05-05 23:45:57.000000000 -0400
> @@ -394,3 +394,235 @@
>      kvm_log.info("VM resumed after S3")
>  
>      session.close()
> +
> +def run_ntp_server_setup(test, params, env):
> +    
> +    """NTP server configuration and related network file modification
> +    """
> +    kvm_log.debug("run ntp server setup")
> +    status = 1
> +    # stop firewall for NTP server if it is running.
> +    status = os.system("/etc/init.d/iptables status")
> +    if status == 0:
> +        os.system("/etc/init.d/iptables stop")
> +        status = 1
> +
> +    # prevent dhcp client modify the ntp.conf
> +    kvm_log.info("prevent dhcp client modify the ntp.conf")
> +
> +    config_file = "/etc/sysconfig/network"
> +    network_file = open("/etc/sysconfig/network", "a")
> +    string = "PEERNTP=no"
> +
> +    if os.system("grep %s %s" % (string, config_file)):
> +        network_file.writelines(str(string)+'\n')
> +    
> +    network_file.close()
> +  
> +    # start ntp server on host
> +    kvm_log.info("backup ntp config file")
> +
> +    ntp_filename = os.path.expanduser("/etc/ntp.conf")
> +    # backup ntp config file
> +    backup_bootloader_filename = ntp_filename + "_backup"
> +    if os.path.exists(ntp_filename):
> +        os.rename(ntp_filename, backup_bootloader_filename)
> +    
> +    status = os.system("/etc/init.d/ntpd status")
> +    if status == 0:
> +        os.system("/etc/init.d/ntpd stop")
> +        status = 1    
> +
> +    kvm_log.info("start ntp server on host")
> +
> +    ntp_cmd = '''
> +        echo "restrict default kod nomodify notrap nopeer noquery" >> /etc/ntp.conf;\
> +        echo "restrict 127.0.0.1" >> /etc/ntp.conf;\
> +        echo "driftfile /var/lib/ntp/drift" >> /etc/ntp.conf;\
> +        echo "keys /etc/ntp/keys" >> /etc/ntp.conf;\
> +        echo "server 127.127.1.0" >> /etc/ntp.conf;\
> +        echo "fudge 127.127.1.0 stratum 1" >> /etc/ntp.conf;\
> +        service ntpd start;
> +        '''

I think it would be better to copy /etc/ntp.conf to a temporary file,
modify that, and start ntpd with the -c option.

After the test is finished, restart ntpd with the default config (if it
was running) via service ntpd restart.

Also I don't see whether your script reports the content of

/sys/devices/system/clocksource/clocksource0/current_clocksource

on the guest. It's important that this information is displayed in the test
report.

Looks fine to me other than that, but the kvm-autotest guys probably
have more comments.

Thanks
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff -urN kvm_runtest_2.bak/cpu_stress.c kvm_runtest_2/cpu_stress.c
--- kvm_runtest_2.bak/cpu_stress.c	1969-12-31 19:00:00.000000000 -0500
+++ kvm_runtest_2/cpu_stress.c	2009-05-05 22:35:34.000000000 -0400
@@ -0,0 +1,61 @@ 
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <math.h>
+#include <unistd.h>
+
+#define MAX_CPUS 256
+#define BUFFSIZE 1024
+
+
+void worker_child(int cpu)
+{
+	int cur_freq;
+	int min_freq;
+	int max_freq;
+	int last_freq;
+	cpu_set_t mask;
+	int i;
+	double x;
+        int d = 0;
+	/*
+	 * bind this thread to the specified cpu 
+	 */
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	sched_setaffinity(0, CPU_SETSIZE, &mask);
+
+	while (d++ != 500000) {
+			for (i=0; i<100000; i++)
+				x = sqrt(x);
+	}
+
+	_exit(0);
+
+}
+
+
+main() {
+	cpu_set_t mask;
+	int i;
+	int code;
+
+	if (sched_getaffinity(0, CPU_SETSIZE, &mask) < 0){
+		perror ("sched_getaffinity");
+		exit(1);
+	}
+
+	for (i=0; i<CPU_SETSIZE; i++)
+		if (CPU_ISSET(i, &mask)){
+			printf ("CPU%d\n",i);
+			if (fork() == 0)
+				worker_child(i);
+		}
+
+
+	wait(&code);
+	exit (WEXITSTATUS(code));
+}
diff -urN kvm_runtest_2.bak/kvm_runtest_2.py kvm_runtest_2/kvm_runtest_2.py
--- kvm_runtest_2.bak/kvm_runtest_2.py	2009-04-29 06:17:29.000000000 -0400
+++ kvm_runtest_2/kvm_runtest_2.py	2009-04-29 08:06:32.000000000 -0400
@@ -36,6 +36,8 @@ 
                 "autotest":     test_routine("kvm_tests",           "run_autotest"),
                 "kvm_install":  test_routine("kvm_install",         "run_kvm_install"),
                 "linux_s3":     test_routine("kvm_tests",           "run_linux_s3"),
+                "ntp_server_setup": test_routine("kvm_tests",       "run_ntp_server_setup"),
+                "timedrift":    test_routine("kvm_tests",           "run_timedrift"),
                 }
 
         # Make it possible to import modules from the test's bindir
diff -urN kvm_runtest_2.bak/kvm_tests.cfg.sample kvm_runtest_2/kvm_tests.cfg.sample
--- kvm_runtest_2.bak/kvm_tests.cfg.sample	2009-04-29 06:17:29.000000000 -0400
+++ kvm_runtest_2/kvm_tests.cfg.sample	2009-04-29 08:09:36.000000000 -0400
@@ -81,6 +81,10 @@ 
     - linux_s3:      install setup
         type = linux_s3
 
+    - ntp_server_setup:
+        type = ntp_server_setup
+    - timedrift:      ntp_server_setup
+        type = timedrift
 # NICs
 variants:
     - @rtl8139:
diff -urN kvm_runtest_2.bak/kvm_tests.py kvm_runtest_2/kvm_tests.py
--- kvm_runtest_2.bak/kvm_tests.py	2009-04-29 06:17:29.000000000 -0400
+++ kvm_runtest_2/kvm_tests.py	2009-05-05 23:45:57.000000000 -0400
@@ -394,3 +394,235 @@ 
     kvm_log.info("VM resumed after S3")
 
     session.close()
+
+def run_ntp_server_setup(test, params, env):
+    
+    """NTP server configuration and related network file modification
+    """
+    kvm_log.debug("run ntp server setup")
+    status = 1
+    # stop firewall for NTP server if it is running.
+    status = os.system("/etc/init.d/iptables status")
+    if status == 0:
+        os.system("/etc/init.d/iptables stop")
+        status = 1
+
+    # prevent dhcp client modify the ntp.conf
+    kvm_log.info("prevent dhcp client modify the ntp.conf")
+
+    config_file = "/etc/sysconfig/network"
+    network_file = open("/etc/sysconfig/network", "a")
+    string = "PEERNTP=no"
+
+    if os.system("grep %s %s" % (string, config_file)):
+        network_file.writelines(str(string)+'\n')
+    
+    network_file.close()
+  
+    # start ntp server on host
+    kvm_log.info("backup ntp config file")
+
+    ntp_filename = os.path.expanduser("/etc/ntp.conf")
+    # backup ntp config file
+    backup_bootloader_filename = ntp_filename + "_backup"
+    if os.path.exists(ntp_filename):
+        os.rename(ntp_filename, backup_bootloader_filename)
+    
+    status = os.system("/etc/init.d/ntpd status")
+    if status == 0:
+        os.system("/etc/init.d/ntpd stop")
+        status = 1    
+
+    kvm_log.info("start ntp server on host")
+
+    ntp_cmd = '''
+        echo "restrict default kod nomodify notrap nopeer noquery" >> /etc/ntp.conf;\
+        echo "restrict 127.0.0.1" >> /etc/ntp.conf;\
+        echo "driftfile /var/lib/ntp/drift" >> /etc/ntp.conf;\
+        echo "keys /etc/ntp/keys" >> /etc/ntp.conf;\
+        echo "server 127.127.1.0" >> /etc/ntp.conf;\
+        echo "fudge 127.127.1.0 stratum 1" >> /etc/ntp.conf;\
+        service ntpd start;
+        '''
+    status = os.system(ntp_cmd)
+    if status != 0:
+        raise error.TestFail, "NTP server has not starting correct..."
+
+def run_timedrift(test, params, env):
+    """judge wether the guest clock will encounter timedrift prblem or not. including three stages:
+       1: try to sync the clock with host, if the offset value of guest clock is large than 1 sec.
+       2: running the cpu stress testing program<cpu_stress.c> on guest
+       3: then run analyze loop totally 20 times to determine if the clock on guest has time drift.
+    """
+    # variables using in timedrift testcase
+    cpu_stress_program = "cpu_stress.c"
+    remote_dir = '/root'
+
+    pwd = os.path.join(os.environ['AUTODIR'],'tests/kvm_runtest_2')
+    cpu_stress_test = os.path.join(pwd, cpu_stress_program)
+    cpu_stress_cmdline = 'cd %s;gcc %s -lm;./a.out &' % (remote_dir, os.path.basename(cpu_stress_test))
+
+    cpu_stress_search_cmdline = "ps -ef|grep 'a.out'|grep -v grep"
+
+    hostname = os.environ.get("HOSTNAME")
+
+    kvm_log.info("get host name :%s" % hostname)
+
+    # ntpdate info command and ntpdate sync command
+    ntpdate_info_cmd = "ntpdate -q" + " " + hostname
+    ntpdate_sync_cmd = "ntpdate" + " " + hostname
+
+    # get vm handle
+    vm = kvm_utils.env_get_vm(env,params.get("main_vm"))
+    if not vm:
+        raise error.TestError, "VM object not found in environment"
+    if not vm.is_alive():
+        raise error.TestError, "VM seems to be dead; Test requires a living VM"
+
+    kvm_log.info("Waiting for guest to be up...")
+
+    pxssh = kvm_utils.wait_for(vm.ssh_login, 240, 0, 2)
+    if not pxssh:
+        raise error.TestFail, "Could not log into guest"
+
+    kvm_log.info("Logged into guest IN run_timedrift function.")
+
+    # helper function: 
+    # ntpdate_op: a entire process to get ntpdate command line result from guest.
+    # time_drift_or_not: get the numeric handing by regular expression and make timedrift calulation.
+    def ntpdate_op(command):
+        output = []
+        try:
+            pxssh = kvm_utils.wait_for(vm.ssh_login, 240, 0, 2)
+            if not pxssh:
+                raise error.TestFail, "Could not log into guest"
+
+            kvm_log.info("Logged in:(ntpdate_op)")
+
+            while True:
+                pxssh.sendline(command)
+                #output = pxssh.read_nonblocking(1.0)
+                s, output = pxssh.read_up_to_prompt()
+                if "time server" in output:
+                    # output is a string contain the (ntpdate -q) infor on guest
+                    return True, output
+                else:
+                    continue
+        except:
+            pxssh.close()
+            return False, output
+        return False, output
+
+    def time_drift_or_not(output):
+        date_string = re.findall(r'offset [+-]?(.*) sec', output, re.M)
+        num = float(date_string[0])
+        if num >= 1:
+            kvm_log.info("guest clock has drifted in this scenario :%s %s" % (date_string, num))
+            return False
+        else:
+            kvm_log.info("guest clock running veracious in now stage :%s %s" % (date_string, num))
+            return True
+
+    # send the command and get the ouput from guest
+    # this loop will pick out several conditions need to be process
+    # Actually, we want to get the info match "time server", then script can analyzing it to
+    # determine if guest's clock need sync with host or not.
+    while True:
+        pxssh.sendline(ntpdate_info_cmd)
+        #output = pxssh.read_nonblocking(1.0)
+        s, output = pxssh.read_up_to_prompt()
+        if ("no server suitable" not in output) and ("time server" not in output):
+            kvm_log.info("Very creazying output got. let's try again")
+            continue
+        elif "no server suitable" in output:
+            kvm_log.info("Seems NTP server is not ready for servicing")
+            time.sleep(30)
+            continue
+        elif "time server" in output:
+            # get the ntpdate info from guest
+            kvm_log.info("Got the correct output for analyze. The output is below: \n%s" %output) 
+            break
+
+    kvm_log.info("get the ntpdate infomation from guest successfully :%s" % os.popen('date').read())
+
+    # judge the clock need to sync with host or not
+    while True:
+        date_string = re.findall(r'offset [+-]?(.*) sec', output, re.M)
+        num = float(date_string[0])
+        if num >= 1:
+            kvm_log.info("guest need sync with the server: %s" % hostname)
+            s, output = ntpdate_op(ntpdate_sync_cmd)
+            if s:
+                continue
+        else:
+            #pxssh.sendline("hwclock --systohc")
+            #kvm_log.info("guest clock sync prcdure is finished. then sync the guest clock to guest bios.")
+
+            #pxssh.sendline("hwclock --show")
+            #s, o = pxssh.read_up_to_prompt()
+            #kvm_log.info("the date infomation get from guest bios is :\n%s" % o)
+
+            pxssh.sendline(ntpdate_info_cmd)
+            s, o = pxssh.read_up_to_prompt()
+            kvm_log.info("guest clock after sync with host is :\n%s" % o)
+
+            break
+
+    kvm_log.info("Timedrift Preparation *Finished* at last :%s" % os.popen('date').read())
+
+    if not vm.scp_to_remote(cpu_stress_test, remote_dir):
+        raise error.TestError, "Could not copy program to guest."
+
+    pxssh.sendline(ntpdate_info_cmd)
+    s, o = pxssh.read_up_to_prompt()
+    kvm_log.info("the ntpdate query from host *BEFORE* running the cpu stress program.\n%s" % o)
+    pxssh.sendline(cpu_stress_cmdline)
+    s, o = pxssh.read_up_to_prompt()
+    kvm_log.info("running command line on guest and sleeping for 1200 secs.\n%s" % o)
+
+    time.sleep(1200)
+
+    while True:
+        #if not pxssh.send_command(cpu_stress_search_cmdline):
+        if pxssh.get_command_status(cpu_stress_search_cmdline):
+            #(s, o) = pxssh.get_command_status_output(cpu_stress_search_cmdline)
+            #print "s is :%s" % s
+            #print "o is :%s" % o
+            #print "--------------------------------------------"
+            #aaa = pxssh.get_command_status(cpu_stress_search_cmdline)
+            #print "aaa is :%s" % aaa
+            #print "--------------------------------------------"
+
+            kvm_log.info("stress testing process has been completed and quit.")
+            break
+        else:
+            kvm_log.info("stress testing on CPU has not finished yet.waiting for next detect after sleep 60 secs.")
+            time.sleep(60)
+            continue
+
+    pxssh.sendline(ntpdate_info_cmd)
+    s, o = pxssh.read_up_to_prompt()
+    kvm_log.info("the ntpdate query from host *AFTER* running the cpu stress program.\n%s" % o)
+
+    pxssh.close()
+
+    # Sleep for analyze...
+    kvm_log.info("sleeping(180 secs) Starting... :%s" % os.popen('date').read())
+    time.sleep(180)
+    kvm_log.info("wakeup to get the analyzing... :%s" % os.popen('date').read())
+    count = 0
+    for i in range(1, 21):
+        kvm_log.info("this is %s time to get clock info from guest." % i)
+        s, o = ntpdate_op(ntpdate_info_cmd)
+        
+        if not s:
+            raise error.TestFail, "Guest seems hang or ssh service based on guest has been crash down"
+        
+        if not time_drift_or_not(o):
+            count += 1
+
+        if count == 5:
+            raise error.TestFail, "TimeDrift testing Abort because guest's clock has drift too much"
+
+        kvm_log.info("*********************** Sleep 30 seconds for next loop *************************")
+        time.sleep(60)