diff mbox

[15/16] intel_l3_parity: Support error injection

Message ID 1379050122-12774-16-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Sept. 13, 2013, 5:28 a.m. UTC
Haswell added the ability to inject errors which is extremely useful for
testing. Add two arguments to the tool to inject, and uninject.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 tests/sysfs_l3_parity   |  2 +-
 tools/intel_l3_parity.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 3 deletions(-)

Comments

Daniel Vetter Sept. 13, 2013, 9:12 a.m. UTC | #1
On Thu, Sep 12, 2013 at 10:28:41PM -0700, Ben Widawsky wrote:
> Haswell added the ability to inject errors which is extremely useful for
> testing. Add two arguments to the tool to inject, and uninject.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Do we run any risk that a concurrent write/read to the same register range
could hang the machine due to the same-cacheline w/a we need? Just want to
make sure that when we integrate this into a testcase there's no surprises
like with intel_gpu_top ...
-Daniel

> ---
>  tests/sysfs_l3_parity   |  2 +-
>  tools/intel_l3_parity.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 68 insertions(+), 3 deletions(-)
> 
> diff --git a/tests/sysfs_l3_parity b/tests/sysfs_l3_parity
> index a0dfad9..e9d4411 100755
> --- a/tests/sysfs_l3_parity
> +++ b/tests/sysfs_l3_parity
> @@ -21,7 +21,7 @@ fi
>  $SOURCE_DIR/../tools/intel_l3_parity -r 0 -b 0 -s 0 -e
>  
>  #Check that we can clear remaps
> -if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -c` != "0" ] ; then
> +if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -l` != 1 ] ; then
>  	echo "Fail 2"
>  	exit 1
>  fi
> diff --git a/tools/intel_l3_parity.c b/tools/intel_l3_parity.c
> index cf15541..cd6754e 100644
> --- a/tools/intel_l3_parity.c
> +++ b/tools/intel_l3_parity.c
> @@ -79,6 +79,20 @@ static int which_slice = -1;
>  			(__i) < ((which_slice == -1) ? MAX_SLICES : (which_slice + 1)); \
>  			(__i)++)
>  
> +static void decode_dft(uint32_t dft)
> +{
> +	if (IS_IVYBRIDGE(devid) || !(dft & 1)) {
> +		printf("Error injection disabled\n");
> +		return;
> +	}
> +	printf("Error injection enabled\n");
> +	printf("  Hang = %s\n", (dft >> 28) & 0x1 ? "yes" : "no");
> +	printf("  Row = %d\n", (dft >> 7) & 0x7ff);
> +	printf("  Bank = %d\n", (dft >> 2) & 0x3);
> +	printf("  Subbank = %d\n", (dft >> 4) & 0x7);
> +	printf("  Slice = %d\n", (dft >> 1) & 0x1);
> +}
> +
>  static void dumpit(int slice)
>  {
>  	int i, j;
> @@ -150,7 +164,9 @@ static void usage(const char *name)
>  		"  -l, --list				List the current L3 logs\n"
>  		"  -a, --clear-all			Clear all disabled rows\n"
>  		"  -e, --enable				Enable row, bank, subbank (undo -d)\n"
> -		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n",
> +		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
> +		"  -i, --inject				[HSW only] Cause hardware to inject a row errors\n"
> +		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n",
>  		name);
>  }
>  
> @@ -158,6 +174,7 @@ int main(int argc, char *argv[])
>  {
>  	const int device = drm_get_card();
>  	char *path[REAL_MAX_SLICES];
> +	uint32_t dft;
>  	int row = 0, bank = 0, sbank = 0;
>  	int fd[REAL_MAX_SLICES] = {0}, ret, i;
>  	int action = '0';
> @@ -167,6 +184,8 @@ int main(int argc, char *argv[])
>  	if (intel_gen(devid) < 7)
>  		exit(EXIT_SUCCESS);
>  
> +	assert(intel_register_access_init(intel_get_pci_device(), 0) == 0);
> +
>  	ret = asprintf(&path[0], "/sys/class/drm/card%d/l3_parity", device);
>  	assert(ret != -1);
>  	ret = asprintf(&path[1], "/sys/class/drm/card%d/l3_parity_slice_1", device);
> @@ -183,6 +202,7 @@ int main(int argc, char *argv[])
>  		assert(lseek(fd[i], 0, SEEK_SET) == 0);
>  	}
>  
> +	dft = intel_register_read(0xb038);
>  
>  	while (1) {
>  		int c, option_index = 0;
> @@ -192,6 +212,8 @@ int main(int argc, char *argv[])
>  			{ "clear-all", no_argument, 0, 'a' },
>  			{ "enable", no_argument, 0, 'e' },
>  			{ "disable", optional_argument, 0, 'd' },
> +			{ "inject", no_argument, 0, 'i' },
> +			{ "uninject", no_argument, 0, 'u' },
>  			{ "hw-info", no_argument, 0, 'H' },
>  			{ "row", required_argument, 0, 'r' },
>  			{ "bank", required_argument, 0, 'b' },
> @@ -200,7 +222,7 @@ int main(int argc, char *argv[])
>  			{0, 0, 0, 0}
>  		};
>  
> -		c = getopt_long(argc, argv, "hHr:b:s:w:aled::", long_options,
> +		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
>  				&option_index);
>  		if (c == -1)
>  			break;
> @@ -215,6 +237,7 @@ int main(int argc, char *argv[])
>  				printf("Number of banks: %d\n", num_banks());
>  				printf("Subbanks per bank: %d\n", NUM_SUBBANKS);
>  				printf("Max L3 size: %dK\n", L3_SIZE >> 10);
> +				printf("Has error injection: %s\n", IS_HASWELL(devid) ? "yes" : "no");
>  				exit(EXIT_SUCCESS);
>  			case 'r':
>  				row = atoi(optarg);
> @@ -236,6 +259,12 @@ int main(int argc, char *argv[])
>  				if (which_slice >= MAX_SLICES)
>  					exit(EXIT_FAILURE);
>  				break;
> +			case 'i':
> +			case 'u':
> +				if (!IS_HASWELL(devid)) {
> +					fprintf(stderr, "Error injection supported on HSW+ only\n");
> +					exit(EXIT_FAILURE);
> +				}
>  			case 'd':
>  				if (optarg) {
>  					ret = sscanf(optarg, "%d,%d,%d", &row, &bank, &sbank);
> @@ -256,6 +285,23 @@ int main(int argc, char *argv[])
>  		}
>  	}
>  
> +	if (action == 'i') {
> +		if (((dft >> 1) & 1) != which_slice) {
> +			fprintf(stderr, "DFT register already has slice %d enabled, and we don't support multiple slices. Try modifying -w; but sometimes the register sticks in the wrong way\n", (dft >> 1) & 1);
> +			exit(EXIT_FAILURE);
> +		}
> +
> +		if (which_slice == -1) {
> +			fprintf(stderr, "Cannot inject errors to multiple slices (modify -w)\n");
> +			exit(EXIT_FAILURE);
> +		}
> +		if (dft & 1 && ((dft >> 1) && 1) == which_slice)
> +			printf("warning: overwriting existing injections. This is very dangerous.\n");
> +	}
> +
> +	if (action == 'l')
> +		decode_dft(dft);
> +
>  	/* Per slice operations */
>  	for_each_slice(i) {
>  		switch (action) {
> @@ -271,11 +317,30 @@ int main(int argc, char *argv[])
>  			case 'd':
>  				assert(disable_rbs(row, bank, sbank, i) == 0);
>  				break;
> +			case 'i':
> +				if (bank == 3) {
> +					fprintf(stderr, "The hardware does not support error inject on bank 3.\n");
> +					exit(EXIT_FAILURE);
> +				}
> +				dft |= row << 7;
> +				dft |= sbank << 4;
> +				dft |= bank << 2;
> +				assert(i < 2);
> +				dft |= i << 1; /* slice */
> +				dft |= 1 << 0; /* enable */
> +				intel_register_write(0xb038, dft);
> +				break;
> +			case 'u':
> +				intel_register_write(0xb038, dft & ~(1<<0));
> +				break;
> +			case 'L':
> +				break;
>  			default:
>  				abort();
>  		}
>  	}
>  
> +	intel_register_access_fini();
>  	if (action == 'l')
>  		exit(EXIT_SUCCESS);
>  
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ben Widawsky Sept. 13, 2013, 3:54 p.m. UTC | #2
On Fri, Sep 13, 2013 at 11:12:11AM +0200, Daniel Vetter wrote:
> On Thu, Sep 12, 2013 at 10:28:41PM -0700, Ben Widawsky wrote:
> > Haswell added the ability to inject errors which is extremely useful for
> > testing. Add two arguments to the tool to inject, and uninject.
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Do we run any risk that a concurrent write/read to the same register range
> could hang the machine due to the same-cacheline w/a we need? Just want to
> make sure that when we integrate this into a testcase there's no surprises
> like with intel_gpu_top ...
> -Daniel

The race against the kernel is ever present on all tests/tools. Are we
running parallel igt yet? If so, I can make the read/write functions
threadsafe.

On this note in particular I suppose we can make a debugfs entry like
the forcewake one to allow user space to do register accesses.

Interestingly, this also reminds me of another caveat I meant to put in
the commit message and forgot... the error injection register is also
per context, which makes it a pain to clear (and the pain in writing the
test case). I'm even beginning to think maybe a debugfs for this
register is the way to go.

As a side note, the injection feature is entirely debug only - but
agreed, random hangs in the test suite is not good.

[snip]

> 
> > ---
> >  tests/sysfs_l3_parity   |  2 +-
> >  tools/intel_l3_parity.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 68 insertions(+), 3 deletions(-)
> > 
> > diff --git a/tests/sysfs_l3_parity b/tests/sysfs_l3_parity
> > index a0dfad9..e9d4411 100755
> > --- a/tests/sysfs_l3_parity
> > +++ b/tests/sysfs_l3_parity
> > @@ -21,7 +21,7 @@ fi
> >  $SOURCE_DIR/../tools/intel_l3_parity -r 0 -b 0 -s 0 -e
> >  
> >  #Check that we can clear remaps
> > -if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -c` != "0" ] ; then
> > +if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -l` != 1 ] ; then
> >  	echo "Fail 2"
> >  	exit 1
> >  fi
> > diff --git a/tools/intel_l3_parity.c b/tools/intel_l3_parity.c
> > index cf15541..cd6754e 100644
> > --- a/tools/intel_l3_parity.c
> > +++ b/tools/intel_l3_parity.c
> > @@ -79,6 +79,20 @@ static int which_slice = -1;
> >  			(__i) < ((which_slice == -1) ? MAX_SLICES : (which_slice + 1)); \
> >  			(__i)++)
> >  
> > +static void decode_dft(uint32_t dft)
> > +{
> > +	if (IS_IVYBRIDGE(devid) || !(dft & 1)) {
> > +		printf("Error injection disabled\n");
> > +		return;
> > +	}
> > +	printf("Error injection enabled\n");
> > +	printf("  Hang = %s\n", (dft >> 28) & 0x1 ? "yes" : "no");
> > +	printf("  Row = %d\n", (dft >> 7) & 0x7ff);
> > +	printf("  Bank = %d\n", (dft >> 2) & 0x3);
> > +	printf("  Subbank = %d\n", (dft >> 4) & 0x7);
> > +	printf("  Slice = %d\n", (dft >> 1) & 0x1);
> > +}
> > +
> >  static void dumpit(int slice)
> >  {
> >  	int i, j;
> > @@ -150,7 +164,9 @@ static void usage(const char *name)
> >  		"  -l, --list				List the current L3 logs\n"
> >  		"  -a, --clear-all			Clear all disabled rows\n"
> >  		"  -e, --enable				Enable row, bank, subbank (undo -d)\n"
> > -		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n",
> > +		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
> > +		"  -i, --inject				[HSW only] Cause hardware to inject a row errors\n"
> > +		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n",
> >  		name);
> >  }
> >  
> > @@ -158,6 +174,7 @@ int main(int argc, char *argv[])
> >  {
> >  	const int device = drm_get_card();
> >  	char *path[REAL_MAX_SLICES];
> > +	uint32_t dft;
> >  	int row = 0, bank = 0, sbank = 0;
> >  	int fd[REAL_MAX_SLICES] = {0}, ret, i;
> >  	int action = '0';
> > @@ -167,6 +184,8 @@ int main(int argc, char *argv[])
> >  	if (intel_gen(devid) < 7)
> >  		exit(EXIT_SUCCESS);
> >  
> > +	assert(intel_register_access_init(intel_get_pci_device(), 0) == 0);
> > +
> >  	ret = asprintf(&path[0], "/sys/class/drm/card%d/l3_parity", device);
> >  	assert(ret != -1);
> >  	ret = asprintf(&path[1], "/sys/class/drm/card%d/l3_parity_slice_1", device);
> > @@ -183,6 +202,7 @@ int main(int argc, char *argv[])
> >  		assert(lseek(fd[i], 0, SEEK_SET) == 0);
> >  	}
> >  
> > +	dft = intel_register_read(0xb038);
> >  
> >  	while (1) {
> >  		int c, option_index = 0;
> > @@ -192,6 +212,8 @@ int main(int argc, char *argv[])
> >  			{ "clear-all", no_argument, 0, 'a' },
> >  			{ "enable", no_argument, 0, 'e' },
> >  			{ "disable", optional_argument, 0, 'd' },
> > +			{ "inject", no_argument, 0, 'i' },
> > +			{ "uninject", no_argument, 0, 'u' },
> >  			{ "hw-info", no_argument, 0, 'H' },
> >  			{ "row", required_argument, 0, 'r' },
> >  			{ "bank", required_argument, 0, 'b' },
> > @@ -200,7 +222,7 @@ int main(int argc, char *argv[])
> >  			{0, 0, 0, 0}
> >  		};
> >  
> > -		c = getopt_long(argc, argv, "hHr:b:s:w:aled::", long_options,
> > +		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
> >  				&option_index);
> >  		if (c == -1)
> >  			break;
> > @@ -215,6 +237,7 @@ int main(int argc, char *argv[])
> >  				printf("Number of banks: %d\n", num_banks());
> >  				printf("Subbanks per bank: %d\n", NUM_SUBBANKS);
> >  				printf("Max L3 size: %dK\n", L3_SIZE >> 10);
> > +				printf("Has error injection: %s\n", IS_HASWELL(devid) ? "yes" : "no");
> >  				exit(EXIT_SUCCESS);
> >  			case 'r':
> >  				row = atoi(optarg);
> > @@ -236,6 +259,12 @@ int main(int argc, char *argv[])
> >  				if (which_slice >= MAX_SLICES)
> >  					exit(EXIT_FAILURE);
> >  				break;
> > +			case 'i':
> > +			case 'u':
> > +				if (!IS_HASWELL(devid)) {
> > +					fprintf(stderr, "Error injection supported on HSW+ only\n");
> > +					exit(EXIT_FAILURE);
> > +				}
> >  			case 'd':
> >  				if (optarg) {
> >  					ret = sscanf(optarg, "%d,%d,%d", &row, &bank, &sbank);
> > @@ -256,6 +285,23 @@ int main(int argc, char *argv[])
> >  		}
> >  	}
> >  
> > +	if (action == 'i') {
> > +		if (((dft >> 1) & 1) != which_slice) {
> > +			fprintf(stderr, "DFT register already has slice %d enabled, and we don't support multiple slices. Try modifying -w; but sometimes the register sticks in the wrong way\n", (dft >> 1) & 1);
> > +			exit(EXIT_FAILURE);
> > +		}
> > +
> > +		if (which_slice == -1) {
> > +			fprintf(stderr, "Cannot inject errors to multiple slices (modify -w)\n");
> > +			exit(EXIT_FAILURE);
> > +		}
> > +		if (dft & 1 && ((dft >> 1) && 1) == which_slice)
> > +			printf("warning: overwriting existing injections. This is very dangerous.\n");
> > +	}
> > +
> > +	if (action == 'l')
> > +		decode_dft(dft);
> > +
> >  	/* Per slice operations */
> >  	for_each_slice(i) {
> >  		switch (action) {
> > @@ -271,11 +317,30 @@ int main(int argc, char *argv[])
> >  			case 'd':
> >  				assert(disable_rbs(row, bank, sbank, i) == 0);
> >  				break;
> > +			case 'i':
> > +				if (bank == 3) {
> > +					fprintf(stderr, "The hardware does not support error inject on bank 3.\n");
> > +					exit(EXIT_FAILURE);
> > +				}
> > +				dft |= row << 7;
> > +				dft |= sbank << 4;
> > +				dft |= bank << 2;
> > +				assert(i < 2);
> > +				dft |= i << 1; /* slice */
> > +				dft |= 1 << 0; /* enable */
> > +				intel_register_write(0xb038, dft);
> > +				break;
> > +			case 'u':
> > +				intel_register_write(0xb038, dft & ~(1<<0));
> > +				break;
> > +			case 'L':
> > +				break;
> >  			default:
> >  				abort();
> >  		}
> >  	}
> >  
> > +	intel_register_access_fini();
> >  	if (action == 'l')
> >  		exit(EXIT_SUCCESS);
> >  
> > -- 
> > 1.8.4
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Daniel Vetter Sept. 13, 2013, 4:14 p.m. UTC | #3
On Fri, Sep 13, 2013 at 5:54 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, Sep 13, 2013 at 11:12:11AM +0200, Daniel Vetter wrote:
>> On Thu, Sep 12, 2013 at 10:28:41PM -0700, Ben Widawsky wrote:
>> > Haswell added the ability to inject errors which is extremely useful for
>> > testing. Add two arguments to the tool to inject, and uninject.
>> >
>> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
>>
>> Do we run any risk that a concurrent write/read to the same register range
>> could hang the machine due to the same-cacheline w/a we need? Just want to
>> make sure that when we integrate this into a testcase there's no surprises
>> like with intel_gpu_top ...
>> -Daniel
>
> The race against the kernel is ever present on all tests/tools. Are we
> running parallel igt yet? If so, I can make the read/write functions
> threadsafe.
>
> On this note in particular I suppose we can make a debugfs entry like
> the forcewake one to allow user space to do register accesses.
>
> Interestingly, this also reminds me of another caveat I meant to put in
> the commit message and forgot... the error injection register is also
> per context, which makes it a pain to clear (and the pain in writing the
> test case). I'm even beginning to think maybe a debugfs for this
> register is the way to go.
>
> As a side note, the injection feature is entirely debug only - but
> agreed, random hangs in the test suite is not good.

Hm, this will be tricky. If nothing else writes this range (i.e. not
our interrupt handler) we could use a secure batchbuffer and emit the
MI_LRI from the userspace batch. Then we could submit some workload
using hw contexts that uses the l3$ cache (I guess without something
in there it won't notice the injected error) and after the error is
detected we could simply kill the context, restoring the original
state again.
-Daniel
Ben Widawsky Sept. 13, 2013, 4:29 p.m. UTC | #4
On Fri, Sep 13, 2013 at 06:14:38PM +0200, Daniel Vetter wrote:
> On Fri, Sep 13, 2013 at 5:54 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, Sep 13, 2013 at 11:12:11AM +0200, Daniel Vetter wrote:
> >> On Thu, Sep 12, 2013 at 10:28:41PM -0700, Ben Widawsky wrote:
> >> > Haswell added the ability to inject errors which is extremely useful for
> >> > testing. Add two arguments to the tool to inject, and uninject.
> >> >
> >> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> >>
> >> Do we run any risk that a concurrent write/read to the same register range
> >> could hang the machine due to the same-cacheline w/a we need? Just want to
> >> make sure that when we integrate this into a testcase there's no surprises
> >> like with intel_gpu_top ...
> >> -Daniel
> >
> > The race against the kernel is ever present on all tests/tools. Are we
> > running parallel igt yet? If so, I can make the read/write functions
> > threadsafe.
> >
> > On this note in particular I suppose we can make a debugfs entry like
> > the forcewake one to allow user space to do register accesses.
> >
> > Interestingly, this also reminds me of another caveat I meant to put in
> > the commit message and forgot... the error injection register is also
> > per context, which makes it a pain to clear (and the pain in writing the
> > test case). I'm even beginning to think maybe a debugfs for this
> > register is the way to go.
> >
> > As a side note, the injection feature is entirely debug only - but
> > agreed, random hangs in the test suite is not good.
> 
> Hm, this will be tricky. If nothing else writes this range (i.e. not
> our interrupt handler) we could use a secure batchbuffer and emit the
> MI_LRI from the userspace batch. Then we could submit some workload
> using hw contexts that uses the l3$ cache (I guess without something
> in there it won't notice the injected error) and after the error is
> detected we could simply kill the context, restoring the original
> state again.
> -Daniel

Actually, I don't think there is anything else used in the cacheline of
the error injection register which are accessed after driver load.
diff mbox

Patch

diff --git a/tests/sysfs_l3_parity b/tests/sysfs_l3_parity
index a0dfad9..e9d4411 100755
--- a/tests/sysfs_l3_parity
+++ b/tests/sysfs_l3_parity
@@ -21,7 +21,7 @@  fi
 $SOURCE_DIR/../tools/intel_l3_parity -r 0 -b 0 -s 0 -e
 
 #Check that we can clear remaps
-if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -c` != "0" ] ; then
+if [ `$SOURCE_DIR/../tools/intel_l3_parity -l | wc -l` != 1 ] ; then
 	echo "Fail 2"
 	exit 1
 fi
diff --git a/tools/intel_l3_parity.c b/tools/intel_l3_parity.c
index cf15541..cd6754e 100644
--- a/tools/intel_l3_parity.c
+++ b/tools/intel_l3_parity.c
@@ -79,6 +79,20 @@  static int which_slice = -1;
 			(__i) < ((which_slice == -1) ? MAX_SLICES : (which_slice + 1)); \
 			(__i)++)
 
+static void decode_dft(uint32_t dft)
+{
+	if (IS_IVYBRIDGE(devid) || !(dft & 1)) {
+		printf("Error injection disabled\n");
+		return;
+	}
+	printf("Error injection enabled\n");
+	printf("  Hang = %s\n", (dft >> 28) & 0x1 ? "yes" : "no");
+	printf("  Row = %d\n", (dft >> 7) & 0x7ff);
+	printf("  Bank = %d\n", (dft >> 2) & 0x3);
+	printf("  Subbank = %d\n", (dft >> 4) & 0x7);
+	printf("  Slice = %d\n", (dft >> 1) & 0x1);
+}
+
 static void dumpit(int slice)
 {
 	int i, j;
@@ -150,7 +164,9 @@  static void usage(const char *name)
 		"  -l, --list				List the current L3 logs\n"
 		"  -a, --clear-all			Clear all disabled rows\n"
 		"  -e, --enable				Enable row, bank, subbank (undo -d)\n"
-		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n",
+		"  -d, --disable=<row,bank,subbank>	Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead\n"
+		"  -i, --inject				[HSW only] Cause hardware to inject a row errors\n"
+		"  -u, --uninject			[HSW only] Turn off hardware error injectection (undo -i)\n",
 		name);
 }
 
@@ -158,6 +174,7 @@  int main(int argc, char *argv[])
 {
 	const int device = drm_get_card();
 	char *path[REAL_MAX_SLICES];
+	uint32_t dft;
 	int row = 0, bank = 0, sbank = 0;
 	int fd[REAL_MAX_SLICES] = {0}, ret, i;
 	int action = '0';
@@ -167,6 +184,8 @@  int main(int argc, char *argv[])
 	if (intel_gen(devid) < 7)
 		exit(EXIT_SUCCESS);
 
+	assert(intel_register_access_init(intel_get_pci_device(), 0) == 0);
+
 	ret = asprintf(&path[0], "/sys/class/drm/card%d/l3_parity", device);
 	assert(ret != -1);
 	ret = asprintf(&path[1], "/sys/class/drm/card%d/l3_parity_slice_1", device);
@@ -183,6 +202,7 @@  int main(int argc, char *argv[])
 		assert(lseek(fd[i], 0, SEEK_SET) == 0);
 	}
 
+	dft = intel_register_read(0xb038);
 
 	while (1) {
 		int c, option_index = 0;
@@ -192,6 +212,8 @@  int main(int argc, char *argv[])
 			{ "clear-all", no_argument, 0, 'a' },
 			{ "enable", no_argument, 0, 'e' },
 			{ "disable", optional_argument, 0, 'd' },
+			{ "inject", no_argument, 0, 'i' },
+			{ "uninject", no_argument, 0, 'u' },
 			{ "hw-info", no_argument, 0, 'H' },
 			{ "row", required_argument, 0, 'r' },
 			{ "bank", required_argument, 0, 'b' },
@@ -200,7 +222,7 @@  int main(int argc, char *argv[])
 			{0, 0, 0, 0}
 		};
 
-		c = getopt_long(argc, argv, "hHr:b:s:w:aled::", long_options,
+		c = getopt_long(argc, argv, "hHr:b:s:w:aled::iu", long_options,
 				&option_index);
 		if (c == -1)
 			break;
@@ -215,6 +237,7 @@  int main(int argc, char *argv[])
 				printf("Number of banks: %d\n", num_banks());
 				printf("Subbanks per bank: %d\n", NUM_SUBBANKS);
 				printf("Max L3 size: %dK\n", L3_SIZE >> 10);
+				printf("Has error injection: %s\n", IS_HASWELL(devid) ? "yes" : "no");
 				exit(EXIT_SUCCESS);
 			case 'r':
 				row = atoi(optarg);
@@ -236,6 +259,12 @@  int main(int argc, char *argv[])
 				if (which_slice >= MAX_SLICES)
 					exit(EXIT_FAILURE);
 				break;
+			case 'i':
+			case 'u':
+				if (!IS_HASWELL(devid)) {
+					fprintf(stderr, "Error injection supported on HSW+ only\n");
+					exit(EXIT_FAILURE);
+				}
 			case 'd':
 				if (optarg) {
 					ret = sscanf(optarg, "%d,%d,%d", &row, &bank, &sbank);
@@ -256,6 +285,23 @@  int main(int argc, char *argv[])
 		}
 	}
 
+	if (action == 'i') {
+		if (((dft >> 1) & 1) != which_slice) {
+			fprintf(stderr, "DFT register already has slice %d enabled, and we don't support multiple slices. Try modifying -w; but sometimes the register sticks in the wrong way\n", (dft >> 1) & 1);
+			exit(EXIT_FAILURE);
+		}
+
+		if (which_slice == -1) {
+			fprintf(stderr, "Cannot inject errors to multiple slices (modify -w)\n");
+			exit(EXIT_FAILURE);
+		}
+		if (dft & 1 && ((dft >> 1) && 1) == which_slice)
+			printf("warning: overwriting existing injections. This is very dangerous.\n");
+	}
+
+	if (action == 'l')
+		decode_dft(dft);
+
 	/* Per slice operations */
 	for_each_slice(i) {
 		switch (action) {
@@ -271,11 +317,30 @@  int main(int argc, char *argv[])
 			case 'd':
 				assert(disable_rbs(row, bank, sbank, i) == 0);
 				break;
+			case 'i':
+				if (bank == 3) {
+					fprintf(stderr, "The hardware does not support error inject on bank 3.\n");
+					exit(EXIT_FAILURE);
+				}
+				dft |= row << 7;
+				dft |= sbank << 4;
+				dft |= bank << 2;
+				assert(i < 2);
+				dft |= i << 1; /* slice */
+				dft |= 1 << 0; /* enable */
+				intel_register_write(0xb038, dft);
+				break;
+			case 'u':
+				intel_register_write(0xb038, dft & ~(1<<0));
+				break;
+			case 'L':
+				break;
 			default:
 				abort();
 		}
 	}
 
+	intel_register_access_fini();
 	if (action == 'l')
 		exit(EXIT_SUCCESS);