diff mbox series

[RFC,v3,08/10] EDAC/amd64: Gather hardware information early

Message ID 20190821235938.118710-9-Yazen.Ghannam@amd.com (mailing list archive)
State New, archived
Headers show
Series AMD64 EDAC fixes | expand

Commit Message

Yazen Ghannam Aug. 22, 2019, midnight UTC
From: Yazen Ghannam <yazen.ghannam@amd.com>

Split out gathering hardware information from init_one_instance() into a
separate function get_hardware_info().

This is necessary so that the information can be cached earlier and used
to check if memory is populated and if ECC is enabled on a node.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 drivers/edac/amd64_edac.c | 76 +++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 31 deletions(-)

Comments

Borislav Petkov Aug. 29, 2019, 9:22 a.m. UTC | #1
On Thu, Aug 22, 2019 at 12:00:02AM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> 
> Split out gathering hardware information from init_one_instance() into a
> separate function get_hardware_info().
> 
> This is necessary so that the information can be cached earlier and used
> to check if memory is populated and if ECC is enabled on a node.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  drivers/edac/amd64_edac.c | 76 +++++++++++++++++++++++----------------
>  1 file changed, 45 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 4d1e6daa7ec4..84832771dec0 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -3405,34 +3405,17 @@ static void compute_num_umcs(void)
>  	edac_dbg(1, "Number of UMCs: %x", num_umcs);
>  }
>  
> -static int init_one_instance(unsigned int nid)
> +static int get_hardware_info(struct amd64_pvt *pvt,
> +			     struct amd64_family_type *fam_type)
>  {
> -	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
> -	struct amd64_family_type *fam_type = NULL;
> -	struct mem_ctl_info *mci = NULL;
> -	struct edac_mc_layer layers[2];
> -	struct amd64_pvt *pvt = NULL;
>  	u16 pci_id1, pci_id2;
> -	int err = 0, ret;
> -
> -	ret = -ENOMEM;
> -	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
> -	if (!pvt)
> -		goto err_ret;
> -
> -	pvt->mc_node_id	= nid;
> -	pvt->F3 = F3;
> -
> -	ret = -EINVAL;
> -	fam_type = per_family_init(pvt);
> -	if (!fam_type)
> -		goto err_free;
> +	int ret = -EINVAL;
>  
>  	if (pvt->fam >= 0x17) {
>  		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);

Yeah, a get_hardware_info() function which does an allocation of that
struct amd64_umc on => F17 which is only 20 bytes. Just add it into the
pvt struct:

struct amd64_pvt {
	...
	struct amd64_umc umc;  /* UMC registers */
};

and be done with it. This should simplify the code flow here a bit and
20 bytes more per pvt is not a big deal.

And I know we do test "if (pvt->umc)" in a bunch of places but that can
be replaced with a "if (pvt->fam >= 0x17)" test which is equivalent.

And that conversion should be a single patch.

>  		if (!pvt->umc) {
>  			ret = -ENOMEM;
> -			goto err_free;
> +			goto err_ret;
>  		}
>  
>  		pci_id1 = fam_type->f0_id;
> @@ -3442,18 +3425,34 @@ static int init_one_instance(unsigned int nid)
>  		pci_id2 = fam_type->f2_id;
>  	}
>  
> -	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
> -	if (err)
> +	ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
> +	if (ret)
>  		goto err_post_init;
>  
>  	read_mc_regs(pvt);
>  
> +	return 0;
> +
> +err_post_init:
> +	if (pvt->fam >= 0x17)
> +		kfree(pvt->umc);
> +
> +err_ret:
> +	return ret;
> +}
> +
> +static int init_one_instance(struct amd64_pvt *pvt,
> +			     struct amd64_family_type *fam_type)

Yeah, that fam_type can be made global. No need to hand it around in
functions since it is going to be a single struct per system. Do that in
another, separate patch please.

After you've done those things, this patch would become a lot simpler.

Thx.
Yazen Ghannam Sept. 6, 2019, 7:14 p.m. UTC | #2
> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org <linux-kernel-owner@vger.kernel.org> On Behalf Of Borislav Petkov
> Sent: Thursday, August 29, 2019 4:23 AM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [RFC PATCH v3 08/10] EDAC/amd64: Gather hardware information early
> 
> On Thu, Aug 22, 2019 at 12:00:02AM +0000, Ghannam, Yazen wrote:
> > From: Yazen Ghannam <yazen.ghannam@amd.com>
> >
> > Split out gathering hardware information from init_one_instance() into a
> > separate function get_hardware_info().
> >
> > This is necessary so that the information can be cached earlier and used
> > to check if memory is populated and if ECC is enabled on a node.
> >
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> >  drivers/edac/amd64_edac.c | 76 +++++++++++++++++++++++----------------
> >  1 file changed, 45 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> > index 4d1e6daa7ec4..84832771dec0 100644
> > --- a/drivers/edac/amd64_edac.c
> > +++ b/drivers/edac/amd64_edac.c
> > @@ -3405,34 +3405,17 @@ static void compute_num_umcs(void)
> >  	edac_dbg(1, "Number of UMCs: %x", num_umcs);
> >  }
> >
> > -static int init_one_instance(unsigned int nid)
> > +static int get_hardware_info(struct amd64_pvt *pvt,
> > +			     struct amd64_family_type *fam_type)
> >  {
> > -	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
> > -	struct amd64_family_type *fam_type = NULL;
> > -	struct mem_ctl_info *mci = NULL;
> > -	struct edac_mc_layer layers[2];
> > -	struct amd64_pvt *pvt = NULL;
> >  	u16 pci_id1, pci_id2;
> > -	int err = 0, ret;
> > -
> > -	ret = -ENOMEM;
> > -	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
> > -	if (!pvt)
> > -		goto err_ret;
> > -
> > -	pvt->mc_node_id	= nid;
> > -	pvt->F3 = F3;
> > -
> > -	ret = -EINVAL;
> > -	fam_type = per_family_init(pvt);
> > -	if (!fam_type)
> > -		goto err_free;
> > +	int ret = -EINVAL;
> >
> >  	if (pvt->fam >= 0x17) {
> >  		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
> 
> Yeah, a get_hardware_info() function which does an allocation of that
> struct amd64_umc on => F17 which is only 20 bytes. Just add it into the
> pvt struct:
> 
> struct amd64_pvt {
> 	...
> 	struct amd64_umc umc;  /* UMC registers */
> };
> 
> and be done with it. This should simplify the code flow here a bit and
> 20 bytes more per pvt is not a big deal.
> 

This struct is used per channel, so we may have 2-8 per system. We could fix it at the max (8).
What do you think?

Thanks,
Yazen
Borislav Petkov Sept. 6, 2019, 8:35 p.m. UTC | #3
On Fri, Sep 06, 2019 at 07:14:57PM +0000, Ghannam, Yazen wrote:
> This struct is used per channel, so we may have 2-8 per system.

Ah, true.

> We could fix it at the max (8). What do you think?

Anything in struct amd64_umc that is shared between those channels or
all max 8 of them can be distinct?
Yazen Ghannam Sept. 6, 2019, 8:49 p.m. UTC | #4
> -----Original Message-----
> From: linux-edac-owner@vger.kernel.org <linux-edac-owner@vger.kernel.org> On Behalf Of Borislav Petkov
> Sent: Friday, September 6, 2019 3:35 PM
> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>
> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [RFC PATCH v3 08/10] EDAC/amd64: Gather hardware information early
> 
> On Fri, Sep 06, 2019 at 07:14:57PM +0000, Ghannam, Yazen wrote:
> > This struct is used per channel, so we may have 2-8 per system.
> 
> Ah, true.
> 
> > We could fix it at the max (8). What do you think?
> 
> Anything in struct amd64_umc that is shared between those channels or
> all max 8 of them can be distinct?
> 


All the fields are register values, and there are unique instances for each channel. They can
potentially all be different.

Thanks,
Yazen
Borislav Petkov Sept. 9, 2019, 3:31 p.m. UTC | #5
On Fri, Sep 06, 2019 at 08:49:52PM +0000, Ghannam, Yazen wrote:
> All the fields are register values, and there are unique instances for
> each channel. They can potentially all be different.

Hmm, ok, that's 160 bytes more per node. And on !Rome it is wasted. Oh
well, let's remain conservative then and allocate it only where needed.

Btw, upon a second look, compute_num_umcs() can be made part of
get_hardware_info() instead of having this as a separate function which
gets called only once before probe_one_instance().

Thx.
diff mbox series

Patch

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4d1e6daa7ec4..84832771dec0 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3405,34 +3405,17 @@  static void compute_num_umcs(void)
 	edac_dbg(1, "Number of UMCs: %x", num_umcs);
 }
 
-static int init_one_instance(unsigned int nid)
+static int get_hardware_info(struct amd64_pvt *pvt,
+			     struct amd64_family_type *fam_type)
 {
-	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
-	struct amd64_family_type *fam_type = NULL;
-	struct mem_ctl_info *mci = NULL;
-	struct edac_mc_layer layers[2];
-	struct amd64_pvt *pvt = NULL;
 	u16 pci_id1, pci_id2;
-	int err = 0, ret;
-
-	ret = -ENOMEM;
-	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
-	if (!pvt)
-		goto err_ret;
-
-	pvt->mc_node_id	= nid;
-	pvt->F3 = F3;
-
-	ret = -EINVAL;
-	fam_type = per_family_init(pvt);
-	if (!fam_type)
-		goto err_free;
+	int ret = -EINVAL;
 
 	if (pvt->fam >= 0x17) {
 		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
 		if (!pvt->umc) {
 			ret = -ENOMEM;
-			goto err_free;
+			goto err_ret;
 		}
 
 		pci_id1 = fam_type->f0_id;
@@ -3442,18 +3425,34 @@  static int init_one_instance(unsigned int nid)
 		pci_id2 = fam_type->f2_id;
 	}
 
-	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
-	if (err)
+	ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
+	if (ret)
 		goto err_post_init;
 
 	read_mc_regs(pvt);
 
+	return 0;
+
+err_post_init:
+	if (pvt->fam >= 0x17)
+		kfree(pvt->umc);
+
+err_ret:
+	return ret;
+}
+
+static int init_one_instance(struct amd64_pvt *pvt,
+			     struct amd64_family_type *fam_type)
+{
+	struct mem_ctl_info *mci = NULL;
+	struct edac_mc_layer layers[2];
+	int ret = -EINVAL;
+
 	/*
 	 * We need to determine how many memory channels there are. Then use
 	 * that information for calculating the size of the dynamic instance
 	 * tables in the 'mci' structure.
 	 */
-	ret = -EINVAL;
 	pvt->channel_count = pvt->ops->early_channel_count(pvt);
 	if (pvt->channel_count < 0)
 		goto err_siblings;
@@ -3478,7 +3477,7 @@  static int init_one_instance(unsigned int nid)
 		layers[1].size = 2;
 	layers[1].is_virt_csrow = false;
 
-	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
+	mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
 	if (!mci)
 		goto err_siblings;
 
@@ -3504,20 +3503,17 @@  static int init_one_instance(unsigned int nid)
 err_siblings:
 	free_mc_sibling_devs(pvt);
 
-err_post_init:
 	if (pvt->fam >= 0x17)
 		kfree(pvt->umc);
 
-err_free:
-	kfree(pvt);
-
-err_ret:
 	return ret;
 }
 
 static int probe_one_instance(unsigned int nid)
 {
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	struct amd64_family_type *fam_type = NULL;
+	struct amd64_pvt *pvt = NULL;
 	struct ecc_settings *s;
 	int ret;
 
@@ -3528,6 +3524,21 @@  static int probe_one_instance(unsigned int nid)
 
 	ecc_stngs[nid] = s;
 
+	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
+	if (!pvt)
+		goto err_settings;
+
+	pvt->mc_node_id	= nid;
+	pvt->F3 = F3;
+
+	fam_type = per_family_init(pvt);
+	if (!fam_type)
+		goto err_enable;
+
+	ret = get_hardware_info(pvt, fam_type);
+	if (ret < 0)
+		goto err_enable;
+
 	if (!ecc_enabled(F3, nid)) {
 		ret = 0;
 
@@ -3544,7 +3555,7 @@  static int probe_one_instance(unsigned int nid)
 			goto err_enable;
 	}
 
-	ret = init_one_instance(nid);
+	ret = init_one_instance(pvt, fam_type);
 	if (ret < 0) {
 		amd64_err("Error probing instance: %d\n", nid);
 
@@ -3557,6 +3568,9 @@  static int probe_one_instance(unsigned int nid)
 	return ret;
 
 err_enable:
+	kfree(pvt);
+
+err_settings:
 	kfree(s);
 	ecc_stngs[nid] = NULL;