diff mbox

drm/radeon/kms: optimize CS state checking for r100->r500

Message ID: 1297534895-1964-1-git-send-email-maraeo@gmail.com (mailing list archive)
State: Accepted
Headers show

Commit Message

Marek Olšák Feb. 12, 2011, 6:21 p.m. UTC
(No commit message body was provided with this patch.)
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5f15820..fdf4bc6 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1427,6 +1427,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
+		track->zb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case RADEON_RB3D_COLOROFFSET:
@@ -1439,6 +1440,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->cb[0].robj = reloc->robj;
 		track->cb[0].offset = idx_value;
+		track->cb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case RADEON_PP_TXOFFSET_0:
@@ -1454,6 +1456,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		}
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_CUBIC_OFFSET_T0_0:
 	case RADEON_PP_CUBIC_OFFSET_T0_1:
@@ -1471,6 +1474,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		track->textures[0].cube_info[i].offset = idx_value;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[0].cube_info[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_CUBIC_OFFSET_T1_0:
 	case RADEON_PP_CUBIC_OFFSET_T1_1:
@@ -1488,6 +1492,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		track->textures[1].cube_info[i].offset = idx_value;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[1].cube_info[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_CUBIC_OFFSET_T2_0:
 	case RADEON_PP_CUBIC_OFFSET_T2_1:
@@ -1505,9 +1510,12 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		track->textures[2].cube_info[i].offset = idx_value;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[2].cube_info[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case RADEON_RE_WIDTH_HEIGHT:
 		track->maxy = ((idx_value >> 16) & 0x7FF);
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_COLORPITCH:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1528,9 +1536,11 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		ib[idx] = tmp;
 
 		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
+		track->cb_dirty = true;
 		break;
 	case RADEON_RB3D_DEPTHPITCH:
 		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_CNTL:
 		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -1555,6 +1565,8 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}
 		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_ZSTENCILCNTL:
 		switch (idx_value & 0xf) {
@@ -1572,6 +1584,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		default:
 			break;
 		}
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_ZPASS_ADDR:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1588,6 +1601,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 			uint32_t temp = idx_value >> 4;
 			for (i = 0; i < track->num_texture; i++)
 				track->textures[i].enabled = !!(temp & (1 << i));
+			track->tex_dirty = true;
 		}
 		break;
 	case RADEON_SE_VF_CNTL:
@@ -1602,12 +1616,14 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
 		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
 		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_TEX_PITCH_0:
 	case RADEON_PP_TEX_PITCH_1:
 	case RADEON_PP_TEX_PITCH_2:
 		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
 		track->textures[i].pitch = idx_value + 32;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_TXFILTER_0:
 	case RADEON_PP_TXFILTER_1:
@@ -1621,6 +1637,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		tmp = (idx_value >> 27) & 0x7;
 		if (tmp == 2 || tmp == 6)
 			track->textures[i].roundup_h = false;
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_TXFORMAT_0:
 	case RADEON_PP_TXFORMAT_1:
@@ -1673,6 +1690,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
 		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
+		track->tex_dirty = true;
 		break;
 	case RADEON_PP_CUBIC_FACES_0:
 	case RADEON_PP_CUBIC_FACES_1:
@@ -1683,6 +1701,7 @@  static int r100_packet0_check(struct radeon_cs_parser *p,
 			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
 			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
 		}
+		track->tex_dirty = true;
 		break;
 	default:
 		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
@@ -3318,9 +3337,9 @@  int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 	unsigned long size;
 	unsigned prim_walk;
 	unsigned nverts;
-	unsigned num_cb = track->num_cb;
+	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
 
-	if (!track->zb_cb_clear && !track->color_channel_mask &&
+	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
 	    !track->blend_read_enable)
 		num_cb = 0;
 
@@ -3341,7 +3360,9 @@  int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 			return -EINVAL;
 		}
 	}
-	if (track->z_enabled) {
+	track->cb_dirty = false;
+
+	if (track->zb_dirty && track->z_enabled) {
 		if (track->zb.robj == NULL) {
 			DRM_ERROR("[drm] No buffer for z buffer !\n");
 			return -EINVAL;
@@ -3358,6 +3379,8 @@  int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 			return -EINVAL;
 		}
 	}
+	track->zb_dirty = false;
+
 	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
 	if (track->vap_vf_cntl & (1 << 14)) {
 		nverts = track->vap_alt_nverts;
@@ -3417,13 +3440,22 @@  int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
 			  prim_walk);
 		return -EINVAL;
 	}
-	return r100_cs_track_texture_check(rdev, track);
+
+	if (track->tex_dirty) {
+		track->tex_dirty = false;
+		return r100_cs_track_texture_check(rdev, track);
+	}
+	return 0;
 }
 
 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
 {
 	unsigned i, face;
 
+	track->cb_dirty = true;
+	track->zb_dirty = true;
+	track->tex_dirty = true;
+
 	if (rdev->family < CHIP_R300) {
 		track->num_cb = 1;
 		if (rdev->family <= CHIP_RS200)
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index af65600..ee85c4a 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -52,14 +52,7 @@  struct r100_cs_track_texture {
 	unsigned                compress_format;
 };
 
-struct r100_cs_track_limits {
-	unsigned num_cb;
-	unsigned num_texture;
-	unsigned max_levels;
-};
-
 struct r100_cs_track {
-	struct radeon_device *rdev;
 	unsigned			num_cb;
 	unsigned                        num_texture;
 	unsigned			maxy;
@@ -78,6 +71,10 @@  struct r100_cs_track {
 	bool                            separate_cube;
 	bool				zb_cb_clear;
 	bool				blend_read_enable;
+
+	bool				cb_dirty;
+	bool				zb_dirty;
+	bool				tex_dirty;
 };
 
 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track);
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index d2408c3..f240583 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -184,6 +184,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
+		track->zb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case RADEON_RB3D_COLOROFFSET:
@@ -196,6 +197,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->cb[0].robj = reloc->robj;
 		track->cb[0].offset = idx_value;
+		track->cb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case R200_PP_TXOFFSET_0:
@@ -214,6 +216,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		}
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case R200_PP_CUBIC_OFFSET_F1_0:
 	case R200_PP_CUBIC_OFFSET_F2_0:
@@ -257,9 +260,12 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		track->textures[i].cube_info[face - 1].offset = idx_value;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		track->textures[i].cube_info[face - 1].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	case RADEON_RE_WIDTH_HEIGHT:
 		track->maxy = ((idx_value >> 16) & 0x7FF);
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_COLORPITCH:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -280,9 +286,11 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		ib[idx] = tmp;
 
 		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
+		track->cb_dirty = true;
 		break;
 	case RADEON_RB3D_DEPTHPITCH:
 		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_CNTL:
 		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
@@ -312,6 +320,8 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		}
 
 		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_ZSTENCILCNTL:
 		switch (idx_value & 0xf) {
@@ -329,6 +339,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		default:
 			break;
 		}
+		track->zb_dirty = true;
 		break;
 	case RADEON_RB3D_ZPASS_ADDR:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -345,6 +356,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 			uint32_t temp = idx_value >> 4;
 			for (i = 0; i < track->num_texture; i++)
 				track->textures[i].enabled = !!(temp & (1 << i));
+			track->tex_dirty = true;
 		}
 		break;
 	case RADEON_SE_VF_CNTL:
@@ -369,6 +381,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		i = (reg - R200_PP_TXSIZE_0) / 32;
 		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
 		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
+		track->tex_dirty = true;
 		break;
 	case R200_PP_TXPITCH_0:
 	case R200_PP_TXPITCH_1:
@@ -378,6 +391,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 	case R200_PP_TXPITCH_5:
 		i = (reg - R200_PP_TXPITCH_0) / 32;
 		track->textures[i].pitch = idx_value + 32;
+		track->tex_dirty = true;
 		break;
 	case R200_PP_TXFILTER_0:
 	case R200_PP_TXFILTER_1:
@@ -394,6 +408,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		tmp = (idx_value >> 27) & 0x7;
 		if (tmp == 2 || tmp == 6)
 			track->textures[i].roundup_h = false;
+		track->tex_dirty = true;
 		break;
 	case R200_PP_TXMULTI_CTL_0:
 	case R200_PP_TXMULTI_CTL_1:
@@ -432,6 +447,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 			track->textures[i].tex_coord_type = 1;
 			break;
 		}
+		track->tex_dirty = true;
 		break;
 	case R200_PP_TXFORMAT_0:
 	case R200_PP_TXFORMAT_1:
@@ -488,6 +504,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
 		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
+		track->tex_dirty = true;
 		break;
 	case R200_PP_CUBIC_FACES_0:
 	case R200_PP_CUBIC_FACES_1:
@@ -501,6 +518,7 @@  int r200_packet0_check(struct radeon_cs_parser *p,
 			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
 			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
 		}
+		track->tex_dirty = true;
 		break;
 	default:
 		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55fe5ba..15f9464 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -667,6 +667,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->cb[i].robj = reloc->robj;
 		track->cb[i].offset = idx_value;
+		track->cb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case R300_ZB_DEPTHOFFSET:
@@ -679,6 +680,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		}
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
+		track->zb_dirty = true;
 		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
 		break;
 	case R300_TX_OFFSET_0:
@@ -717,6 +719,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		tmp |= tile_flags;
 		ib[idx] = tmp;
 		track->textures[i].robj = reloc->robj;
+		track->tex_dirty = true;
 		break;
 	/* Tracked registers */
 	case 0x2084:
@@ -743,6 +746,8 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		if (p->rdev->family < CHIP_RV515) {
 			track->maxy -= 1440;
 		}
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		break;
 	case 0x4E00:
 		/* RB3D_CCTL */
@@ -752,6 +757,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}
 		track->num_cb = ((idx_value >> 5) & 0x3) + 1;
+		track->cb_dirty = true;
 		break;
 	case 0x4E38:
 	case 0x4E3C:
@@ -814,6 +820,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 				  ((idx_value >> 21) & 0xF));
 			return -EINVAL;
 		}
+		track->cb_dirty = true;
 		break;
 	case 0x4F00:
 		/* ZB_CNTL */
@@ -822,6 +829,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		} else {
 			track->z_enabled = false;
 		}
+		track->zb_dirty = true;
 		break;
 	case 0x4F10:
 		/* ZB_FORMAT */
@@ -838,6 +846,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 				  (idx_value & 0xF));
 			return -EINVAL;
 		}
+		track->zb_dirty = true;
 		break;
 	case 0x4F24:
 		/* ZB_DEPTHPITCH */
@@ -861,6 +870,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		ib[idx] = tmp;
 
 		track->zb.pitch = idx_value & 0x3FFC;
+		track->zb_dirty = true;
 		break;
 	case 0x4104:
 		for (i = 0; i < 16; i++) {
@@ -869,6 +879,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 			enabled = !!(idx_value & (1 << i));
 			track->textures[i].enabled = enabled;
 		}
+		track->tex_dirty = true;
 		break;
 	case 0x44C0:
 	case 0x44C4:
@@ -951,8 +962,8 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 			DRM_ERROR("Invalid texture format %u\n",
 				  (idx_value & 0x1F));
 			return -EINVAL;
-			break;
 		}
+		track->tex_dirty = true;
 		break;
 	case 0x4400:
 	case 0x4404:
@@ -980,6 +991,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		if (tmp == 2 || tmp == 4 || tmp == 6) {
 			track->textures[i].roundup_h = false;
 		}
+		track->tex_dirty = true;
 		break;
 	case 0x4500:
 	case 0x4504:
@@ -1017,6 +1029,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 			DRM_ERROR("Forbidden bit TXFORMAT_MSB\n");
 			return -EINVAL;
 		}
+		track->tex_dirty = true;
 		break;
 	case 0x4480:
 	case 0x4484:
@@ -1046,6 +1059,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 		track->textures[i].use_pitch = !!tmp;
 		tmp = (idx_value >> 22) & 0xF;
 		track->textures[i].txdepth = tmp;
+		track->tex_dirty = true;
 		break;
 	case R300_ZB_ZPASS_ADDR:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1060,6 +1074,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 	case 0x4e0c:
 		/* RB3D_COLOR_CHANNEL_MASK */
 		track->color_channel_mask = idx_value;
+		track->cb_dirty = true;
 		break;
 	case 0x43a4:
 		/* SC_HYPERZ_EN */
@@ -1073,6 +1088,8 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 	case 0x4f1c:
 		/* ZB_BW_CNTL */
 		track->zb_cb_clear = !!(idx_value & (1 << 5));
+		track->cb_dirty = true;
+		track->zb_dirty = true;
 		if (p->rdev->hyperz_filp != p->filp) {
 			if (idx_value & (R300_HIZ_ENABLE |
 					 R300_RD_COMP_ENABLE |
@@ -1084,6 +1101,7 @@  static int r300_packet0_check(struct radeon_cs_parser *p,
 	case 0x4e04:
 		/* RB3D_BLENDCNTL */
 		track->blend_read_enable = !!(idx_value & (1 << 2));
+		track->cb_dirty = true;
 		break;
 	case 0x4f28: /* ZB_DEPTHCLEARVALUE */
 		break;