- try again: pull in 7.4 fixes, dri configs changes, new radeon-rewrite [mesa-7_3-9_fc11]

author: Dave Airlie <airlied@fedoraproject.org> 2009-03-04 07:00:36 +0000
committer: Dave Airlie <airlied@fedoraproject.org> 2009-03-04 07:00:36 +0000
commit: 16d2d54dc632d4106d24c81ac8c89ecaa4550b11 (patch)
tree: 15c183204f198ab1d5800489fafda62ece7f7d6f
parent: a5cda00fd401899ec0d38e3519a8f1ad9cd3e1d3 (diff)
download: mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.tar.gz
mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.tar.xz
mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.zip
4 files changed, 3585 insertions, 28351 deletions
diff --git a/mesa-7.3-dri-configs-fixes.patch b/mesa-7.3-dri-configs-fixes.patch
new file mode 100644
index 0000000..351bb5f
--- /dev/null
+++ b/mesa-7.3-dri-configs-fixes.patch
@@ -0,0 +1,1001 @@
+diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
+index 30c860b..fb2efbb 100644
+--- a/src/mesa/drivers/dri/common/utils.c
++++ b/src/mesa/drivers/dri/common/utils.c
+@@ -523,7 +523,8 @@ __DRIconfig **
+ driCreateConfigs(GLenum fb_format, GLenum fb_type,
+ 		 const uint8_t * depth_bits, const uint8_t * stencil_bits,
+ 		 unsigned num_depth_stencil_bits,
+-		 const GLenum * db_modes, unsigned num_db_modes)
++		 const GLenum * db_modes, unsigned num_db_modes,
++	 	 const uint8_t * msaa_samples, unsigned num_msaa_modes)
+ {
+    static const uint8_t bits_table[4][4] = {
+      /* R  G  B  A */
+@@ -583,9 +584,7 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type,
+    int index;
+    __DRIconfig **configs, **c;
+    __GLcontextModes *modes;
+-   unsigned i;
+-   unsigned j;
+-   unsigned k;
++   unsigned i, j, k, h;
+    unsigned num_modes;
+    unsigned num_accum_bits = 2;
+ 
+@@ -666,66 +665,74 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type,
+     c = configs;
+     for ( k = 0 ; k < num_depth_stencil_bits ; k++ ) {
+ 	for ( i = 0 ; i < num_db_modes ; i++ ) {
+-	    for ( j = 0 ; j < num_accum_bits ; j++ ) {
+-		*c = _mesa_malloc (sizeof **c);
+-		modes = &(*c)->modes;
+-		c++;
+-
+-		memset(modes, 0, sizeof *modes);
+-		modes->redBits   = bits[0];
+-		modes->greenBits = bits[1];
+-		modes->blueBits  = bits[2];
+-		modes->alphaBits = bits[3];
+-		modes->redMask   = masks[0];
+-		modes->greenMask = masks[1];
+-		modes->blueMask  = masks[2];
+-		modes->alphaMask = masks[3];
+-		modes->rgbBits   = modes->redBits + modes->greenBits
+-		    + modes->blueBits + modes->alphaBits;
+-
+-		modes->accumRedBits   = 16 * j;
+-		modes->accumGreenBits = 16 * j;
+-		modes->accumBlueBits  = 16 * j;
+-		modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0;
+-		modes->visualRating = (j == 0) ? GLX_NONE : GLX_SLOW_CONFIG;
+-
+-		modes->stencilBits = stencil_bits[k];
+-		modes->depthBits = depth_bits[k];
+-
+-		modes->transparentPixel = GLX_NONE;
+-		modes->transparentRed = GLX_DONT_CARE;
+-		modes->transparentGreen = GLX_DONT_CARE;
+-		modes->transparentBlue = GLX_DONT_CARE;
+-		modes->transparentAlpha = GLX_DONT_CARE;
+-		modes->transparentIndex = GLX_DONT_CARE;
+-		modes->visualType = GLX_DONT_CARE;
+-		modes->renderType = GLX_RGBA_BIT;
+-		modes->drawableType = GLX_WINDOW_BIT;
+-		modes->rgbMode = GL_TRUE;
+-
+-		if ( db_modes[i] == GLX_NONE ) {
+-		    modes->doubleBufferMode = GL_FALSE;
+-		}
+-		else {
+-		    modes->doubleBufferMode = GL_TRUE;
+-		    modes->swapMethod = db_modes[i];
+-		}
+-
+-		modes->haveAccumBuffer = ((modes->accumRedBits +
++	    for ( h = 0 ; h < num_msaa_modes; h++ ) {
++	    	for ( j = 0 ; j < num_accum_bits ; j++ ) {
++		    *c = _mesa_malloc (sizeof **c);
++		    modes = &(*c)->modes;
++		    c++;
++
++		    memset(modes, 0, sizeof *modes);
++		    modes->redBits   = bits[0];
++		    modes->greenBits = bits[1];
++		    modes->blueBits  = bits[2];
++		    modes->alphaBits = bits[3];
++		    modes->redMask   = masks[0];
++		    modes->greenMask = masks[1];
++		    modes->blueMask  = masks[2];
++		    modes->alphaMask = masks[3];
++		    modes->rgbBits   = modes->redBits + modes->greenBits
++		    	+ modes->blueBits + modes->alphaBits;
++
++		    modes->accumRedBits   = 16 * j;
++		    modes->accumGreenBits = 16 * j;
++		    modes->accumBlueBits  = 16 * j;
++		    modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0;
++		    modes->visualRating = (j == 0) ? GLX_NONE : GLX_SLOW_CONFIG;
++
++		    modes->stencilBits = stencil_bits[k];
++		    modes->depthBits = depth_bits[k];
++
++		    modes->transparentPixel = GLX_NONE;
++		    modes->transparentRed = GLX_DONT_CARE;
++		    modes->transparentGreen = GLX_DONT_CARE;
++		    modes->transparentBlue = GLX_DONT_CARE;
++		    modes->transparentAlpha = GLX_DONT_CARE;
++		    modes->transparentIndex = GLX_DONT_CARE;
++		    modes->visualType = GLX_DONT_CARE;
++		    modes->renderType = GLX_RGBA_BIT;
++		    modes->drawableType = GLX_WINDOW_BIT;
++		    modes->rgbMode = GL_TRUE;
++
++		    if ( db_modes[i] == GLX_NONE ) {
++		    	modes->doubleBufferMode = GL_FALSE;
++		    }
++		    else {
++		    	modes->doubleBufferMode = GL_TRUE;
++		    	modes->swapMethod = db_modes[i];
++		    }
++
++		    modes->samples = msaa_samples[h];
++		    modes->sampleBuffers = modes->samples ? 1 : 0;
++
++
++		    modes->haveAccumBuffer = ((modes->accumRedBits +
+ 					   modes->accumGreenBits +
+ 					   modes->accumBlueBits +
+ 					   modes->accumAlphaBits) > 0);
+-		modes->haveDepthBuffer = (modes->depthBits > 0);
+-		modes->haveStencilBuffer = (modes->stencilBits > 0);
+-
+-		modes->bindToTextureRgb = GL_TRUE;
+-		modes->bindToTextureRgba = GL_TRUE;
+-		modes->bindToMipmapTexture = GL_FALSE;
+-		modes->bindToTextureTargets = modes->rgbMode ?
+-		    __DRI_ATTRIB_TEXTURE_1D_BIT |
+-		    __DRI_ATTRIB_TEXTURE_2D_BIT |
+-		    __DRI_ATTRIB_TEXTURE_RECTANGLE_BIT :
+-		    0;
++		    modes->haveDepthBuffer = (modes->depthBits > 0);
++		    modes->haveStencilBuffer = (modes->stencilBits > 0);
++
++		    modes->bindToTextureRgb = GL_TRUE;
++		    modes->bindToTextureRgba = GL_TRUE;
++		    modes->bindToMipmapTexture = GL_FALSE;
++		    modes->bindToTextureTargets = modes->rgbMode ?
++		    	__DRI_ATTRIB_TEXTURE_1D_BIT |
++		    	__DRI_ATTRIB_TEXTURE_2D_BIT |
++		    	__DRI_ATTRIB_TEXTURE_RECTANGLE_BIT :
++		    	0;
++
++		    modes = modes->next;
++		}
+ 	    }
+ 	}
+     }
+@@ -734,9 +741,10 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type,
+     return configs;
+ }
+ 
+-const __DRIconfig **driConcatConfigs(__DRIconfig **a, __DRIconfig **b)
++__DRIconfig **driConcatConfigs(__DRIconfig **a,
++			       __DRIconfig **b)
+ {
+-    const __DRIconfig **all;
++    __DRIconfig **all;
+     int i, j, index;
+ 
+     i = 0;
+diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h
+index 0c974db..9e9e5bc 100644
+--- a/src/mesa/drivers/dri/common/utils.h
++++ b/src/mesa/drivers/dri/common/utils.h
+@@ -131,9 +131,11 @@ extern __DRIconfig **
+ driCreateConfigs(GLenum fb_format, GLenum fb_type,
+ 		 const uint8_t * depth_bits, const uint8_t * stencil_bits,
+ 		 unsigned num_depth_stencil_bits,
+-		 const GLenum * db_modes, unsigned num_db_modes);
++		 const GLenum * db_modes, unsigned num_db_modes,
++    		 const uint8_t * msaa_samples, unsigned num_msaa_modes);
+ 
+-const __DRIconfig **driConcatConfigs(__DRIconfig **a, __DRIconfig **b);
++__DRIconfig **driConcatConfigs(__DRIconfig **a,
++			       __DRIconfig **b);
+ 
+ int
+ driGetConfigAttrib(const __DRIconfig *config,
+diff --git a/src/mesa/drivers/dri/ffb/ffb_xmesa.c b/src/mesa/drivers/dri/ffb/ffb_xmesa.c
+index 679f856..b1bb010 100644
+--- a/src/mesa/drivers/dri/ffb/ffb_xmesa.c
++++ b/src/mesa/drivers/dri/ffb/ffb_xmesa.c
+@@ -626,6 +626,7 @@ ffbFillInModes( __DRIscreenPrivate *psp,
+ 
+    uint8_t depth_bits_array[3];
+    uint8_t stencil_bits_array[3];
++   uint8_t msaa_samples_array[1];
+ 
+    depth_bits_array[0] = 0;
+    depth_bits_array[1] = depth_bits;
+@@ -639,6 +640,8 @@ ffbFillInModes( __DRIscreenPrivate *psp,
+    stencil_bits_array[1] = 0;
+    stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++   msaa_samples_array[0] = 0;
++
+    depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+    back_buffer_factor  = (have_back_buffer) ? 3 : 1;
+ 
+@@ -654,9 +657,10 @@ ffbFillInModes( __DRIscreenPrivate *psp,
+    configs = driCreateConfigs(fb_format, fb_type,
+ 			      depth_bits_array, stencil_bits_array,
+ 			      depth_buffer_factor, back_buffer_modes,
+-			      back_buffer_factor);
++			      back_buffer_factor,
++                               msaa_samples_array, 1);
+    if (configs == NULL) {
+-    fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
++      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+               __LINE__);
+       return NULL;
+    }
+diff --git a/src/mesa/drivers/dri/i810/i810screen.c b/src/mesa/drivers/dri/i810/i810screen.c
+index 48603f5..9a5a39c 100644
+--- a/src/mesa/drivers/dri/i810/i810screen.c
++++ b/src/mesa/drivers/dri/i810/i810screen.c
+@@ -77,6 +77,7 @@ i810FillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[2];
+     uint8_t stencil_bits_array[2];
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = depth_bits;
+     depth_bits_array[1] = depth_bits;
+@@ -88,13 +89,16 @@ i810FillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[0] = 0;
+     stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+     configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor,
+-			       back_buffer_modes, back_buffer_factor);
++			       back_buffer_modes, back_buffer_factor,
++                               msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+ 		 __func__, __LINE__ );
+diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
+index 7042c25..52a28b3 100644
+--- a/src/mesa/drivers/dri/intel/intel_screen.c
++++ b/src/mesa/drivers/dri/intel/intel_screen.c
+@@ -467,8 +467,6 @@ intelFillInModes(__DRIscreenPrivate *psp,
+    __GLcontextModes *m;
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+-   GLenum fb_format;
+-   GLenum fb_type;
+    int i;
+ 
+    /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+@@ -480,6 +478,7 @@ intelFillInModes(__DRIscreenPrivate *psp,
+ 
+    uint8_t depth_bits_array[3];
+    uint8_t stencil_bits_array[3];
++   uint8_t msaa_samples_array[1];
+ 
+    depth_bits_array[0] = 0;
+    depth_bits_array[1] = depth_bits;
+@@ -496,22 +495,39 @@ intelFillInModes(__DRIscreenPrivate *psp,
+ 
+    stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++   msaa_samples_array[0] = 0;
++
+    depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+    back_buffer_factor = (have_back_buffer) ? 3 : 1;
+ 
+    if (pixel_bits == 16) {
+-      fb_format = GL_RGB;
+-      fb_type = GL_UNSIGNED_SHORT_5_6_5;
++      configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
++				 depth_bits_array, stencil_bits_array,
++				 depth_buffer_factor, back_buffer_modes,
++				 back_buffer_factor,
++				 msaa_samples_array, 1);
+    }
+    else {
+-      fb_format = GL_BGRA;
+-      fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
++      __DRIconfig **configs_a8r8g8b8;
++      __DRIconfig **configs_x8r8g8b8;
++
++      configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
++					  depth_bits_array,
++					  stencil_bits_array,
++					  depth_buffer_factor,
++					  back_buffer_modes,
++					  back_buffer_factor,
++					  msaa_samples_array, 1);
++      configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV,
++					  depth_bits_array,
++					  stencil_bits_array,
++					  depth_buffer_factor,
++					  back_buffer_modes,
++					  back_buffer_factor,
++					  msaa_samples_array, 1);
++      configs = driConcatConfigs(configs_a8r8g8b8, configs_x8r8g8b8);
+    }
+ 
+-   configs = driCreateConfigs(fb_format, fb_type,
+-			      depth_bits_array, stencil_bits_array,
+-			      depth_buffer_factor, back_buffer_modes,
+-			      back_buffer_factor);
+    if (configs == NULL) {
+     fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+               __LINE__);
+@@ -673,6 +689,17 @@ static const
+ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
+ {
+    intelScreenPrivate *intelScreen;
++   GLenum fb_format[3];
++   GLenum fb_type[3];
++   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
++    * support pageflipping at all.
++    */
++   static const GLenum back_buffer_modes[] = {
++      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
++   };
++   uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
++   int color;
++   __DRIconfig **configs = NULL;
+ 
+    /* Calling driInitExtensions here, with a NULL context pointer,
+     * does not actually enable the extensions.  It just makes sure
+@@ -712,8 +739,50 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
+    intelScreen->irq_active = 1;
+    psp->extensions = intelScreenExtensions;
+ 
+-   return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1),
+-			   intelFillInModes(psp, 32, 24, 8, 1));
++   depth_bits[0] = 0;
++   stencil_bits[0] = 0;
++   depth_bits[1] = 16;
++   stencil_bits[1] = 0;
++   depth_bits[2] = 24;
++   stencil_bits[2] = 0;
++   depth_bits[3] = 24;
++   stencil_bits[3] = 8;
++
++   msaa_samples_array[0] = 0;
++
++   fb_format[0] = GL_RGB;
++   fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
++
++   fb_format[1] = GL_BGR;
++   fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
++
++   fb_format[2] = GL_BGRA;
++   fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
++
++   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
++      __DRIconfig **new_configs;
++
++      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
++				     depth_bits,
++				     stencil_bits,
++				     ARRAY_SIZE(depth_bits),
++				     back_buffer_modes,
++				     ARRAY_SIZE(back_buffer_modes),
++				     msaa_samples_array,
++				     ARRAY_SIZE(msaa_samples_array));
++      if (configs == NULL)
++	 configs = new_configs;
++      else
++	 configs = driConcatConfigs(configs, new_configs);
++   }
++
++   if (configs == NULL) {
++      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
++              __LINE__);
++      return NULL;
++   }
++
++   return (const __DRIconfig **)configs;
+ }
+ 
+ const struct __DriverAPIRec driDriverAPI = {
+diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.c b/src/mesa/drivers/dri/mach64/mach64_screen.c
+index 6bfb4c3..43e5959 100644
+--- a/src/mesa/drivers/dri/mach64/mach64_screen.c
++++ b/src/mesa/drivers/dri/mach64/mach64_screen.c
+@@ -93,6 +93,7 @@ mach64FillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[2];
+     uint8_t stencil_bits_array[2];
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = depth_bits;
+     depth_bits_array[1] = depth_bits;
+@@ -104,6 +105,8 @@ mach64FillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[0] = 0;
+     stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+@@ -119,7 +122,8 @@ mach64FillInModes( __DRIscreenPrivate *psp,
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor, back_buffer_modes,
+-			       back_buffer_factor);
++			       back_buffer_factor,
++                               msaa_samples_array, 1);
+     if (configs == NULL) {
+        fprintf(stderr, "[%s:%u] Error creating FBConfig!\n",
+ 	       __func__, __LINE__);
+diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c
+index 86da3a2..af706b3 100644
+--- a/src/mesa/drivers/dri/mga/mga_xmesa.c
++++ b/src/mesa/drivers/dri/mga/mga_xmesa.c
+@@ -133,6 +133,7 @@ mgaFillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[3];
+     uint8_t stencil_bits_array[3];
++    uint8_t msaa_samples_array[1];
+ 
+ 
+     depth_bits_array[0] = 0;
+@@ -147,6 +148,8 @@ mgaFillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[1] = 0;
+     stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+@@ -162,7 +165,8 @@ mgaFillInModes( __DRIscreenPrivate *psp,
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor,
+-			       back_buffer_modes, back_buffer_factor);
++			       back_buffer_modes, back_buffer_factor,
++                               msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+ 		 __func__, __LINE__ );
+diff --git a/src/mesa/drivers/dri/r128/r128_screen.c b/src/mesa/drivers/dri/r128/r128_screen.c
+index cb3a147..7cda4ca 100644
+--- a/src/mesa/drivers/dri/r128/r128_screen.c
++++ b/src/mesa/drivers/dri/r128/r128_screen.c
+@@ -422,7 +422,7 @@ r128FillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[2];
+     uint8_t stencil_bits_array[2];
+-
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = depth_bits;
+     depth_bits_array[1] = depth_bits;
+@@ -434,6 +434,8 @@ r128FillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[0] = 0;
+     stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+@@ -446,26 +448,27 @@ r128FillInModes( __DRIscreenPrivate *psp,
+         fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+     }
+ 
+-   configs = driCreateConfigs(fb_format, fb_type,
+-			      depth_bits_array, stencil_bits_array,
+-			      depth_buffer_factor, back_buffer_modes,
+-			      back_buffer_factor);
+-   if (configs == NULL) {
+-    fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+-              __LINE__);
+-      return NULL;
+-   }
++    configs = driCreateConfigs(fb_format, fb_type,
++                               depth_bits_array, stencil_bits_array,
++                               depth_buffer_factor, back_buffer_modes,
++                               back_buffer_factor,
++                               msaa_samples_array, 1);
++    if (configs == NULL) {
++        fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
++                __LINE__);
++        return NULL;
++    }
+ 
+-   /* Mark the visual as slow if there are "fake" stencil bits.
+-    */
+-   for (i = 0; configs[i]; i++) {
+-      m = &configs[i]->modes;
+-      if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
+-         m->visualRating = GLX_SLOW_CONFIG;
+-      }
+-   }
++    /* Mark the visual as slow if there are "fake" stencil bits.
++     */
++    for (i = 0; configs[i]; i++) {
++        m = &configs[i]->modes;
++        if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
++            m->visualRating = GLX_SLOW_CONFIG;
++        }
++    }
+ 
+-   return (const __DRIconfig **) configs;
++    return (const __DRIconfig **) configs;
+ }
+ 
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
+index 81337da..e3afaa9 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
+@@ -274,7 +274,7 @@ radeonFillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[2];
+     uint8_t stencil_bits_array[2];
+-
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = depth_bits;
+     depth_bits_array[1] = depth_bits;
+@@ -286,6 +286,8 @@ radeonFillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[0] = 0;
+     stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+@@ -301,7 +303,8 @@ radeonFillInModes( __DRIscreenPrivate *psp,
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor,
+-			       back_buffer_modes, back_buffer_factor);
++			       back_buffer_modes, back_buffer_factor,
++			       msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+ 		 __func__, __LINE__ );
+diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c
+index a344aab..5c835ac 100644
+--- a/src/mesa/drivers/dri/savage/savage_xmesa.c
++++ b/src/mesa/drivers/dri/savage/savage_xmesa.c
+@@ -914,7 +914,7 @@ savageFillInModes( __DRIscreenPrivate *psp,
+ 
+     uint8_t depth_bits_array[2];
+     uint8_t stencil_bits_array[2];
+-
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = depth_bits;
+     depth_bits_array[1] = depth_bits;
+@@ -926,6 +926,8 @@ savageFillInModes( __DRIscreenPrivate *psp,
+     stencil_bits_array[0] = 0;
+     stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+     back_buffer_factor  = (have_back_buffer) ? 2 : 1;
+ 
+@@ -941,7 +943,8 @@ savageFillInModes( __DRIscreenPrivate *psp,
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor,
+-			       back_buffer_modes, back_buffer_factor);
++			       back_buffer_modes, back_buffer_factor,
++                               msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+ 		 __func__, __LINE__ );
+diff --git a/src/mesa/drivers/dri/sis/sis_screen.c b/src/mesa/drivers/dri/sis/sis_screen.c
+index b1a5d15..9eb27fe 100644
+--- a/src/mesa/drivers/dri/sis/sis_screen.c
++++ b/src/mesa/drivers/dri/sis/sis_screen.c
+@@ -77,6 +77,7 @@ sisFillInModes(__DRIscreenPrivate *psp, int bpp)
+    };
+    uint8_t depth_bits_array[4];
+    uint8_t stencil_bits_array[4];
++   uint8_t msaa_samples_array[1];
+ 
+    depth_bits_array[0] = 0;
+    stencil_bits_array[0] = 0;
+@@ -87,6 +88,8 @@ sisFillInModes(__DRIscreenPrivate *psp, int bpp)
+    depth_bits_array[3] = 32;
+    stencil_bits_array[3] = 0;
+ 
++   msaa_samples_array[0] = 0;
++
+    depth_buffer_factor = 4;
+    back_buffer_factor = 2;
+ 
+@@ -100,7 +103,8 @@ sisFillInModes(__DRIscreenPrivate *psp, int bpp)
+ 
+    configs = driCreateConfigs(fb_format, fb_type, depth_bits_array,
+ 			      stencil_bits_array, depth_buffer_factor,
+-			      back_buffer_modes, back_buffer_factor);
++			      back_buffer_modes, back_buffer_factor,
++                              msaa_samples_array, 1);
+    if (configs == NULL) {
+       fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__);
+       return NULL;
+diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
+index 15b5724..6bfb866 100644
+--- a/src/mesa/drivers/dri/swrast/swrast.c
++++ b/src/mesa/drivers/dri/swrast/swrast.c
+@@ -147,6 +147,7 @@ swrastFillInModes(__DRIscreen *psp,
+ 
+     uint8_t depth_bits_array[4];
+     uint8_t stencil_bits_array[4];
++    uint8_t msaa_samples_array[1];
+ 
+     depth_bits_array[0] = 0;
+     depth_bits_array[1] = 0;
+@@ -161,26 +162,38 @@ swrastFillInModes(__DRIscreen *psp,
+     stencil_bits_array[2] = 0;
+     stencil_bits_array[3] = (stencil_bits == 0) ? 8 : stencil_bits;
+ 
++    msaa_samples_array[0] = 0;
++
+     depth_buffer_factor = 4;
+     back_buffer_factor = 2;
+ 
+-    if (pixel_bits == 8) {
++    switch (pixel_bits) {
++    case 8:
+ 	fb_format = GL_RGB;
+ 	fb_type = GL_UNSIGNED_BYTE_2_3_3_REV;
+-    }
+-    else if (pixel_bits == 16) {
++	break;
++    case 16:
+ 	fb_format = GL_RGB;
+ 	fb_type = GL_UNSIGNED_SHORT_5_6_5;
+-    }
+-    else {
++	break;
++    case 24:
++	fb_format = GL_BGR;
++	fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
++	break;
++    case 32:
+ 	fb_format = GL_BGRA;
+ 	fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
++	break;
++    default:
++	fprintf(stderr, "[%s:%u] bad depth %d\n", __func__, __LINE__,
++		pixel_bits);
++	return NULL;
+     }
+ 
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor, back_buffer_modes,
+-			       back_buffer_factor);
++			       back_buffer_factor, msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+ 		__LINE__);
+@@ -196,7 +209,7 @@ driCreateNewScreen(int scrn, const __DRIextension **extensions,
+ {
+     static const __DRIextension *emptyExtensionList[] = { NULL };
+     __DRIscreen *psp;
+-    __DRIconfig **configs8, **configs16, **configs32;
++    __DRIconfig **configs8, **configs16, **configs24, **configs32;
+ 
+     (void) data;
+ 
+@@ -213,11 +226,13 @@ driCreateNewScreen(int scrn, const __DRIextension **extensions,
+ 
+     configs8  = swrastFillInModes(psp,  8,  8, 0, 1);
+     configs16 = swrastFillInModes(psp, 16, 16, 0, 1);
++    configs24 = swrastFillInModes(psp, 24, 24, 8, 1);
+     configs32 = swrastFillInModes(psp, 32, 24, 8, 1);
+ 
+-    configs16 = (__DRIconfig **)driConcatConfigs(configs8, configs16);
+-
+-    *driver_configs = driConcatConfigs(configs16, configs32);
++    configs16 = driConcatConfigs(configs8, configs16);
++    configs24 = driConcatConfigs(configs16, configs24);
++    *driver_configs = (const __DRIconfig **)
++       driConcatConfigs(configs24, configs32);
+ 
+     driInitExtensions( NULL, card_extensions, GL_FALSE );
+ 
+@@ -249,19 +264,24 @@ static GLuint
+ choose_pixel_format(const GLvisual *v)
+ {
+     if (v->rgbMode) {
+-	int bpp = v->rgbBits;
++	int depth = v->rgbBits;
+ 
+-	if (bpp == 32
++	if (depth == 32
+ 	    && v->redMask   == 0xff0000
+ 	    && v->greenMask == 0x00ff00
+ 	    && v->blueMask  == 0x0000ff)
+ 	    return PF_A8R8G8B8;
+-	else if (bpp == 16
++	else if (depth == 24
++	    && v->redMask   == 0xff0000
++	    && v->greenMask == 0x00ff00
++	    && v->blueMask  == 0x0000ff)
++	    return PF_X8R8G8B8;
++	else if (depth == 16
+ 	    && v->redMask   == 0xf800
+ 	    && v->greenMask == 0x07e0
+ 	    && v->blueMask  == 0x001f)
+ 	    return PF_R5G6B5;
+-	else if (bpp == 8
++	else if (depth == 8
+ 	    && v->redMask   == 0x07
+ 	    && v->greenMask == 0x38
+ 	    && v->blueMask  == 0xc0)
+@@ -290,7 +310,6 @@ swrast_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+ 			   GLenum internalFormat, GLuint width, GLuint height)
+ {
+     struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+-    int bpp;
+     unsigned mask = PITCH_ALIGN_BITS - 1;
+ 
+     TRACE;
+@@ -299,23 +318,8 @@ swrast_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+     rb->Width = width;
+     rb->Height = height;
+ 
+-    switch (internalFormat) {
+-    case GL_RGB:
+-	bpp = rb->RedBits + rb->GreenBits + rb->BlueBits;
+-	break;
+-    case GL_RGBA:
+-	bpp = rb->RedBits + rb->GreenBits + rb->BlueBits + rb->AlphaBits;
+-	break;
+-    case GL_COLOR_INDEX8_EXT:
+-	bpp = rb->IndexBits;
+-	break;
+-    default:
+-	_mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ );
+-	return GL_FALSE;
+-    }
+-
+     /* always pad to PITCH_ALIGN_BITS */
+-    xrb->pitch = ((width * bpp + mask) & ~mask) / 8;
++    xrb->pitch = ((width * xrb->bpp + mask) & ~mask) / 8;
+ 
+     return GL_TRUE;
+ }
+@@ -371,6 +375,17 @@ swrast_new_renderbuffer(const GLvisual *visual, GLboolean front)
+ 	xrb->Base.GreenBits = 8 * sizeof(GLubyte);
+ 	xrb->Base.BlueBits  = 8 * sizeof(GLubyte);
+ 	xrb->Base.AlphaBits = 8 * sizeof(GLubyte);
++	xrb->bpp = 32;
++	break;
++    case PF_X8R8G8B8:
++	xrb->Base.InternalFormat = GL_RGB;
++	xrb->Base._BaseFormat = GL_RGB;
++	xrb->Base.DataType = GL_UNSIGNED_BYTE;
++	xrb->Base.RedBits   = 8 * sizeof(GLubyte);
++	xrb->Base.GreenBits = 8 * sizeof(GLubyte);
++	xrb->Base.BlueBits  = 8 * sizeof(GLubyte);
++	xrb->Base.AlphaBits = 0;
++	xrb->bpp = 32;
+ 	break;
+     case PF_R5G6B5:
+ 	xrb->Base.InternalFormat = GL_RGB;
+@@ -380,6 +395,7 @@ swrast_new_renderbuffer(const GLvisual *visual, GLboolean front)
+ 	xrb->Base.GreenBits = 6 * sizeof(GLubyte);
+ 	xrb->Base.BlueBits  = 5 * sizeof(GLubyte);
+ 	xrb->Base.AlphaBits = 0;
++	xrb->bpp = 16;
+ 	break;
+     case PF_R3G3B2:
+ 	xrb->Base.InternalFormat = GL_RGB;
+@@ -389,12 +405,14 @@ swrast_new_renderbuffer(const GLvisual *visual, GLboolean front)
+ 	xrb->Base.GreenBits = 3 * sizeof(GLubyte);
+ 	xrb->Base.BlueBits  = 2 * sizeof(GLubyte);
+ 	xrb->Base.AlphaBits = 0;
++	xrb->bpp = 8;
+ 	break;
+     case PF_CI8:
+ 	xrb->Base.InternalFormat = GL_COLOR_INDEX8_EXT;
+ 	xrb->Base._BaseFormat = GL_COLOR_INDEX;
+ 	xrb->Base.DataType = GL_UNSIGNED_BYTE;
+ 	xrb->Base.IndexBits = 8 * sizeof(GLubyte);
++	xrb->bpp = 8;
+ 	break;
+     default:
+ 	return NULL;
+diff --git a/src/mesa/drivers/dri/swrast/swrast_priv.h b/src/mesa/drivers/dri/swrast/swrast_priv.h
+index a707ffc..1a5fb31 100644
+--- a/src/mesa/drivers/dri/swrast/swrast_priv.h
++++ b/src/mesa/drivers/dri/swrast/swrast_priv.h
+@@ -90,6 +90,8 @@ struct swrast_renderbuffer {
+ 
+     /* renderbuffer pitch (in bytes) */
+     GLuint pitch;
++   /* bits per pixel of storage */
++    GLuint bpp;
+ };
+ 
+ static INLINE __DRIcontext *
+@@ -115,10 +117,10 @@ swrast_renderbuffer(struct gl_renderbuffer *rb)
+  * Pixel formats we support
+  */
+ #define PF_CI8        1		/**< Color Index mode */
+-#define PF_A8R8G8B8   2		/**< 32-bit TrueColor:  8-A, 8-R, 8-G, 8-B bits */
+-#define PF_R5G6B5     3		/**< 16-bit TrueColor:  5-R, 6-G, 5-B bits */
+-#define PF_R3G3B2     4		/**<  8-bit TrueColor:  3-R, 3-G, 2-B bits */
+-
++#define PF_A8R8G8B8   2		/**< 32bpp TrueColor:  8-A, 8-R, 8-G, 8-B bits */
++#define PF_R5G6B5     3		/**< 16bpp TrueColor:  5-R, 6-G, 5-B bits */
++#define PF_R3G3B2     4		/**<  8bpp TrueColor:  3-R, 3-G, 2-B bits */
++#define PF_X8R8G8B8   5		/**< 32bpp TrueColor:  8-R, 8-G, 8-B bits */
+ 
+ /**
+  * Renderbuffer pitch alignment (in bits).
+diff --git a/src/mesa/drivers/dri/swrast/swrast_span.c b/src/mesa/drivers/dri/swrast/swrast_span.c
+index 5e99036..2d3c25d 100644
+--- a/src/mesa/drivers/dri/swrast/swrast_span.c
++++ b/src/mesa/drivers/dri/swrast/swrast_span.c
+@@ -79,6 +79,24 @@ static const GLubyte kernel[16] = {
+    DST[BCOMP] = SRC[0]
+ 
+ 
++/* 32-bit BGRX */
++#define STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE) \
++   DST[3] = 0xff; \
++   DST[2] = VALUE[RCOMP]; \
++   DST[1] = VALUE[GCOMP]; \
++   DST[0] = VALUE[BCOMP]
++#define STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE) \
++   DST[3] = 0xff; \
++   DST[2] = VALUE[RCOMP]; \
++   DST[1] = VALUE[GCOMP]; \
++   DST[0] = VALUE[BCOMP]
++#define FETCH_PIXEL_X8R8G8B8(DST, SRC) \
++   DST[ACOMP] = 0xff; \
++   DST[RCOMP] = SRC[2]; \
++   DST[GCOMP] = SRC[1]; \
++   DST[BCOMP] = SRC[0]
++
++
+ /* 16-bit BGR */
+ #define STORE_PIXEL_R5G6B5(DST, X, Y, VALUE) \
+    do { \
+@@ -139,6 +157,24 @@ static const GLubyte kernel[16] = {
+ #include "swrast/s_spantemp.h"
+ 
+ 
++/* 32-bit BGRX */
++#define NAME(FUNC) FUNC##_X8R8G8B8
++#define RB_TYPE GLubyte
++#define SPAN_VARS \
++   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
++#define INIT_PIXEL_PTR(P, X, Y) \
++   GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X) * 4;
++#define INC_PIXEL_PTR(P) P += 4
++#define STORE_PIXEL(DST, X, Y, VALUE) \
++   STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE)
++#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
++   STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE)
++#define FETCH_PIXEL(DST, SRC) \
++   FETCH_PIXEL_X8R8G8B8(DST, SRC)
++
++#include "swrast/s_spantemp.h"
++
++
+ /* 16-bit BGR */
+ #define NAME(FUNC) FUNC##_R5G6B5
+ #define RB_TYPE GLubyte
+@@ -210,6 +246,24 @@ static const GLubyte kernel[16] = {
+ #include "swrast_spantemp.h"
+ 
+ 
++/* 32-bit BGRX */
++#define NAME(FUNC) FUNC##_X8R8G8B8_front
++#define RB_TYPE GLubyte
++#define SPAN_VARS \
++   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
++#define INIT_PIXEL_PTR(P, X, Y) \
++   GLubyte *P = (GLubyte *)row;
++#define INC_PIXEL_PTR(P) P += 4
++#define STORE_PIXEL(DST, X, Y, VALUE) \
++   STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE)
++#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
++   STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE)
++#define FETCH_PIXEL(DST, SRC) \
++   FETCH_PIXEL_X8R8G8B8(DST, SRC)
++
++#include "swrast_spantemp.h"
++
++
+ /* 16-bit BGR */
+ #define NAME(FUNC) FUNC##_R5G6B5_front
+ #define RB_TYPE GLubyte
+@@ -279,6 +333,15 @@ swrast_set_span_funcs_back(struct swrast_renderbuffer *xrb,
+ 	xrb->Base.PutValues = put_values_A8R8G8B8;
+ 	xrb->Base.PutMonoValues = put_mono_values_A8R8G8B8;
+ 	break;
++    case PF_X8R8G8B8:
++	xrb->Base.GetRow = get_row_X8R8G8B8;
++	xrb->Base.GetValues = get_values_X8R8G8B8;
++	xrb->Base.PutRow = put_row_X8R8G8B8;
++	xrb->Base.PutRowRGB = put_row_rgb_X8R8G8B8;
++	xrb->Base.PutMonoRow = put_mono_row_X8R8G8B8;
++	xrb->Base.PutValues = put_values_X8R8G8B8;
++	xrb->Base.PutMonoValues = put_mono_values_X8R8G8B8;
++	break;
+     case PF_R5G6B5:
+ 	xrb->Base.GetRow = get_row_R5G6B5;
+ 	xrb->Base.GetValues = get_values_R5G6B5;
+@@ -334,6 +397,15 @@ swrast_set_span_funcs_front(struct swrast_renderbuffer *xrb,
+ 	xrb->Base.PutValues = put_values_A8R8G8B8_front;
+ 	xrb->Base.PutMonoValues = put_mono_values_A8R8G8B8_front;
+ 	break;
++    case PF_X8R8G8B8:
++	xrb->Base.GetRow = get_row_X8R8G8B8_front;
++	xrb->Base.GetValues = get_values_X8R8G8B8_front;
++	xrb->Base.PutRow = put_row_X8R8G8B8_front;
++	xrb->Base.PutRowRGB = put_row_rgb_X8R8G8B8_front;
++	xrb->Base.PutMonoRow = put_mono_row_X8R8G8B8_front;
++	xrb->Base.PutValues = put_values_X8R8G8B8_front;
++	xrb->Base.PutMonoValues = put_mono_values_X8R8G8B8_front;
++	break;
+     case PF_R5G6B5:
+ 	xrb->Base.GetRow = get_row_R5G6B5_front;
+ 	xrb->Base.GetValues = get_values_R5G6B5_front;
+diff --git a/src/mesa/drivers/dri/tdfx/tdfx_screen.c b/src/mesa/drivers/dri/tdfx/tdfx_screen.c
+index cd22b84..5f2f5cf 100644
+--- a/src/mesa/drivers/dri/tdfx/tdfx_screen.c
++++ b/src/mesa/drivers/dri/tdfx/tdfx_screen.c
+@@ -361,6 +361,7 @@ tdfxFillInModes(__DRIscreenPrivate *psp,
+ 	static const GLenum db_modes[2] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML };
+ 	uint8_t depth_bits_array[4];
+ 	uint8_t stencil_bits_array[4];
++        uint8_t msaa_samples_array[1];
+ 	if(deep) {
+ 		depth_bits_array[0] = 0;
+ 		depth_bits_array[1] = 24;
+@@ -377,13 +378,17 @@ tdfxFillInModes(__DRIscreenPrivate *psp,
+ 		stencil_bits_array[3] = 8;
+ 	}
+ 
+-	return driCreateConfigs(
+-		deep ? GL_RGBA : GL_RGB,
+-		deep ? GL_UNSIGNED_INT_8_8_8_8 : GL_UNSIGNED_SHORT_5_6_5,
+-		depth_bits_array,
+-		stencil_bits_array,
+-		deep ? 2 : 4,
+-		db_modes, 2);
++	msaa_samples_array[0] = 0;
++
++	return (const __DRIconfig **)
++	   driCreateConfigs(deep ? GL_RGBA : GL_RGB,
++			    deep ? GL_UNSIGNED_INT_8_8_8_8 :
++				   GL_UNSIGNED_SHORT_5_6_5,
++			    depth_bits_array,
++			    stencil_bits_array,
++			    deep ? 2 : 4,
++			    db_modes, 2,
++			    msaa_samples_array, 1);
+ }
+ 
+ /**
+diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c
+index 988f993..e9f5661 100644
+--- a/src/mesa/drivers/dri/unichrome/via_screen.c
++++ b/src/mesa/drivers/dri/unichrome/via_screen.c
+@@ -342,6 +342,7 @@ viaFillInModes( __DRIscreenPrivate *psp,
+      */
+     static const uint8_t depth_bits_array[4]   = { 0, 16, 24, 32 };
+     static const uint8_t stencil_bits_array[4] = { 0,  0,  8,  0 };
++    uint8_t msaa_samples_array[1] = { 0 };
+     const unsigned depth_buffer_factor = 3;
+ 
+     if ( pixel_bits == 16 ) {
+@@ -356,7 +357,8 @@ viaFillInModes( __DRIscreenPrivate *psp,
+     configs = driCreateConfigs(fb_format, fb_type,
+ 			       depth_bits_array, stencil_bits_array,
+ 			       depth_buffer_factor, back_buffer_modes,
+-			       back_buffer_factor);
++			       back_buffer_factor,
++                               msaa_samples_array, 1);
+     if (configs == NULL) {
+ 	fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+ 		__LINE__);
diff --git a/mesa-7.3-fixes-from-7.4-branch.patch b/mesa-7.3-fixes-from-7.4-branch.patch
new file mode 100644
index 0000000..2044868
--- /dev/null
+++ b/mesa-7.3-fixes-from-7.4-branch.patch
@@ -0,0 +1,2525 @@
+diff --git a/Makefile b/Makefile
+index ebc5254..ee40b75 100644
+--- a/Makefile
++++ b/Makefile
+@@ -174,10 +174,10 @@ ultrix-gcc:
+ 
+ # Rules for making release tarballs
+ 
+-DIRECTORY = Mesa-7.3
+-LIB_NAME = MesaLib-7.3
+-DEMO_NAME = MesaDemos-7.3
+-GLUT_NAME = MesaGLUT-7.3
++DIRECTORY = Mesa-7.4
++LIB_NAME = MesaLib-7.4
++DEMO_NAME = MesaDemos-7.4
++GLUT_NAME = MesaGLUT-7.4
+ 
+ MAIN_FILES = \
+ 	$(DIRECTORY)/Makefile*						\
+diff --git a/configure.ac b/configure.ac
+index 33c1072..73caf00 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -46,10 +46,14 @@ esac
+ MKDEP_OPTIONS=-fdepend
+ dnl Ask gcc where it's keeping its secret headers
+ if test "x$GCC" = xyes; then
+-    GCC_INCLUDES=`$CC -print-file-name=include`
+-    if test "x$GCC_INCLUDES" != x; then
+-        MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES"
+-    fi
++    for dir in include include-fixed; do
++        GCC_INCLUDES=`$CC -print-file-name=$dir`
++        if test "x$GCC_INCLUDES" != x && \
++           test "$GCC_INCLUDES" != "$dir" && \
++           test -d "$GCC_INCLUDES"; then
++            MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES"
++        fi
++    done
+ fi
+ AC_SUBST([MKDEP_OPTIONS])
+ 
+@@ -362,7 +366,7 @@ default_driver="xlib"
+ case "$host_os" in
+ linux*)
+     case "$host_cpu" in
+-    i*86|x86_64|powerpc*) default_driver="dri";;
++    i*86|x86_64|powerpc*|sparc*) default_driver="dri";;
+     esac
+     ;;
+ *freebsd* | dragonfly*)
+diff --git a/docs/relnotes-7.3.html b/docs/relnotes-7.3.html
+index c083fcb..df89884 100644
+--- a/docs/relnotes-7.3.html
++++ b/docs/relnotes-7.3.html
+@@ -22,13 +22,21 @@ Some drivers don't support all the features required in OpenGL 2.1.
+ </p>
+ <p>
+ See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+-for DRI ardware acceleration.
++for DRI hardware acceleration.
+ </p>
+ 
+ 
+ <h2>MD5 checksums</h2>
+ <pre>
+-tbd
++8ed03191432b22d118d88d6db497f304  MesaLib-7.3.tar.gz
++781e7811a6ed5c97b2b8defefc8ffbc9  MesaLib-7.3.tar.bz2
++3ccba9a1734ed6d4b3389e1535d90fbf  MesaLib-7.3.zip
++d312e974b31043b13b61bac5fbf00b87  MesaDemos-7.3.tar.gz
++3f0741394069bdf2329565a387396cda  MesaDemos-7.3.tar.bz2
++4d0887fd4c66a824295cdd619f6d34cb  MesaDemos-7.3.zip
++2d7661b66022bcb8878728f3d5bd33ab  MesaGLUT-7.3.tar.gz
++abe8036a724c1a483bdad6b5a55ddc1a  MesaGLUT-7.3.tar.bz2
++5f247819b47e2a7c62d07a6afe5262fb  MesaGLUT-7.3.zip
+ </pre>
+ 
+ 
+diff --git a/docs/relnotes-7.4.html b/docs/relnotes-7.4.html
+new file mode 100644
+index 0000000..8ad23e5
+--- /dev/null
++++ b/docs/relnotes-7.4.html
+@@ -0,0 +1,79 @@
++<HTML>
++
++<TITLE>Mesa Release Notes</TITLE>
++
++<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>
++
++<BODY>
++
++<body bgcolor="#eeeeee">
++
++<H1>Mesa 7.4 Release Notes / date TBD</H1>
++
++<p>
++Mesa 7.4 is a stable development release fixing bugs since the 7.3 release.
++</p>
++<p>
++Mesa 7.4 implements the OpenGL 2.1 API, but the version reported by
++glGetString(GL_VERSION) depends on the particular driver being used.
++Some drivers don't support all the features required in OpenGL 2.1.
++</p>
++<p>
++See the <a href="install.html">Compiling/Installing page</a> for prerequisites
++for DRI hardware acceleration.
++</p>
++
++
++<h2>MD5 checksums</h2>
++<pre>
++tbd
++</pre>
++
++
++<h2>New features</h2>
++<ul>
++<li>Added MESA_GLX_FORCE_DIRECT env var for Xlib/software driver
++<li>GLSL version 1.20 is returned by the GL_SHADING_LANGUAGE_VERSION query
++</ul>
++
++
++<h2>Bug fixes</h2>
++<ul>
++<li>glGetActiveUniform() returned wrong size for some array types
++<li>Fixed some error checking in glUniform()
++<li>Fixed a potential glTexImage('proxy target') segfault
++<li>Fixed bad reference counting for 1D/2D texture arrays
++<li>Fixed VBO + glPush/PopClientAttrib() bug #19835
++<li>Assorted i965 driver bug fixes
++<li>Fixed a Windows compilation failure in s_triangle.c
++<li>Fixed a GLSL array indexing bug
++<li>Fixes for building on Haiku
++</ul>
++
++<h2>Changes</h2>
++<ul>
++</ul>
++
++
++
++<h2>Driver Status</h2>
++
++<pre>
++Driver			Status
++----------------------	----------------------
++DRI drivers		varies with the driver
++XMesa/GLX (on Xlib)	implements OpenGL 2.1
++OSMesa (off-screen)	implements OpenGL 2.1
++Windows/Win32		implements OpenGL 2.1
++Glide (3dfx Voodoo1/2)	implements OpenGL 1.3
++SVGA			unsupported
++Wind River UGL		unsupported
++DJGPP			unsupported
++GGI			unsupported
++BeOS			unsupported
++Allegro			unsupported
++D3D			unsupported
++</pre>
++
++</body>
++</html>
+diff --git a/docs/relnotes.html b/docs/relnotes.html
+index 020e485..61c6a20 100644
+--- a/docs/relnotes.html
++++ b/docs/relnotes.html
+@@ -20,6 +20,7 @@ The release notes summarize what's new or changed in each Mesa release.
+ </p>
+ 
+ <UL>
++<LI><A HREF="relnotes-7.4.html">7.4 release notes</A>
+ <LI><A HREF="relnotes-7.3.html">7.3 release notes</A>
+ <LI><A HREF="relnotes-7.2.html">7.2 release notes</A>
+ <LI><A HREF="relnotes-7.1.html">7.1 release notes</A>
+diff --git a/docs/xlibdriver.html b/docs/xlibdriver.html
+index d95f4d5..029e2b1 100644
+--- a/docs/xlibdriver.html
++++ b/docs/xlibdriver.html
+@@ -169,6 +169,20 @@ the Gamma FAQ</a>
+ </p>
+ 
+ 
++<H2>Direct Rendering Flag</H2>
++<p>
++Some applications won't run with indirect rendering contexts (which is
++what the Xlib driver supports).
++To force the glXIsDirect() query to return True, set the MESA_GLX_FORCE_DIRECT
++environment variable.
++For example:
++</p>
++<pre>
++	$ export MESA_GLX_FORCE_DIRECT=1
++</pre>
++
++
++
+ <H2>Overlay Planes</H2>
+ <p>
+ Hardware overlay planes are supported by the Xlib driver.  To
+@@ -268,6 +282,8 @@ This extension was added in Mesa 2.6
+    MESA_BACK_BUFFER - specifies how to implement the back color buffer (X only)
+    MESA_PRIVATE_CMAP - force aux/tk libraries to use private colormaps (X only)
+    MESA_GAMMA - gamma correction coefficients (X only)
++   MESA_GLX_FORCE_DIRECT - report that the driver is direct rendering, even
++      though it's not.
+ </pre>
+ 
+ 
+diff --git a/include/GL/gl.h b/include/GL/gl.h
+index 3891a71..6b4f3f5 100644
+--- a/include/GL/gl.h
++++ b/include/GL/gl.h
+@@ -1,6 +1,6 @@
+ /*
+  * Mesa 3-D graphics library
+- * Version:  6.5.1
++ * Version:  7.4
+  *
+  * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+  *
+diff --git a/progs/glsl/multitex.c b/progs/glsl/multitex.c
+index 096d40f..b4be463 100644
+--- a/progs/glsl/multitex.c
++++ b/progs/glsl/multitex.c
+@@ -47,9 +47,12 @@ static const char *TexFiles[2] =
+ 
+ static GLuint Program;
+ 
+-static GLfloat Xrot = -90.0, Yrot = .0, Zrot = 0.0;
++static GLfloat Xrot = 0.0, Yrot = .0, Zrot = 0.0;
+ static GLfloat EyeDist = 10;
+ static GLboolean Anim = GL_TRUE;
++static GLboolean UseArrays = GL_TRUE;
++
++static GLint VertCoord_attr = -1, TexCoord0_attr = -1, TexCoord1_attr = -1;
+ 
+ 
+ /* value[0] = tex unit */
+@@ -60,32 +63,62 @@ static struct uniform_info Uniforms[] = {
+ };
+ 
+ 
++static const GLfloat Tex0Coords[4][2] = {
++   { 0.0, 0.0 }, { 2.0, 0.0 }, { 2.0, 2.0 }, { 0.0, 2.0 }
++};
++
++static const GLfloat Tex1Coords[4][2] = {
++   { 0.0, 0.0 }, { 1.0, 0.0 }, { 1.0, 1.0 }, { 0.0, 1.0 }
++};
++
++static const GLfloat VertCoords[4][2] = {
++   { -3.0, -3.0 }, { 3.0, -3.0 }, { 3.0, 3.0 }, { -3.0, 3.0 }
++};
++
++
+ static void
+-DrawPolygon(GLfloat size)
++DrawPolygonArray(void)
+ {
+-   glPushMatrix();
+-   glRotatef(90, 1, 0, 0);
+-   glNormal3f(0, 0, 1);
+-   glBegin(GL_POLYGON);
++   if (VertCoord_attr >= 0) {
++      glVertexAttribPointer_func(VertCoord_attr, 2, GL_FLOAT, GL_FALSE,
++                                 0, VertCoords);
++      glEnableVertexAttribArray_func(VertCoord_attr);
++   }
++   else {
++      glVertexPointer(2, GL_FLOAT, 0, VertCoords);
++      glEnableClientState(GL_VERTEX_ARRAY);
++   }
+ 
+-   glMultiTexCoord2f(GL_TEXTURE0, 0, 0);
+-   glMultiTexCoord2f(GL_TEXTURE1, 0, 0);
+-   glVertex2f(-size, -size);
++   glVertexAttribPointer_func(TexCoord0_attr, 2, GL_FLOAT, GL_FALSE,
++                              0, Tex0Coords);
++   glEnableVertexAttribArray_func(TexCoord0_attr);
+ 
+-   glMultiTexCoord2f(GL_TEXTURE0, 2, 0);
+-   glMultiTexCoord2f(GL_TEXTURE1, 1, 0);
+-   glVertex2f( size, -size);
++   glVertexAttribPointer_func(TexCoord1_attr, 2, GL_FLOAT, GL_FALSE,
++                              0, Tex1Coords);
++   glEnableVertexAttribArray_func(TexCoord1_attr);
+ 
+-   glMultiTexCoord2f(GL_TEXTURE0, 2, 2);
+-   glMultiTexCoord2f(GL_TEXTURE1, 1, 1);
+-   glVertex2f( size,  size);
++   glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
++}
+ 
+-   glMultiTexCoord2f(GL_TEXTURE0, 0, 2);
+-   glMultiTexCoord2f(GL_TEXTURE1, 0, 1);
+-   glVertex2f(-size,  size);
++
++static void
++DrawPolygonVert(void)
++{
++   GLuint i;
++
++   glBegin(GL_TRIANGLE_FAN);
++
++   for (i = 0; i < 4; i++) {
++      glVertexAttrib2fv_func(TexCoord0_attr, Tex0Coords[i]);
++      glVertexAttrib2fv_func(TexCoord1_attr, Tex1Coords[i]);
++
++      if (VertCoord_attr >= 0)
++         glVertexAttrib2fv_func(VertCoord_attr, VertCoords[i]);
++      else
++         glVertex2fv(VertCoords[i]);
++   }
+ 
+    glEnd();
+-   glPopMatrix();
+ }
+ 
+ 
+@@ -100,7 +133,10 @@ draw(void)
+       glRotatef(Yrot, 0, 1, 0);
+       glRotatef(Xrot, 1, 0, 0);
+ 
+-      DrawPolygon(3.0);
++      if (UseArrays)
++         DrawPolygonArray();
++      else
++         DrawPolygonVert();
+ 
+    glPopMatrix();
+ 
+@@ -123,8 +159,11 @@ key(unsigned char k, int x, int y)
+    (void) x;
+    (void) y;
+    switch (k) {
+-   case ' ':
+    case 'a':
++      UseArrays = !UseArrays;
++      printf("Arrays: %d\n", UseArrays);
++      break;
++   case ' ':
+       Anim = !Anim;
+       if (Anim)
+          glutIdleFunc(idle);
+@@ -232,6 +271,13 @@ CreateProgram(const char *vertProgFile, const char *fragProgFile,
+ 
+    InitUniforms(program, uniforms);
+ 
++   TexCoord0_attr = glGetAttribLocation_func(program, "TexCoord0");
++   TexCoord1_attr = glGetAttribLocation_func(program, "TexCoord1");
++   VertCoord_attr = glGetAttribLocation_func(program, "VertCoord");
++   printf("TexCoord0_attr = %d\n", TexCoord0_attr);
++   printf("TexCoord1_attr = %d\n", TexCoord1_attr);
++   printf("VertCoord_attr = %d\n", VertCoord_attr);
++
+    return program;
+ }
+ 
+diff --git a/progs/glsl/multitex.vert b/progs/glsl/multitex.vert
+index 5518ca1..4fae3b7 100644
+--- a/progs/glsl/multitex.vert
++++ b/progs/glsl/multitex.vert
+@@ -2,9 +2,13 @@
+ // Brian Paul
+ 
+ 
++attribute vec4 TexCoord0, TexCoord1;
++attribute vec4 VertCoord;
++
+ void main() 
+ {
+-   gl_TexCoord[0] = gl_MultiTexCoord0;
+-   gl_TexCoord[1] = gl_MultiTexCoord1;
+-   gl_Position = ftransform();
++   gl_TexCoord[0] = TexCoord0;
++   gl_TexCoord[1] = TexCoord1;
++   // note: may use gl_Vertex or VertCoord here for testing:
++   gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
+ }
+diff --git a/progs/util/extfuncs.h b/progs/util/extfuncs.h
+index cf6b29d..070414e 100644
+--- a/progs/util/extfuncs.h
++++ b/progs/util/extfuncs.h
+@@ -46,6 +46,13 @@ static PFNGLVERTEXATTRIB1FPROC glVertexAttrib1f_func = NULL;
+ static PFNGLVERTEXATTRIB2FPROC glVertexAttrib2f_func = NULL;
+ static PFNGLVERTEXATTRIB3FPROC glVertexAttrib3f_func = NULL;
+ static PFNGLVERTEXATTRIB4FPROC glVertexAttrib4f_func = NULL;
++static PFNGLVERTEXATTRIB1FVPROC glVertexAttrib1fv_func = NULL;
++static PFNGLVERTEXATTRIB2FVPROC glVertexAttrib2fv_func = NULL;
++static PFNGLVERTEXATTRIB3FVPROC glVertexAttrib3fv_func = NULL;
++static PFNGLVERTEXATTRIB4FVPROC glVertexAttrib4fv_func = NULL;
++static PFNGLVERTEXATTRIBPOINTERPROC glVertexAttribPointer_func = NULL;
++static PFNGLENABLEVERTEXATTRIBARRAYPROC glEnableVertexAttribArray_func = NULL;
++static PFNGLDISABLEVERTEXATTRIBARRAYPROC glDisableVertexAttribArray_func = NULL;
+ 
+ /* OpenGL 2.1 */
+ static PFNGLUNIFORMMATRIX2X3FVPROC glUniformMatrix2x3fv_func = NULL;
+@@ -126,6 +133,14 @@ GetExtensionFuncs(void)
+    glVertexAttrib2f_func = (PFNGLVERTEXATTRIB2FPROC) glutGetProcAddress("glVertexAttrib2f");
+    glVertexAttrib3f_func = (PFNGLVERTEXATTRIB3FPROC) glutGetProcAddress("glVertexAttrib3f");
+    glVertexAttrib4f_func = (PFNGLVERTEXATTRIB4FPROC) glutGetProcAddress("glVertexAttrib4f");
++   glVertexAttrib1fv_func = (PFNGLVERTEXATTRIB1FVPROC) glutGetProcAddress("glVertexAttrib1fv");
++   glVertexAttrib2fv_func = (PFNGLVERTEXATTRIB2FVPROC) glutGetProcAddress("glVertexAttrib2fv");
++   glVertexAttrib3fv_func = (PFNGLVERTEXATTRIB3FVPROC) glutGetProcAddress("glVertexAttrib3fv");
++   glVertexAttrib4fv_func = (PFNGLVERTEXATTRIB4FVPROC) glutGetProcAddress("glVertexAttrib4fv");
++
++   glVertexAttribPointer_func = (PFNGLVERTEXATTRIBPOINTERPROC) glutGetProcAddress("glVertexAttribPointer");
++   glEnableVertexAttribArray_func = (PFNGLENABLEVERTEXATTRIBARRAYPROC) glutGetProcAddress("glEnableVertexAttribArray");
++   glDisableVertexAttribArray_func = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) glutGetProcAddress("glDisableVertexAttribArray");
+ 
+    /* OpenGL 2.1 */
+    glUniformMatrix2x3fv_func = (PFNGLUNIFORMMATRIX2X3FVPROC) glutGetProcAddress("glUniformMatrix2x3fv");
+diff --git a/progs/xdemos/glxpixmap.c b/progs/xdemos/glxpixmap.c
+index 9db4df2..e1abd22 100644
+--- a/progs/xdemos/glxpixmap.c
++++ b/progs/xdemos/glxpixmap.c
+@@ -178,6 +178,7 @@ int main( int argc, char *argv[] )
+    glColor3f( 0.0, 1.0, 1.0 );
+    glRectf( -0.75, -0.75, 0.75, 0.75 );
+    glFlush();
++   glXWaitGL();
+ 
+    XMapWindow( dpy, win );
+ 
+diff --git a/src/glx/x11/dri2_glx.c b/src/glx/x11/dri2_glx.c
+index 2bee677..639aa19 100644
+--- a/src/glx/x11/dri2_glx.c
++++ b/src/glx/x11/dri2_glx.c
+@@ -60,6 +60,9 @@ struct __GLXDRIdisplayPrivateRec {
+     int driMajor;
+     int driMinor;
+     int driPatch;
++
++    unsigned long configureSeqno;
++    Bool (*oldConfigProc)(Display *, XEvent *, xEvent *);
+ };
+ 
+ struct __GLXDRIcontextPrivateRec {
+@@ -73,6 +76,10 @@ struct __GLXDRIdrawablePrivateRec {
+     __DRIbuffer buffers[5];
+     int bufferCount;
+     int width, height;
++    unsigned long configureSeqno;
++    int have_back;
++    int have_front;
++    int have_fake_front;
+ };
+ 
+ static void dri2DestroyContext(__GLXDRIcontext *context,
+@@ -166,6 +173,7 @@ static __GLXDRIdrawable *dri2CreateDrawable(__GLXscreenConfigs *psc,
+     pdraw->base.xDrawable = xDrawable;
+     pdraw->base.drawable = drawable;
+     pdraw->base.psc = psc;
++    pdraw->configureSeqno = ~0;
+ 
+     DRI2CreateDrawable(psc->dpy, xDrawable);
+ 
+@@ -190,6 +198,10 @@ static void dri2CopySubBuffer(__GLXDRIdrawable *pdraw,
+     XRectangle xrect;
+     XserverRegion region;
+ 
++    /* Check we have the right attachments */
++    if (!(priv->have_front && priv->have_back))
++    	return;
++
+     xrect.x = x;
+     xrect.y = priv->height - y - height;
+     xrect.width = width;
+@@ -208,6 +220,47 @@ static void dri2SwapBuffers(__GLXDRIdrawable *pdraw)
+     dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
+ }
+ 
++static void dri2WaitX(__GLXDRIdrawable *pdraw)
++{
++    __GLXDRIdrawablePrivate *priv = (__GLXDRIdrawablePrivate *) pdraw;
++    XRectangle xrect;
++    XserverRegion region;
++
++    /* Check we have the right attachments */
++    if (!(priv->have_fake_front && priv->have_front))
++    	return;
++
++    xrect.x = 0;
++    xrect.y = 0;
++    xrect.width = priv->width;
++    xrect.height = priv->height;
++
++    region = XFixesCreateRegion(pdraw->psc->dpy, &xrect, 1);
++    DRI2CopyRegion(pdraw->psc->dpy, pdraw->drawable, region,
++		   DRI2BufferFakeFrontLeft, DRI2BufferFrontLeft);
++    XFixesDestroyRegion(pdraw->psc->dpy, region);
++}
++
++static void dri2WaitGL(__GLXDRIdrawable *pdraw)
++{
++    __GLXDRIdrawablePrivate *priv = (__GLXDRIdrawablePrivate *) pdraw;
++    XRectangle xrect;
++    XserverRegion region;
++
++    if (!(priv->have_fake_front && priv->have_front))
++    	return;
++
++    xrect.x = 0;
++    xrect.y = 0;
++    xrect.width = priv->width;
++    xrect.height = priv->height;
++
++    region = XFixesCreateRegion(pdraw->psc->dpy, &xrect, 1);
++    DRI2CopyRegion(pdraw->psc->dpy, pdraw->drawable, region,
++		   DRI2BufferFrontLeft, DRI2BufferFakeFrontLeft);
++    XFixesDestroyRegion(pdraw->psc->dpy, region);
++}
++
+ static void dri2DestroyScreen(__GLXscreenConfigs *psc)
+ {
+     /* Free the direct rendering per screen data */
+@@ -223,9 +276,30 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
+ 	       int *out_count, void *loaderPrivate)
+ {
+     __GLXDRIdrawablePrivate *pdraw = loaderPrivate;
++    __GLXdisplayPrivate *dpyPriv = __glXInitialize(pdraw->base.psc->dpy);
++    __GLXDRIdisplayPrivate *pdp = (__GLXDRIdisplayPrivate *)dpyPriv->dri2Display;
+     DRI2Buffer *buffers;
+     int i;
+ 
++    /**
++     * Check if a ConfigureNotify has come in since we last asked for the
++     * buffers associated with this drawable.  If not, we can assume that they're
++     * the same set at glViewport time, and save a synchronous round-trip to the
++     * X Server.
++     */
++    if (pdraw->configureSeqno == pdp->configureSeqno &&
++	count == pdraw->bufferCount) {
++	for (i = 0; i < count; i++) {
++	    if (pdraw->buffers[i].attachment != attachments[i])
++		break;
++	}
++	if (i == count) {
++	    *out_count = pdraw->bufferCount;
++	    return pdraw->buffers;
++	}
++    }
++    pdraw->configureSeqno = pdp->configureSeqno;
++
+     buffers = DRI2GetBuffers(pdraw->base.psc->dpy, pdraw->base.xDrawable,
+ 			     width, height, attachments, count, out_count);
+     if (buffers == NULL)
+@@ -233,6 +307,10 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
+ 
+     pdraw->width = *width;
+     pdraw->height = *height;
++    pdraw->bufferCount = *out_count;
++    pdraw->have_front = 0;
++    pdraw->have_fake_front = 0;
++    pdraw->have_back = 0;
+ 
+     /* This assumes the DRI2 buffer attachment tokens matches the
+      * __DRIbuffer tokens. */
+@@ -242,6 +320,12 @@ dri2GetBuffers(__DRIdrawable *driDrawable,
+ 	pdraw->buffers[i].pitch = buffers[i].pitch;
+ 	pdraw->buffers[i].cpp = buffers[i].cpp;
+ 	pdraw->buffers[i].flags = buffers[i].flags;
++	if (pdraw->buffers[i].attachment == __DRI_BUFFER_FRONT_LEFT)
++	    pdraw->have_front = 1;
++	if (pdraw->buffers[i].attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)
++	    pdraw->have_fake_front = 1;
++	if (pdraw->buffers[i].attachment == __DRI_BUFFER_BACK_LEFT)
++	    pdraw->have_back = 1;
+     }
+ 
+     Xfree(buffers);
+@@ -332,6 +416,8 @@ static __GLXDRIscreen *dri2CreateScreen(__GLXscreenConfigs *psc, int screen,
+     psp->createContext = dri2CreateContext;
+     psp->createDrawable = dri2CreateDrawable;
+     psp->swapBuffers = dri2SwapBuffers;
++    psp->waitGL = dri2WaitGL;
++    psp->waitX = dri2WaitX;
+ 
+     /* DRI2 suports SubBuffer through DRI2CopyRegion, so it's always
+      * available.*/
+@@ -359,6 +445,28 @@ static void dri2DestroyDisplay(__GLXDRIdisplay *dpy)
+     Xfree(dpy);
+ }
+ 
++/**
++ * Makes a note on receiving ConfigureNotify that we need to re-check the
++ * DRI2 buffers, as window sizes may have resulted in reallocation.
++ */
++static Bool dri2ConfigureNotifyProc(Display *dpy, XEvent *re, xEvent *event)
++{
++    __GLXdisplayPrivate *dpyPriv = __glXInitialize(dpy);
++    __GLXDRIdisplayPrivate *pdp;
++    Bool ret;
++
++    /* We should always be able to find our pdp, as it only gets torn down
++     * when the Display is torn down.
++     */
++    pdp = (__GLXDRIdisplayPrivate *)dpyPriv->dri2Display;
++
++    ret = pdp->oldConfigProc(dpy, re, event);
++
++    pdp->configureSeqno = re->xconfigure.serial;
++
++    return ret;
++}
++
+ /*
+  * Allocate, initialize and return a __DRIdisplayPrivate object.
+  * This is called from __glXInitialize() when we are given a new
+@@ -381,6 +489,9 @@ _X_HIDDEN __GLXDRIdisplay *dri2CreateDisplay(Display *dpy)
+ 	return NULL;
+     }
+ 
++    pdp->oldConfigProc = XESetWireToEvent(dpy, ConfigureNotify,
++					  dri2ConfigureNotifyProc);
++
+     pdp->driPatch = 0;
+ 
+     pdp->base.destroyDisplay = dri2DestroyDisplay;
+diff --git a/src/glx/x11/dri_glx.c b/src/glx/x11/dri_glx.c
+index 44724d2..3089aa1 100644
+--- a/src/glx/x11/dri_glx.c
++++ b/src/glx/x11/dri_glx.c
+@@ -655,6 +655,8 @@ static __GLXDRIscreen *driCreateScreen(__GLXscreenConfigs *psc, int screen,
+     psp->createContext = driCreateContext;
+     psp->createDrawable = driCreateDrawable;
+     psp->swapBuffers = driSwapBuffers;
++    psp->waitX = NULL;
++    psp->waitGL = NULL;
+ 
+     return psp;
+ }
+diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h
+index 16f6074..d37b3ce 100644
+--- a/src/glx/x11/glxclient.h
++++ b/src/glx/x11/glxclient.h
+@@ -139,6 +139,8 @@ struct __GLXDRIscreenRec {
+     void (*swapBuffers)(__GLXDRIdrawable *pdraw);
+     void (*copySubBuffer)(__GLXDRIdrawable *pdraw,
+ 			  int x, int y, int width, int height);
++    void (*waitX)(__GLXDRIdrawable *pdraw);
++    void (*waitGL)(__GLXDRIdrawable *pdraw);
+ };
+ 
+ struct __GLXDRIcontextRec {
+@@ -602,6 +604,7 @@ extern void __glXSendLargeCommand(__GLXcontext *, const GLvoid *, GLint,
+ 				  const GLvoid *, GLint);
+ 
+ /* Initialize the GLX extension for dpy */
++extern __GLXdisplayPrivate * __glXGetPrivateFromDisplay(Display *dpy);
+ extern __GLXdisplayPrivate *__glXInitialize(Display*);
+ 
+ /************************************************************************/
+diff --git a/src/glx/x11/glxcmds.c b/src/glx/x11/glxcmds.c
+index c68b6ac..fc0e593 100644
+--- a/src/glx/x11/glxcmds.c
++++ b/src/glx/x11/glxcmds.c
+@@ -611,11 +611,15 @@ PUBLIC void glXWaitGL(void)
+ 
+ #ifdef GLX_DIRECT_RENDERING
+     if (gc->driContext) {
+-/* This bit of ugliness unwraps the glFinish function */
+-#ifdef glFinish
+-#undef glFinish
+-#endif
+-	glFinish();
++    	int screen;
++    	__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, gc->currentDrawable, &screen);
++
++    	if ( pdraw != NULL ) {
++	    __GLXscreenConfigs * const psc = GetGLXScreenConfigs(dpy, screen);
++	    glFlush();
++	    if (psc->driScreen->waitGL != NULL)
++	    	(*psc->driScreen->waitGL)(pdraw);
++	}
+ 	return;
+     }
+ #endif
+@@ -647,7 +651,15 @@ PUBLIC void glXWaitX(void)
+ 
+ #ifdef GLX_DIRECT_RENDERING
+     if (gc->driContext) {
+-	XSync(dpy, False);
++    	int screen;
++    	__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, gc->currentDrawable, &screen);
++
++    	if ( pdraw != NULL ) {
++	    __GLXscreenConfigs * const psc = GetGLXScreenConfigs(dpy, screen);
++	    if (psc->driScreen->waitX != NULL)
++	    	(*psc->driScreen->waitX)(pdraw);
++	} else
++	    XSync(dpy, False);
+ 	return;
+     }
+ #endif
+diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
+index ded0796..f990bde 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm.h
++++ b/src/mesa/drivers/dri/i965/brw_wm.h
+@@ -60,16 +60,16 @@ struct brw_wm_prog_key {
+    GLuint aa_dest_stencil_reg:3;
+    GLuint dest_depth_reg:3;
+    GLuint nr_depth_regs:3;
+-   GLuint projtex_mask:8;
+-   GLuint shadowtex_mask:8;
+    GLuint computes_depth:1;	/* could be derived from program string */
+    GLuint source_depth_to_render_target:1;
+    GLuint flat_shade:1;
+    GLuint runtime_check_aads_emit:1;
+    
+-   GLuint yuvtex_mask:8;
+-   GLuint yuvtex_swap_mask:8;	/* UV swaped */
+-   GLuint pad1:16;
++   GLuint projtex_mask:16;
++   GLuint shadowtex_mask:16;
++   GLuint yuvtex_mask:16;
++   GLuint yuvtex_swap_mask:16;	/* UV swaped */
++   //   GLuint pad1:16;
+ 
+    GLuint program_string_id:32;
+    GLuint origin_x, origin_y;
+diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+index 205a716..fca7b7a 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
++++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+@@ -379,14 +379,22 @@ static void pass0_precalc_mov( struct brw_wm_compile *c,
+ {
+    const struct prog_dst_register *dst = &inst->DstReg;
+    GLuint writemask = inst->DstReg.WriteMask;
++   struct brw_wm_ref *refs[4];
+    GLuint i;
+ 
+    /* Get the effect of a MOV by manipulating our register table:
++    * First get all refs, then assign refs.  This ensures that "in-place"
++    * swizzles such as:
++    *   MOV t, t.xxyx
++    * are handled correctly.  Previously, these two steps were done in
++    * one loop and the above case was incorrectly handled.
+     */
+    for (i = 0; i < 4; i++) {
+-      if (writemask & (1<<i)) {	    
+-	 pass0_set_fpreg_ref( c, dst->File, dst->Index, i, 
+-			      get_new_ref(c, inst->SrcReg[0], i, NULL));
++      refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
++   }
++   for (i = 0; i < 4; i++) {
++      if (writemask & (1 << i)) {	    
++         pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
+       }
+    }
+ }
+diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
+index 048286c..348da34 100644
+--- a/src/mesa/drivers/dri/intel/intel_context.h
++++ b/src/mesa/drivers/dri/intel/intel_context.h
+@@ -168,6 +168,7 @@ struct intel_context
+ 
+       GLint saved_vp_x, saved_vp_y;
+       GLsizei saved_vp_width, saved_vp_height;
++      GLenum saved_matrix_mode;
+    } meta;
+ 
+    GLint refcount;
+diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
+index cf2f32d..5e32288 100644
+--- a/src/mesa/drivers/dri/intel/intel_pixel.c
++++ b/src/mesa/drivers/dri/intel/intel_pixel.c
+@@ -181,6 +181,7 @@ intel_meta_set_passthrough_transform(struct intel_context *intel)
+    intel->meta.saved_vp_y = ctx->Viewport.Y;
+    intel->meta.saved_vp_width = ctx->Viewport.Width;
+    intel->meta.saved_vp_height = ctx->Viewport.Height;
++   intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode;
+ 
+    _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
+ 
+@@ -202,6 +203,8 @@ intel_meta_restore_transform(struct intel_context *intel)
+    _mesa_MatrixMode(GL_MODELVIEW);
+    _mesa_PopMatrix();
+ 
++   _mesa_MatrixMode(intel->meta.saved_matrix_mode);
++
+    _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y,
+ 		  intel->meta.saved_vp_width, intel->meta.saved_vp_height);
+ }
+diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+index 0e83afa..bb36649 100644
+--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
++++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+@@ -92,7 +92,7 @@ intel_texture_drawpixels(GLcontext * ctx,
+       return GL_FALSE;
+    }
+ 
+-   /* We don't have a way to generate fragments with stencil values which *
++   /* We don't have a way to generate fragments with stencil values which
+     * will set the resulting stencil value.
+     */
+    if (format == GL_STENCIL_INDEX)
+@@ -225,6 +225,10 @@ intel_stencil_drawpixels(GLcontext * ctx,
+       return GL_FALSE;
+    }
+ 
++   /* We don't support stencil testing/ops here */
++   if (ctx->Stencil.Enabled)
++      return GL_FALSE;
++
+    /* We use FBOs for our wrapping of the depthbuffer into a color
+     * destination.
+     */
+diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
+index 55a73ea..f6bd1eb 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
++++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
+@@ -247,6 +247,9 @@
+ #define PCI_CHIP_RS350_7835		0x7835
+ #define PCI_CHIP_RS690_791E             0x791E
+ #define PCI_CHIP_RS690_791F             0x791F
++#define PCI_CHIP_RS600_793F             0x793F
++#define PCI_CHIP_RS600_7941             0x7941
++#define PCI_CHIP_RS600_7942             0x7942
+ #define PCI_CHIP_RS740_796C             0x796C
+ #define PCI_CHIP_RS740_796D             0x796D
+ #define PCI_CHIP_RS740_796E             0x796E
+@@ -270,6 +273,7 @@ enum {
+    CHIP_FAMILY_R420,
+    CHIP_FAMILY_RV410,
+    CHIP_FAMILY_RS400,
++   CHIP_FAMILY_RS600,
+    CHIP_FAMILY_RS690,
+    CHIP_FAMILY_RS740,
+    CHIP_FAMILY_RV515,
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
+index 5f32dd5..81337da 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
+@@ -680,6 +680,12 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+       screen->chip_family = CHIP_FAMILY_RS400;
+       break;
+ 
++   case PCI_CHIP_RS600_793F:
++   case PCI_CHIP_RS600_7941:
++   case PCI_CHIP_RS600_7942:
++      screen->chip_family = CHIP_FAMILY_RS600;
++      break;
++
+    case PCI_CHIP_RS690_791E:
+    case PCI_CHIP_RS690_791F:
+       screen->chip_family = CHIP_FAMILY_RS690;
+@@ -838,7 +844,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
+                          &temp);
+    if (ret) {
+-       if (screen->chip_family < CHIP_FAMILY_RS690)
++       if (screen->chip_family < CHIP_FAMILY_RS600)
+ 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+        else {
+            FREE( screen );
+@@ -849,7 +855,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+        screen->fbLocation = (temp & 0xffff) << 16;
+    }
+ 
+-   if (screen->chip_family >= CHIP_FAMILY_RV515) {
++   if (screen->chip_family >= CHIP_FAMILY_R300) {
+        ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
+ 			     &temp);
+        if (ret) {
+diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c
+index ea35852..73fde86 100644
+--- a/src/mesa/drivers/x11/fakeglx.c
++++ b/src/mesa/drivers/x11/fakeglx.c
+@@ -1,8 +1,9 @@
+ /*
+  * Mesa 3-D graphics library
+- * Version:  7.1
++ * Version:  7.5
+  *
+- * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
++ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
++ * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+@@ -1392,6 +1393,25 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list )
+ }
+ 
+ 
++/**
++ * Init basic fields of a new fake_glx_context.
++ * If the MESA_GLX_FORCE_DIRECT env var is set, the context will be marked as
++ * a direct rendering context.  Some apps won't run without this.
++ */
++static void
++init_glx_context(struct fake_glx_context *glxCtx, Display *dpy)
++{
++   GLboolean direct = _mesa_getenv("MESA_GLX_FORCE_DIRECT") ? GL_TRUE : GL_FALSE;
++   glxCtx->xmesaContext->direct = direct;
++   glxCtx->glxContext.isDirect = direct;
++   glxCtx->glxContext.currentDpy = dpy;
++   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
++
++   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
++}
++
++
++
+ static GLXContext
+ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
+                        GLXContext share_list, Bool direct )
+@@ -1430,12 +1450,7 @@ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
+       return NULL;
+    }
+ 
+-   glxCtx->xmesaContext->direct = GL_FALSE;
+-   glxCtx->glxContext.isDirect = GL_FALSE;
+-   glxCtx->glxContext.currentDpy = dpy;
+-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+-
+-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
++   init_glx_context(glxCtx, dpy);
+ 
+    return (GLXContext) glxCtx;
+ }
+@@ -2441,12 +2456,7 @@ Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config,
+       return NULL;
+    }
+ 
+-   glxCtx->xmesaContext->direct = GL_FALSE;
+-   glxCtx->glxContext.isDirect = GL_FALSE;
+-   glxCtx->glxContext.currentDpy = dpy;
+-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+-
+-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
++   init_glx_context(glxCtx, dpy);
+ 
+    return (GLXContext) glxCtx;
+ }
+@@ -2664,12 +2674,7 @@ Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int re
+       return NULL;
+    }
+ 
+-   glxCtx->xmesaContext->direct = GL_FALSE;
+-   glxCtx->glxContext.isDirect = GL_FALSE;
+-   glxCtx->glxContext.currentDpy = dpy;
+-   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+-
+-   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
++   init_glx_context(glxCtx, dpy);
+ 
+    return (GLXContext) glxCtx;
+ }
+diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h
+index 27ccd2e..8594a90 100644
+--- a/src/mesa/glapi/glthread.h
++++ b/src/mesa/glapi/glthread.h
+@@ -225,9 +225,21 @@ typedef xmutex_rec _glthread_Mutex;
+  */
+ #ifdef BEOS_THREADS
+ 
++/* Problem with OS.h and this file on haiku */
++#ifndef __HAIKU__
+ #include <kernel/OS.h>
++#endif
++
+ #include <support/TLS.h>
+ 
++/* The only two typedefs required here
++ * this is because of the OS.h problem
++ */
++#ifdef __HAIKU__
++typedef int32 thread_id;
++typedef int32 sem_id;
++#endif
++
+ typedef struct {
+    int32        key;
+    int          initMagic;
+diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
+index 825c841..f77d331 100644
+--- a/src/mesa/main/attrib.c
++++ b/src/mesa/main/attrib.c
+@@ -1275,9 +1275,6 @@ adjust_buffer_object_ref_counts(struct gl_array_attrib *array, GLint step)
+       array->ArrayObj->TexCoord[i].BufferObj->RefCount += step;
+    for (i = 0; i < VERT_ATTRIB_MAX; i++)
+       array->ArrayObj->VertexAttrib[i].BufferObj->RefCount += step;
+-
+-   array->ArrayBufferObj->RefCount += step;
+-   array->ElementArrayBufferObj->RefCount += step;
+ }
+ 
+ 
+diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
+index 59fe8e2..df90610 100644
+--- a/src/mesa/main/bufferobj.c
++++ b/src/mesa/main/bufferobj.c
+@@ -794,6 +794,13 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids)
+ 
+          ASSERT(bufObj->Name == ids[i]);
+ 
++         if (bufObj->Pointer) {
++            /* if mapped, unmap it now */
++            ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
++            bufObj->Access = GL_READ_WRITE_ARB;
++            bufObj->Pointer = NULL;
++         }
++
+          unbind(ctx, &ctx->Array.ArrayObj->Vertex.BufferObj, bufObj);
+          unbind(ctx, &ctx->Array.ArrayObj->Normal.BufferObj, bufObj);
+          unbind(ctx, &ctx->Array.ArrayObj->Color.BufferObj, bufObj);
+@@ -944,8 +951,10 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
+    }
+    
+    if (bufObj->Pointer) {
+-      _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferDataARB(buffer is mapped)" );
+-      return;
++      /* Unmap the existing buffer.  We'll replace it now.  Not an error. */
++      ctx->Driver.UnmapBuffer(ctx, target, bufObj);
++      bufObj->Access = GL_READ_WRITE_ARB;
++      bufObj->Pointer = NULL;
+    }  
+ 
+    ASSERT(ctx->Driver.BufferData);
+@@ -1061,11 +1070,8 @@ _mesa_UnmapBufferARB(GLenum target)
+       return GL_FALSE;
+    }
+ 
+-   if (ctx->Driver.UnmapBuffer) {
+-      status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+-   }
+-
+-   bufObj->Access = GL_READ_WRITE_ARB; /* initial value, OK? */
++   status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
++   bufObj->Access = GL_READ_WRITE_ARB;
+    bufObj->Pointer = NULL;
+ 
+    return status;
+diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
+index cf1198c..f74576e 100644
+--- a/src/mesa/main/config.h
++++ b/src/mesa/main/config.h
+@@ -191,7 +191,7 @@
+ #define MAX_PROGRAM_CALL_DEPTH 8
+ #define MAX_PROGRAM_TEMPS 128
+ #define MAX_PROGRAM_ADDRESS_REGS 2
+-#define MAX_UNIFORMS 256   /**< number of vec4 uniforms */
++#define MAX_UNIFORMS 1024  /**< number of vec4 uniforms */
+ #define MAX_VARYING 8      /**< number of float[4] vectors */
+ #define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS
+ #define MAX_PROGRAM_INPUTS 32
+diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c
+index becef81..d9d1152 100644
+--- a/src/mesa/main/dlopen.c
++++ b/src/mesa/main/dlopen.c
+@@ -36,6 +36,15 @@
+ #include <dlfcn.h>
+ #endif
+ 
++#if defined(_WIN32)
++#include <windows.h>
++#endif
++
++#if defined(__HAIKU__)
++/* for NULL */
++#include <stdio.h>
++#endif
++
+ 
+ /**
+  * Wrapper for dlopen().
+diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
+index 9522f04..97cfa26 100644
+--- a/src/mesa/main/extensions.c
++++ b/src/mesa/main/extensions.c
+@@ -204,7 +204,7 @@ _mesa_enable_sw_extensions(GLcontext *ctx)
+    ctx->Extensions.ARB_shading_language_100 = GL_TRUE;
+ #endif
+ #if FEATURE_ARB_shading_language_120
+-   ctx->Extensions.ARB_shading_language_120 = GL_FALSE; /* not quite done */
++   ctx->Extensions.ARB_shading_language_120 = GL_TRUE;
+ #endif
+    ctx->Extensions.ARB_shadow = GL_TRUE;
+    ctx->Extensions.ARB_texture_border_clamp = GL_TRUE;
+@@ -427,7 +427,7 @@ _mesa_enable_2_1_extensions(GLcontext *ctx)
+    ctx->Extensions.EXT_texture_sRGB = GL_TRUE;
+ #endif
+ #ifdef FEATURE_ARB_shading_language_120
+-   ctx->Extensions.ARB_shading_language_120 = GL_FALSE; /* not quite done */
++   ctx->Extensions.ARB_shading_language_120 = GL_TRUE;
+ #endif
+ }
+ 
+diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
+index 94bf5de..e1008d7 100644
+--- a/src/mesa/main/getstring.c
++++ b/src/mesa/main/getstring.c
+@@ -84,7 +84,7 @@ compute_version(const GLcontext *ctx)
+                               ctx->Extensions.ARB_texture_non_power_of_two &&
+                               ctx->Extensions.EXT_blend_equation_separate);
+    const GLboolean ver_2_1 = (ver_2_0 &&
+-                              /*ctx->Extensions.ARB_shading_language_120 &&*/
++                              ctx->Extensions.ARB_shading_language_120 &&
+                               ctx->Extensions.EXT_pixel_buffer_object &&
+                               ctx->Extensions.EXT_texture_sRGB);
+    if (ver_2_1)
+diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
+index 13b571d..d71d59c 100644
+--- a/src/mesa/main/imports.h
++++ b/src/mesa/main/imports.h
+@@ -325,7 +325,8 @@ static INLINE int iround(float f)
+ }
+ #define IROUND(x)  iround(x)
+ #elif defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) && \
+-			(!defined(__BEOS__) || (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)))
++			(!(defined(__BEOS__) || defined(__HAIKU__))  || \
++			(__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)))
+ static INLINE int iround(float f)
+ {
+    int r;
+diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
+index 6fe54c7..5284b7d 100644
+--- a/src/mesa/main/state.c
++++ b/src/mesa/main/state.c
+@@ -173,13 +173,16 @@ update_arrays( GLcontext *ctx )
+ }
+ 
+ 
++/**
++ * Update the following fields:
++ *   ctx->VertexProgram._Enabled
++ *   ctx->FragmentProgram._Enabled
++ *   ctx->ATIFragmentShader._Enabled
++ * This needs to be done before texture state validation.
++ */
+ static void
+-update_program(GLcontext *ctx)
++update_program_enables(GLcontext *ctx)
+ {
+-   const struct gl_shader_program *shProg = ctx->Shader.CurrentProgram;
+-   const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
+-   const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
+-
+    /* These _Enabled flags indicate if the program is enabled AND valid. */
+    ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled
+       && ctx->VertexProgram.Current->Base.Instructions;
+@@ -187,6 +190,29 @@ update_program(GLcontext *ctx)
+       && ctx->FragmentProgram.Current->Base.Instructions;
+    ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled
+       && ctx->ATIFragmentShader.Current->Instructions[0];
++}
++
++
++/**
++ * Update vertex/fragment program state.  In particular, update these fields:
++ *   ctx->VertexProgram._Current
++ *   ctx->VertexProgram._TnlProgram,
++ * These point to the highest priority enabled vertex/fragment program or are
++ * NULL if fixed-function processing is to be done.
++ *
++ * This function needs to be called after texture state validation in case
++ * we're generating a fragment program from fixed-function texture state.
++ *
++ * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex
++ * or fragment program is being used.
++ */
++static GLbitfield
++update_program(GLcontext *ctx)
++{
++   const struct gl_shader_program *shProg = ctx->Shader.CurrentProgram;
++   const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
++   const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
++   GLbitfield new_state = 0x0;
+ 
+    /*
+     * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current
+@@ -256,15 +282,23 @@ update_program(GLcontext *ctx)
+ 
+    /* Let the driver know what's happening:
+     */
+-   if (ctx->FragmentProgram._Current != prevFP && ctx->Driver.BindProgram) {
+-      ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
+-                              (struct gl_program *) ctx->FragmentProgram._Current);
++   if (ctx->FragmentProgram._Current != prevFP) {
++      new_state |= _NEW_PROGRAM;
++      if (ctx->Driver.BindProgram) {
++         ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
++                          (struct gl_program *) ctx->FragmentProgram._Current);
++      }
+    }
+    
+-   if (ctx->VertexProgram._Current != prevVP && ctx->Driver.BindProgram) {
+-      ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+-                              (struct gl_program *) ctx->VertexProgram._Current);
++   if (ctx->VertexProgram._Current != prevVP) {
++      new_state |= _NEW_PROGRAM;
++      if (ctx->Driver.BindProgram) {
++         ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
++                            (struct gl_program *) ctx->VertexProgram._Current);
++      }
+    }
++
++   return new_state;
+ }
+ 
+ 
+@@ -425,10 +459,29 @@ _mesa_update_state_locked( GLcontext *ctx )
+ {
+    GLbitfield new_state = ctx->NewState;
+    GLbitfield prog_flags = _NEW_PROGRAM;
++   GLbitfield new_prog_state = 0x0;
+ 
+    if (MESA_VERBOSE & VERBOSE_STATE)
+       _mesa_print_state("_mesa_update_state", new_state);
+ 
++   /* Determine which state flags affect vertex/fragment program state */
++   if (ctx->FragmentProgram._MaintainTexEnvProgram) {
++      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
++   }
++   if (ctx->VertexProgram._MaintainTnlProgram) {
++      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
++                     _NEW_TRANSFORM | _NEW_POINT |
++                     _NEW_FOG | _NEW_LIGHT |
++                     _MESA_NEW_NEED_EYE_COORDS);
++   }
++
++   /*
++    * Now update derived state info
++    */
++
++   if (new_state & prog_flags)
++      update_program_enables( ctx );
++
+    if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+       _mesa_update_modelview_project( ctx, new_state );
+ 
+@@ -488,19 +541,13 @@ _mesa_update_state_locked( GLcontext *ctx )
+    if (new_state & _MESA_NEW_NEED_EYE_COORDS) 
+       _mesa_update_tnl_spaces( ctx, new_state );
+ 
+-   if (ctx->FragmentProgram._MaintainTexEnvProgram) {
+-      prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+-   }
+-   if (ctx->VertexProgram._MaintainTnlProgram) {
+-      prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
+-                     _NEW_TRANSFORM | _NEW_POINT |
+-                     _NEW_FOG | _NEW_LIGHT |
+-                     _MESA_NEW_NEED_EYE_COORDS);
++   if (new_state & prog_flags) {
++      /* When we generate programs from fixed-function vertex/fragment state
++       * this call may generate/bind a new program.  If so, we need to
++       * propagate the _NEW_PROGRAM flag to the driver.
++       */
++      new_prog_state |= update_program( ctx );
+    }
+-   if (new_state & prog_flags)
+-      update_program( ctx );
+-
+-
+ 
+    /*
+     * Give the driver a chance to act upon the new_state flags.
+@@ -511,7 +558,8 @@ _mesa_update_state_locked( GLcontext *ctx )
+     * Set ctx->NewState to zero to avoid recursion if
+     * Driver.UpdateState() has to call FLUSH_VERTICES().  (fixed?)
+     */
+-   new_state = ctx->NewState;
++ out:
++   new_state = ctx->NewState | new_prog_state;
+    ctx->NewState = 0;
+    ctx->Driver.UpdateState(ctx, new_state);
+    ctx->Array.NewState = 0;
+diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
+index 9e968ba..a437b9c 100644
+--- a/src/mesa/main/teximage.c
++++ b/src/mesa/main/teximage.c
+@@ -2603,7 +2603,7 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat,
+                               1, border)) {
+          /* when error, clear all proxy texture image parameters */
+          if (texImage)
+-            clear_teximage_fields(ctx->Texture.ProxyTex[TEXTURE_2D_INDEX]->Image[0][level]);
++            clear_teximage_fields(texImage);
+       }
+       else {
+          /* no error, set the tex image parameters */
+diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
+index 7848f0b..c937dac 100644
+--- a/src/mesa/main/texobj.c
++++ b/src/mesa/main/texobj.c
+@@ -979,11 +979,11 @@ _mesa_BindTexture( GLenum target, GLuint texName )
+          ASSERT(texUnit->CurrentRect);
+          break;
+       case GL_TEXTURE_1D_ARRAY_EXT:
+-         texUnit->Current1DArray = newTexObj;
++         _mesa_reference_texobj(&texUnit->Current1DArray, newTexObj);
+          ASSERT(texUnit->Current1DArray);
+          break;
+       case GL_TEXTURE_2D_ARRAY_EXT:
+-         texUnit->Current2DArray = newTexObj;
++         _mesa_reference_texobj(&texUnit->Current2DArray, newTexObj);
+          ASSERT(texUnit->Current2DArray);
+          break;
+       default:
+diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
+index 9bfb7e0..7b5d904 100644
+--- a/src/mesa/main/texstate.c
++++ b/src/mesa/main/texstate.c
+@@ -498,25 +498,27 @@ update_texture_state( GLcontext *ctx )
+       texUnit->_ReallyEnabled = 0;
+       texUnit->_GenFlags = 0;
+ 
+-      /* Get the bitmask of texture enables.
++      /* Get the bitmask of texture target enables.
+        * enableBits will be a mask of the TEXTURE_*_BIT flags indicating
+        * which texture targets are enabled (fixed function) or referenced
+        * by a fragment shader/program.  When multiple flags are set, we'll
+        * settle on the one with highest priority (see texture_override below).
+        */
+-      if (fprog || vprog) {
+-         enableBits = 0x0;
+-         if (fprog)
+-            enableBits |= fprog->Base.TexturesUsed[unit];
+-         if (vprog)
+-            enableBits |= vprog->Base.TexturesUsed[unit];
++      enableBits = 0x0;
++      if (vprog) {
++         enableBits |= vprog->Base.TexturesUsed[unit];
++      }
++      if (fprog) {
++         enableBits |= fprog->Base.TexturesUsed[unit];
+       }
+       else {
+-         if (!texUnit->Enabled)
+-            continue;
+-         enableBits = texUnit->Enabled;
++         /* fixed-function fragment program */
++         enableBits |= texUnit->Enabled;
+       }
+ 
++      if (enableBits == 0x0)
++         continue;
++
+       ASSERT(texUnit->Current1D);
+       ASSERT(texUnit->Current2D);
+       ASSERT(texUnit->Current3D);
+diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h
+index 3d874c8..84dcb26 100644
+--- a/src/mesa/main/version.h
++++ b/src/mesa/main/version.h
+@@ -1,6 +1,6 @@
+ /*
+  * Mesa 3-D graphics library
+- * Version:  7.3
++ * Version:  7.4
+  *
+  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+  *
+@@ -29,9 +29,9 @@
+ 
+ /* Mesa version */
+ #define MESA_MAJOR 7
+-#define MESA_MINOR 3
++#define MESA_MINOR 4
+ #define MESA_PATCH 0
+-#define MESA_VERSION_STRING "7.3"
++#define MESA_VERSION_STRING "7.4"
+ 
+ /* To make version comparison easy */
+ #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h
+index 268afc5..0ef0251 100644
+--- a/src/mesa/shader/prog_instruction.h
++++ b/src/mesa/shader/prog_instruction.h
+@@ -240,12 +240,21 @@ typedef enum prog_opcode {
+ 
+ 
+ /**
++ * Number of bits for the src/dst register Index field.
++ * This limits the size of temp/uniform register files.
++ */
++#define INST_INDEX_BITS 10
++
++
++/**
+  * Instruction source register.
+  */
+ struct prog_src_register
+ {
+    GLuint File:4;	/**< One of the PROGRAM_* register file values. */
+-   GLint Index:9;	/**< May be negative for relative addressing. */
++   GLint Index:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
++                                     * May be negative for relative addressing.
++                                     */
+    GLuint Swizzle:12;
+    GLuint RelAddr:1;
+ 
+@@ -289,7 +298,7 @@ struct prog_src_register
+ struct prog_dst_register
+ {
+    GLuint File:4;      /**< One of the PROGRAM_* register file values */
+-   GLuint Index:8;
++   GLuint Index:INST_INDEX_BITS;  /**< Unsigned, never negative */
+    GLuint WriteMask:4;
+    GLuint RelAddr:1;
+ 
+@@ -322,8 +331,7 @@ struct prog_dst_register
+     */
+    GLuint CondSrc:1;
+    /*@}*/
+-
+-   GLuint pad:30;
++   GLuint pad:28;
+ };
+ 
+ 
+diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c
+index 0ec13a4..b67a87f 100644
+--- a/src/mesa/shader/prog_print.c
++++ b/src/mesa/shader/prog_print.c
+@@ -85,6 +85,9 @@ file_string(enum register_file f, gl_prog_print_mode mode)
+ static const char *
+ arb_input_attrib_string(GLint index, GLenum progType)
+ {
++   /*
++    * These strings should match the VERT_ATTRIB_x and FRAG_ATTRIB_x tokens.
++    */
+    const char *vertAttribs[] = {
+       "vertex.position",
+       "vertex.weight",
+@@ -159,6 +162,9 @@ arb_input_attrib_string(GLint index, GLenum progType)
+ static const char *
+ arb_output_attrib_string(GLint index, GLenum progType)
+ {
++   /*
++    * These strings should match the VERT_RESULT_x and FRAG_RESULT_x tokens.
++    */
+    const char *vertResults[] = {
+       "result.position",
+       "result.color.primary",
+@@ -183,7 +189,12 @@ arb_output_attrib_string(GLint index, GLenum progType)
+    };
+    const char *fragResults[] = {
+       "result.color",
+-      "result.depth"
++      "result.color(half)",
++      "result.depth",
++      "result.color[0]",
++      "result.color[1]",
++      "result.color[2]",
++      "result.color[3]"
+    };
+ 
+    if (progType == GL_VERTEX_PROGRAM_ARB) {
+diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
+index d114828..828db38 100644
+--- a/src/mesa/shader/program.c
++++ b/src/mesa/shader/program.c
+@@ -53,6 +53,15 @@ _mesa_init_program(GLcontext *ctx)
+ {
+    GLuint i;
+ 
++   /*
++    * If this assertion fails, we need to increase the field
++    * size for register indexes.
++    */
++   ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4
++          <= (1 << INST_INDEX_BITS));
++   ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4
++          <= (1 << INST_INDEX_BITS));
++
+    ctx->Program.ErrorPos = -1;
+    ctx->Program.ErrorString = _mesa_strdup("");
+ 
+diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
+index b3d66c5..28b668d 100644
+--- a/src/mesa/shader/shader_api.c
++++ b/src/mesa/shader/shader_api.c
+@@ -1,8 +1,9 @@
+ /*
+  * Mesa 3-D graphics library
+- * Version:  7.2
++ * Version:  7.5
+  *
+  * Copyright (C) 2004-2008  Brian Paul   All Rights Reserved.
++ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+@@ -801,6 +802,27 @@ is_integer_type(GLenum type)
+ }
+ 
+ 
++static GLboolean
++is_sampler_type(GLenum type)
++{
++   switch (type) {
++   case GL_SAMPLER_1D:
++   case GL_SAMPLER_2D:
++   case GL_SAMPLER_3D:
++   case GL_SAMPLER_CUBE:
++   case GL_SAMPLER_1D_SHADOW:
++   case GL_SAMPLER_2D_SHADOW:
++   case GL_SAMPLER_2D_RECT_ARB:
++   case GL_SAMPLER_2D_RECT_SHADOW_ARB:
++   case GL_SAMPLER_1D_ARRAY_EXT:
++   case GL_SAMPLER_2D_ARRAY_EXT:
++      return GL_TRUE;
++   default:
++      return GL_FALSE;
++   }
++}
++
++
+ static void
+ _mesa_get_active_attrib(GLcontext *ctx, GLuint program, GLuint index,
+                         GLsizei maxLength, GLsizei *length, GLint *size,
+@@ -866,6 +888,7 @@ _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index,
+ {
+    const struct gl_shader_program *shProg;
+    const struct gl_program *prog;
++   const struct gl_program_parameter *param;
+    GLint progPos;
+ 
+    shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
+@@ -891,14 +914,30 @@ _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index,
+    if (!prog || progPos < 0)
+       return; /* should never happen */
+ 
+-   if (nameOut)
+-      copy_string(nameOut, maxLength, length,
+-                  prog->Parameters->Parameters[progPos].Name);
+-   if (size)
+-      *size = prog->Parameters->Parameters[progPos].Size
+-         / sizeof_glsl_type(prog->Parameters->Parameters[progPos].DataType);
+-   if (type)
+-      *type = prog->Parameters->Parameters[progPos].DataType;
++   ASSERT(progPos < prog->Parameters->NumParameters);
++   param = &prog->Parameters->Parameters[progPos];
++
++   if (nameOut) {
++      copy_string(nameOut, maxLength, length, param->Name);
++   }
++
++   if (size) {
++      GLint typeSize = sizeof_glsl_type(param->DataType);
++      if (param->Size > typeSize) {
++         /* This is an array.
++          * Array elements are placed on vector[4] boundaries so they're
++          * a multiple of four floats.  We round typeSize up to next multiple
++          * of four to get the right size below.
++          */
++         typeSize = (typeSize + 3) & ~3;
++      }
++      /* Note that the returned size is in units of the <type>, not bytes */
++      *size = param->Size / typeSize;
++   }
++
++   if (type) {
++      *type = param->DataType;
++   }
+ }
+ 
+ 
+@@ -1135,24 +1174,30 @@ get_uniform_rows_cols(const struct gl_program_parameter *p,
+ }
+ 
+ 
+-#define MAX_UNIFORM_ELEMENTS 16
+-
+ /**
+- * Helper for GetUniformfv(), GetUniformiv()
+- * Returns number of elements written to 'params' output.
++ * Helper for get_uniform[fi]v() functions.
++ * Given a shader program name and uniform location, return a pointer
++ * to the shader program and return the program parameter position.
+  */
+-static GLuint
+-get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
+-              GLfloat *params)
++static void
++lookup_uniform_parameter(GLcontext *ctx, GLuint program, GLint location,
++                         struct gl_program **progOut, GLint *paramPosOut)
+ {
+    struct gl_shader_program *shProg
+       = _mesa_lookup_shader_program_err(ctx, program, "glGetUniform[if]v");
+-   if (shProg) {
+-      if (shProg->Uniforms &&
+-          location >= 0 && location < (GLint) shProg->Uniforms->NumUniforms) {
+-         GLint progPos;
+-         const struct gl_program *prog = NULL;
++   struct gl_program *prog = NULL;
++   GLint progPos = -1;
++
++   /* if shProg is NULL, we'll have already recorded an error */
+ 
++   if (shProg) {
++      if (!shProg->Uniforms ||
++          location < 0 ||
++          location >= (GLint) shProg->Uniforms->NumUniforms) {
++         _mesa_error(ctx, GL_INVALID_OPERATION,  "glGetUniformfv(location)");
++      }
++      else {
++         /* OK, find the gl_program and program parameter location */
+          progPos = shProg->Uniforms->Uniforms[location].VertPos;
+          if (progPos >= 0) {
+             prog = &shProg->VertexProgram->Base;
+@@ -1163,33 +1208,11 @@ get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
+                prog = &shProg->FragmentProgram->Base;
+             }
+          }
+-
+-         ASSERT(prog);
+-         if (prog) {
+-            const struct gl_program_parameter *p =
+-               &prog->Parameters->Parameters[progPos];
+-            GLint rows, cols, i, j, k;
+-
+-            /* See uniformiv() below */                    
+-            assert(p->Size <= MAX_UNIFORM_ELEMENTS);
+-
+-            get_uniform_rows_cols(p, &rows, &cols);
+-
+-            k = 0;
+-            for (i = 0; i < rows; i++) {
+-               for (j = 0; j < cols; j++ ) {
+-                  params[k++] = prog->Parameters->ParameterValues[progPos+i][j];
+-               }
+-            }
+-
+-            return p->Size;
+-         }
+-      }
+-      else {
+-         _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformfv(location)");
+       }
+    }
+-   return 0;
++
++   *progOut = prog;
++   *paramPosOut = progPos;
+ }
+ 
+ 
+@@ -1200,23 +1223,54 @@ static void
+ _mesa_get_uniformfv(GLcontext *ctx, GLuint program, GLint location,
+                     GLfloat *params)
+ {
+-   (void) get_uniformfv(ctx, program, location, params);
++   struct gl_program *prog;
++   GLint paramPos;
++
++   lookup_uniform_parameter(ctx, program, location, &prog, &paramPos);
++
++   if (prog) {
++      const struct gl_program_parameter *p =
++         &prog->Parameters->Parameters[paramPos];
++      GLint rows, cols, i, j, k;
++
++      get_uniform_rows_cols(p, &rows, &cols);
++
++      k = 0;
++      for (i = 0; i < rows; i++) {
++         for (j = 0; j < cols; j++ ) {
++            params[k++] = prog->Parameters->ParameterValues[paramPos+i][j];
++         }
++      }
++   }
+ }
+ 
+ 
+ /**
+  * Called via ctx->Driver.GetUniformiv().
++ * \sa _mesa_get_uniformfv, only difference is a cast.
+  */
+ static void
+ _mesa_get_uniformiv(GLcontext *ctx, GLuint program, GLint location,
+                     GLint *params)
+ {
+-   GLfloat fparams[MAX_UNIFORM_ELEMENTS];
+-   GLuint n = get_uniformfv(ctx, program, location, fparams);
+-   GLuint i;
+-   assert(n <= MAX_UNIFORM_ELEMENTS);
+-   for (i = 0; i < n; i++) {
+-      params[i] = (GLint) fparams[i];
++   struct gl_program *prog;
++   GLint paramPos;
++
++   lookup_uniform_parameter(ctx, program, location, &prog, &paramPos);
++
++   if (prog) {
++      const struct gl_program_parameter *p =
++         &prog->Parameters->Parameters[paramPos];
++      GLint rows, cols, i, j, k;
++
++      get_uniform_rows_cols(p, &rows, &cols);
++
++      k = 0;
++      for (i = 0; i < rows; i++) {
++         for (j = 0; j < cols; j++ ) {
++            params[k++] = (GLint) prog->Parameters->ParameterValues[paramPos+i][j];
++         }
++      }
+    }
+ }
+ 
+@@ -1401,7 +1455,8 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
+          return;
+       }
+       if (!shProg->LinkStatus) {
+-         _mesa_error(ctx, GL_INVALID_OPERATION, "glUseProgram");
++         _mesa_error(ctx, GL_INVALID_OPERATION,
++                     "glUseProgram(program %u not linked)", program);
+          return;
+       }
+    }
+@@ -1447,27 +1502,6 @@ _mesa_update_shader_textures_used(struct gl_program *prog)
+ }
+ 
+ 
+-static GLboolean
+-is_sampler_type(GLenum type)
+-{
+-   switch (type) {
+-   case GL_SAMPLER_1D:
+-   case GL_SAMPLER_2D:
+-   case GL_SAMPLER_3D:
+-   case GL_SAMPLER_CUBE:
+-   case GL_SAMPLER_1D_SHADOW:
+-   case GL_SAMPLER_2D_SHADOW:
+-   case GL_SAMPLER_2D_RECT_ARB:
+-   case GL_SAMPLER_2D_RECT_SHADOW_ARB:
+-   case GL_SAMPLER_1D_ARRAY_EXT:
+-   case GL_SAMPLER_2D_ARRAY_EXT:
+-      return GL_TRUE;
+-   default:
+-      return GL_FALSE;
+-   }
+-}
+-
+-
+ /**
+  * Check if the type given by userType is allowed to set a uniform of the
+  * target type.  Generally, equivalence is required, but setting Boolean
+@@ -1506,10 +1540,10 @@ compatible_types(GLenum userType, GLenum targetType)
+  * \param program  the program whose uniform to update
+  * \param index  the index of the program parameter for the uniform
+  * \param offset  additional parameter slot offset (for arrays)
+- * \param type  the datatype of the uniform
++ * \param type  the incoming datatype of 'values'
+  * \param count  the number of uniforms to set
+- * \param elems  number of elements per uniform
+- * \param values  the new values
++ * \param elems  number of elements per uniform (1, 2, 3 or 4)
++ * \param values  the new values, of datatype 'type'
+  */
+ static void
+ set_program_uniform(GLcontext *ctx, struct gl_program *program,
+@@ -1519,8 +1553,12 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
+ {
+    struct gl_program_parameter *param =
+       &program->Parameters->Parameters[index];
++   const GLboolean isUniformBool = is_boolean_type(param->DataType);
++   const GLboolean areIntValues = is_integer_type(type);
+ 
+    assert(offset >= 0);
++   assert(elems >= 1);
++   assert(elems <= 4);
+ 
+    if (!compatible_types(type, param->DataType)) {
+       _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)");
+@@ -1535,27 +1573,36 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
+    if (param->Type == PROGRAM_SAMPLER) {
+       /* This controls which texture unit which is used by a sampler */
+       GLuint texUnit, sampler;
++      GLint i;
+ 
+       /* data type for setting samplers must be int */
+-      if (type != GL_INT || count != 1) {
++      if (type != GL_INT) {
+          _mesa_error(ctx, GL_INVALID_OPERATION,
+                      "glUniform(only glUniform1i can be used "
+                      "to set sampler uniforms)");
+          return;
+       }
+ 
+-      sampler = (GLuint) program->Parameters->ParameterValues[index][0];
+-      texUnit = ((GLuint *) values)[0];
++      /* XXX arrays of samplers haven't been tested much, but it's not a
++       * common thing...
++       */
++      for (i = 0; i < count; i++) {
++         sampler = (GLuint) program->Parameters->ParameterValues[index + i][0];
++         texUnit = ((GLuint *) values)[i];
++
++         /* check that the sampler (tex unit index) is legal */
++         if (texUnit >= ctx->Const.MaxTextureImageUnits) {
++            _mesa_error(ctx, GL_INVALID_VALUE,
++                        "glUniform1(invalid sampler/tex unit index)");
++            return;
++         }
+ 
+-      /* check that the sampler (tex unit index) is legal */
+-      if (texUnit >= ctx->Const.MaxTextureImageUnits) {
+-         _mesa_error(ctx, GL_INVALID_VALUE,
+-                     "glUniform1(invalid sampler/tex unit index)");
+-         return;
++         /* This maps a sampler to a texture unit: */
++         if (sampler < MAX_SAMPLERS) {
++            program->SamplerUnits[sampler] = texUnit;
++         }
+       }
+ 
+-      /* This maps a sampler to a texture unit: */
+-      program->SamplerUnits[sampler] = texUnit;
+       _mesa_update_shader_textures_used(program);
+ 
+       FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+@@ -1563,20 +1610,36 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
+    else {
+       /* ordinary uniform variable */
+       GLsizei k, i;
+-      GLint slots = (param->Size + 3) / 4;
++      const GLint slots = (param->Size + 3) / 4;
++      const GLint typeSize = sizeof_glsl_type(param->DataType);
+ 
+-      if (count * elems > (GLint) param->Size) {
+-         _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(count too large)");
+-         return;
++      if (param->Size > typeSize) {
++         /* an array */
++         /* we'll ignore extra data below */
++      }
++      else {
++         /* non-array: count must be one */
++         if (count != 1) {
++            _mesa_error(ctx, GL_INVALID_OPERATION,
++                        "glUniform(uniform is not an array)");
++            return;
++         }
+       }
+ 
+-      if (count > slots)
+-         count = slots;
+-
++      /* loop over number of array elements */
+       for (k = 0; k < count; k++) {
+-         GLfloat *uniformVal =
+-            program->Parameters->ParameterValues[index + offset + k];
+-         if (is_integer_type(type)) {
++         GLfloat *uniformVal;
++
++         if (offset + k >= slots) {
++            /* Extra array data is ignored */
++            break;
++         }
++
++         /* uniformVal (the destination) is always float[4] */
++         uniformVal = program->Parameters->ParameterValues[index + offset + k];
++
++         if (areIntValues) {
++            /* convert user's ints to floats */
+             const GLint *iValues = ((const GLint *) values) + k * elems;
+             for (i = 0; i < elems; i++) {
+                uniformVal[i] = (GLfloat) iValues[i];
+@@ -1590,7 +1653,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
+          }
+ 
+          /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+-         if (is_boolean_type(param->DataType)) {
++         if (isUniformBool) {
+             for (i = 0; i < elems; i++) {
+                uniformVal[i] = uniformVal[i] ? 1.0 : 0.0;
+             }
+@@ -1619,6 +1682,11 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
+    if (location == -1)
+       return;   /* The standard specifies this as a no-op */
+ 
++   if (location < -1) {
++      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)");
++      return;
++   }
++
+    split_location_offset(&location, &offset);
+ 
+    if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) {
+@@ -1758,6 +1826,11 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,
+    if (location == -1)
+       return;   /* The standard specifies this as a no-op */
+ 
++   if (location < -1) {
++      _mesa_error(ctx, GL_INVALID_OPERATION, "glUniformMatrix(location)");
++      return;
++   }
++
+    split_location_offset(&location, &offset);
+ 
+    if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) {
+diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
+index 11340d2..cfdb868 100644
+--- a/src/mesa/shader/slang/slang_codegen.c
++++ b/src/mesa/shader/slang/slang_codegen.c
+@@ -3662,7 +3662,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper)
+       if (lhs && rhs) {
+          /* convert lhs swizzle into writemask */
+          const GLuint swizzle = root_swizzle(lhs->Store);
+-         GLuint writemask, newSwizzle;
++         GLuint writemask, newSwizzle = 0x0;
+          if (!swizzle_to_writemask(A, swizzle, &writemask, &newSwizzle)) {
+             /* Non-simple writemask, need to swizzle right hand side in
+              * order to put components into the right place.
+diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c
+index 818b90b..26a0598 100644
+--- a/src/mesa/shader/slang/slang_compile.c
++++ b/src/mesa/shader/slang/slang_compile.c
+@@ -1450,7 +1450,7 @@ parse_expression(slang_parse_ctx * C, slang_output_ctx * O,
+       case OP_CALL:
+          {
+             GLboolean array_constructor = GL_FALSE;
+-            GLint array_constructor_size;
++            GLint array_constructor_size = 0;
+ 
+             op->type = SLANG_OPER_CALL;
+             op->a_id = parse_identifier(C);
+diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
+index ea446fa..80ccc79 100644
+--- a/src/mesa/shader/slang/slang_emit.c
++++ b/src/mesa/shader/slang/slang_emit.c
+@@ -164,7 +164,7 @@ _slang_var_swizzle(GLint size, GLint comp)
+ {
+    switch (size) {
+    case 1:
+-      return MAKE_SWIZZLE4(comp, comp, comp, comp);
++      return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
+    case 2:
+       return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
+    case 3:
+@@ -451,7 +451,7 @@ emit_arl_load(slang_emit_info *emitInfo,
+    struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
+    inst->SrcReg[0].File = file;
+    inst->SrcReg[0].Index = index;
+-   inst->SrcReg[0].Swizzle = swizzle;
++   inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
+    inst->DstReg.File = PROGRAM_ADDRESS;
+    inst->DstReg.Index = 0;
+    inst->DstReg.WriteMask = WRITEMASK_X;
+@@ -873,6 +873,7 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
+ 
+    if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
+       slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
++      n->Store = NULL;
+       return NULL;
+    }
+ 
+@@ -902,6 +903,7 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
+       slang_ir_storage tempStore;
+ 
+       if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
++         n->Store = NULL;
+          return NULL;
+          /* out of temps */
+       }
+@@ -1358,6 +1360,7 @@ emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
+ 
+ #if PEEPHOLE_OPTIMIZATIONS
+    if (inst &&
++       (n->Children[1]->Opcode != IR_SWIZZLE) &&
+        _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
+        (inst->DstReg.File == n->Children[1]->Store->File) &&
+        (inst->DstReg.Index == n->Children[1]->Store->Index) &&
+@@ -1374,13 +1377,9 @@ emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
+        * becomes:
+        *   MUL a, x, y;
+        */
+-      if (n->Children[1]->Opcode != IR_SWIZZLE)
+-         _slang_free_temp(emitInfo->vt, n->Children[1]->Store);
+-      *n->Children[1]->Store = *n->Children[0]->Store;
+ 
+       /* fixup the previous instruction (which stored the RHS result) */
+       assert(n->Children[0]->Store->Index >= 0);
+-
+       storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
+       return inst;
+    }
+@@ -1813,6 +1812,25 @@ emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
+ }
+ 
+ 
++/**
++ * Return the size of a swizzle mask given that some swizzle components
++ * may be NIL/undefined.  For example:
++ *  swizzle_size(".zzxx") = 4
++ *  swizzle_size(".xy??") = 2
++ *  swizzle_size(".w???") = 1
++ */
++static GLuint
++swizzle_size(GLuint swizzle)
++{
++   GLuint i;
++   for (i = 0; i < 4; i++) {
++      if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
++         return i;
++   }
++   return 4;
++}
++
++
+ static struct prog_instruction *
+ emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
+ {
+@@ -1820,14 +1838,25 @@ emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
+ 
+    inst = emit(emitInfo, n->Children[0]);
+ 
+-#if 0
+-   assert(n->Store->Parent);
+-   /* Apply this node's swizzle to parent's storage */
+-   GLuint swizzle = n->Store->Swizzle;
+-   _slang_copy_ir_storage(n->Store, n->Store->Parent);
+-   n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
++   if (!n->Store->Parent) {
++      /* this covers a case such as "(b ? p : q).x" */
++      n->Store->Parent = n->Children[0]->Store;
++      assert(n->Store->Parent);
++   }
++
++   {
++      const GLuint swizzle = n->Store->Swizzle;
++      /* new storage is parent storage with updated Swizzle + Size fields */
++      _slang_copy_ir_storage(n->Store, n->Store->Parent);
++      /* Apply this node's swizzle to parent's storage */
++      n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
++      /* Update size */
++      n->Store->Size = swizzle_size(n->Store->Swizzle);
++   }
++
+    assert(!n->Store->Parent);
+-#endif
++   assert(n->Store->Index >= 0);
++
+    return inst;
+ }
+ 
+@@ -2120,6 +2149,10 @@ emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
+       /* mark var as used */
+       _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
+    }
++   else if (n->Store->File == PROGRAM_INPUT) {
++      assert(n->Store->Index >= 0);
++      emitInfo->prog->InputsRead |= (1 << n->Store->Index);
++   }
+ 
+    if (n->Store->Index < 0) {
+       /* probably ran out of registers */
+@@ -2424,7 +2457,9 @@ _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
+       maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
+    }
+    if (prog->Parameters->NumParameters > maxUniforms) {
+-      slang_info_log_error(log, "Constant/uniform register limit exceeded");
++      slang_info_log_error(log, "Constant/uniform register limit exceeded "
++                           "(max=%u vec4)", maxUniforms);
++
+       return GL_FALSE;
+    }
+ 
+diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c
+index c6d5cc0..b8427ca 100644
+--- a/src/mesa/shader/slang/slang_link.c
++++ b/src/mesa/shader/slang/slang_link.c
+@@ -318,7 +318,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
+ {
+    GLint attribMap[MAX_VERTEX_ATTRIBS];
+    GLuint i, j;
+-   GLbitfield usedAttributes;
++   GLbitfield usedAttributes; /* generics only, not legacy attributes */
+ 
+    assert(origProg != linkedProg);
+    assert(origProg->Target == GL_VERTEX_PROGRAM_ARB);
+@@ -342,6 +342,15 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
+       usedAttributes |= (1 << attr);
+    }
+ 
++   /* If gl_Vertex is used, that actually counts against the limit
++    * on generic vertex attributes.  This avoids the ambiguity of
++    * whether glVertexAttrib4fv(0, v) sets legacy attribute 0 (vert pos)
++    * or generic attribute[0].  If gl_Vertex is used, we want the former.
++    */
++   if (origProg->InputsRead & VERT_BIT_POS) {
++      usedAttributes |= 0x1;
++   }
++
+    /* initialize the generic attribute map entries to -1 */
+    for (i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
+       attribMap[i] = -1;
+@@ -384,7 +393,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg,
+                    * Start at 1 since generic attribute 0 always aliases
+                    * glVertex/position.
+                    */
+-                  for (attr = 1; attr < MAX_VERTEX_ATTRIBS; attr++) {
++                  for (attr = 0; attr < MAX_VERTEX_ATTRIBS; attr++) {
+                      if (((1 << attr) & usedAttributes) == 0)
+                         break;
+                   }
+@@ -486,8 +495,33 @@ _slang_update_inputs_outputs(struct gl_program *prog)
+             maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1));
+          }
+       }
++
+       if (inst->DstReg.File == PROGRAM_OUTPUT) {
+          prog->OutputsWritten |= 1 << inst->DstReg.Index;
++         if (inst->DstReg.RelAddr) {
++            /* If the output attribute is indexed with relative addressing
++             * we know that it must be a varying or texcoord such as
++             * gl_TexCoord[i] = v;  In this case, mark all the texcoords
++             * or varying outputs as being written.  It's not an error if
++             * a vertex shader writes varying vars that aren't used by the
++             * fragment shader.  But it is an error for a fragment shader
++             * to use varyings that are not written by the vertex shader.
++             */
++            if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
++               if (inst->DstReg.Index == VERT_RESULT_TEX0) {
++                  /* mark all texcoord outputs as written */
++                  const GLbitfield mask =
++                     ((1 << MAX_TEXTURE_COORD_UNITS) - 1) << VERT_RESULT_TEX0;
++                  prog->OutputsWritten |= mask;
++               }
++               else if (inst->DstReg.Index == VERT_RESULT_VAR0) {
++                  /* mark all generic varying outputs as written */
++                  const GLbitfield mask =
++                     ((1 << MAX_VARYING) - 1) << VERT_RESULT_VAR0;
++                  prog->OutputsWritten |= mask;
++               }
++            }
++         }
+       }
+       else if (inst->DstReg.File == PROGRAM_ADDRESS) {
+          maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1);
+diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c
+index de0c939..a4ebacc 100644
+--- a/src/mesa/shader/slang/slang_vartable.c
++++ b/src/mesa/shader/slang/slang_vartable.c
+@@ -4,6 +4,7 @@
+ #include "shader/prog_print.h"
+ #include "slang_compile.h"
+ #include "slang_compile_variable.h"
++#include "slang_emit.h"
+ #include "slang_mem.h"
+ #include "slang_vartable.h"
+ #include "slang_ir.h"
+@@ -72,9 +73,8 @@ _slang_delete_var_table(slang_var_table *vt)
+ 
+ 
+ /**
+- * Create new table, put at head, return ptr to it.
+- * XXX we should take a maxTemps parameter to indicate how many temporaries
+- * are available for the current shader/program target.
++ * Create new table on top of vartable stack.
++ * Used when we enter a {} block.
+  */
+ void
+ _slang_push_var_table(slang_var_table *vt)
+@@ -95,7 +95,8 @@ _slang_push_var_table(slang_var_table *vt)
+ 
+ 
+ /**
+- * Destroy given table, return ptr to Parent
++ * Pop top entry from variable table.
++ * Used when we leave a {} block.
+  */
+ void
+ _slang_pop_var_table(slang_var_table *vt)
+@@ -125,10 +126,12 @@ _slang_pop_var_table(slang_var_table *vt)
+       else
+          comp = 0;
+ 
+-      assert(store->Index >= 0);
+-      for (j = 0; j < store->Size; j++) {
+-         assert(t->Temps[store->Index * 4 + j + comp] == VAR);
+-         t->Temps[store->Index * 4 + j + comp] = FREE;
++      /* store->Index may be -1 if we run out of registers */
++      if (store->Index >= 0) {
++         for (j = 0; j < store->Size; j++) {
++            assert(t->Temps[store->Index * 4 + j + comp] == VAR);
++            t->Temps[store->Index * 4 + j + comp] = FREE;
++         }
+       }
+       store->Index = -1;
+    }
+@@ -156,7 +159,7 @@ _slang_pop_var_table(slang_var_table *vt)
+ 
+ 
+ /**
+- * Add a new variable to the given symbol table.
++ * Add a new variable to the given var/symbol table.
+  */
+ void
+ _slang_add_variable(slang_var_table *vt, slang_variable *v)
+@@ -214,6 +217,7 @@ alloc_reg(slang_var_table *vt, GLint size, GLboolean isTemp)
+    for (i = 0; i <= vt->MaxRegisters * 4 - size; i += step) {
+       GLuint found = 0;
+       for (j = 0; j < (GLuint) size; j++) {
++         assert(i + j < 4 * MAX_PROGRAM_TEMPS);
+          if (i + j < vt->MaxRegisters * 4 && t->Temps[i + j] == FREE) {
+             found++;
+          }
+@@ -225,13 +229,17 @@ alloc_reg(slang_var_table *vt, GLint size, GLboolean isTemp)
+          /* found block of size free regs */
+          if (size > 1)
+             assert(i % 4 == 0);
+-         for (j = 0; j < (GLuint) size; j++)
++         for (j = 0; j < (GLuint) size; j++) {
++            assert(i + j < 4 * MAX_PROGRAM_TEMPS);
+             t->Temps[i + j] = isTemp ? TEMP : VAR;
++         }
+          assert(i < MAX_PROGRAM_TEMPS * 4);
+          t->ValSize[i] = size;
+          return i;
+       }
+    }
++
++   /* if we get here, we ran out of registers */
+    return -1;
+ }
+ 
+@@ -259,21 +267,7 @@ _slang_alloc_var(slang_var_table *vt, slang_ir_storage *store)
+       return GL_FALSE;
+ 
+    store->Index = i / 4;
+-   if (store->Size == 1) {
+-      const GLuint comp = i % 4;
+-      store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+-   }
+-   else if (store->Size == 2) {
+-      store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+-                                     SWIZZLE_NIL, SWIZZLE_NIL);
+-   }
+-   else if (store->Size == 3) {
+-      store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+-                                     SWIZZLE_Z, SWIZZLE_NIL);
+-   }
+-   else {
+-      store->Swizzle = SWIZZLE_NOOP;
+-   }
++   store->Swizzle = _slang_var_swizzle(store->Size, i % 4);
+ 
+    if (dbg)
+       printf("Alloc var storage sz %d at %d.%s (level %d) store %p\n",
+@@ -301,20 +295,7 @@ _slang_alloc_temp(slang_var_table *vt, slang_ir_storage *store)
+    assert(store->Index < 0);
+ 
+    store->Index = i / 4;
+-   if (store->Size == 1) {
+-      const GLuint comp = i % 4;
+-      store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+-   }
+-   else {
+-      /* XXX improve swizzled for size=2/3, use for writemask... */
+-#if 1
+-      if (store->Size == 2) {
+-         store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+-                                        SWIZZLE_NIL, SWIZZLE_NIL);
+-      }
+-#endif
+-      store->Swizzle = SWIZZLE_NOOP;
+-   }
++   store->Swizzle = _slang_var_swizzle(store->Size, i % 4);
+ 
+    if (dbg) printf("Alloc temp sz %d at %d.%s (level %d) store %p\n",
+                    store->Size, store->Index,
+diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
+index 525cf9d..c0bda32 100644
+--- a/src/mesa/swrast/s_fragprog.c
++++ b/src/mesa/swrast/s_fragprog.c
+@@ -40,20 +40,27 @@ static void
+ fetch_texel_lod( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
+                  GLuint unit, GLfloat color[4] )
+ {
+-   GLchan rgba[4];
+-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+ 
+-   if (texObj)
++   if (texObj) {
++      SWcontext *swrast = SWRAST_CONTEXT(ctx);
++      GLchan rgba[4];
++
+       lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod);
+ 
+-   /* XXX use a float-valued TextureSample routine here!!! */
+-   swrast->TextureSample[unit](ctx, texObj, 1, (const GLfloat (*)[4]) texcoord,
+-                               &lambda, &rgba);
+-   color[0] = CHAN_TO_FLOAT(rgba[0]);
+-   color[1] = CHAN_TO_FLOAT(rgba[1]);
+-   color[2] = CHAN_TO_FLOAT(rgba[2]);
+-   color[3] = CHAN_TO_FLOAT(rgba[3]);
++      /* XXX use a float-valued TextureSample routine here!!! */
++      swrast->TextureSample[unit](ctx, texObj, 1,
++                                  (const GLfloat (*)[4]) texcoord,
++                                  &lambda, &rgba);
++      color[0] = CHAN_TO_FLOAT(rgba[0]);
++      color[1] = CHAN_TO_FLOAT(rgba[1]);
++      color[2] = CHAN_TO_FLOAT(rgba[2]);
++      color[3] = CHAN_TO_FLOAT(rgba[3]);
++   }
++   else {
++      color[0] = color[1] = color[2] = 0.0F;
++      color[3] = 1.0F;
++   }
+ }
+ 
+ 
+@@ -69,13 +76,14 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
+ {
+    SWcontext *swrast = SWRAST_CONTEXT(ctx);
+    const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+-   GLfloat lambda;
+-   GLchan rgba[4];
+ 
+    if (texObj) {
+-      const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
++      const struct gl_texture_image *texImg =
++         texObj->Image[0][texObj->BaseLevel];
+       const GLfloat texW = (GLfloat) texImg->WidthScale;
+       const GLfloat texH = (GLfloat) texImg->HeightScale;
++      GLfloat lambda;
++      GLchan rgba[4];
+ 
+       lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
+                                       texdx[1], texdy[1], /* dt/dx, dt/dy */
+@@ -85,14 +93,20 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
+                                       1.0F / texcoord[3]) + lodBias;
+ 
+       lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod);
+-   }
+ 
+-   swrast->TextureSample[unit](ctx, texObj, 1, (const GLfloat (*)[4]) texcoord,
+-                               &lambda, &rgba);
+-   color[0] = CHAN_TO_FLOAT(rgba[0]);
+-   color[1] = CHAN_TO_FLOAT(rgba[1]);
+-   color[2] = CHAN_TO_FLOAT(rgba[2]);
+-   color[3] = CHAN_TO_FLOAT(rgba[3]);
++      /* XXX use a float-valued TextureSample routine here!!! */
++      swrast->TextureSample[unit](ctx, texObj, 1,
++                                  (const GLfloat (*)[4]) texcoord,
++                                  &lambda, &rgba);
++      color[0] = CHAN_TO_FLOAT(rgba[0]);
++      color[1] = CHAN_TO_FLOAT(rgba[1]);
++      color[2] = CHAN_TO_FLOAT(rgba[2]);
++      color[3] = CHAN_TO_FLOAT(rgba[3]);
++   }
++   else {
++      color[0] = color[1] = color[2] = 0.0F;
++      color[3] = 1.0F;
++   }
+ }
+ 
+ 
+diff --git a/src/mesa/swrast/s_triangle.c b/src/mesa/swrast/s_triangle.c
+index a2e8433..a501f42 100644
+--- a/src/mesa/swrast/s_triangle.c
++++ b/src/mesa/swrast/s_triangle.c
+@@ -265,9 +265,6 @@ affine_span(GLcontext *ctx, SWspan *span,
+    GLchan sample[4];  /* the filtered texture sample */
+    const GLuint texEnableSave = ctx->Texture._EnabledUnits;
+ 
+-   /* Disable tex units so they're not re-applied in swrast_write_rgba_span */
+-   ctx->Texture._EnabledUnits = 0x0;
+-
+    /* Instead of defining a function for each mode, a test is done
+     * between the outer and inner loops. This is to reduce code size
+     * and complexity. Observe that an optimizing compiler kills
+@@ -396,6 +393,9 @@ affine_span(GLcontext *ctx, SWspan *span,
+    GLuint i;
+    GLchan *dest = span->array->rgba[0];
+ 
++   /* Disable tex units so they're not re-applied in swrast_write_rgba_span */
++   ctx->Texture._EnabledUnits = 0x0;
++
+    span->intTex[0] -= FIXED_HALF;
+    span->intTex[1] -= FIXED_HALF;
+    switch (info->filter) {
+diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
+index 5e2a582..19ec556 100644
+--- a/src/mesa/tnl/t_context.c
++++ b/src/mesa/tnl/t_context.c
+@@ -109,24 +109,28 @@ _tnl_InvalidateState( GLcontext *ctx, GLuint new_state )
+ 
+    tnl->pipeline.new_state |= new_state;
+ 
+-   /* Calculate tnl->render_inputs:
++   /* Calculate tnl->render_inputs.  This bitmask indicates which vertex
++    * attributes need to be emitted to the rasterizer.
+     */
+    if (ctx->Visual.rgbMode) {
+       GLuint i;
+ 
+       RENDERINPUTS_ZERO( tnl->render_inputs_bitset );
+       RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS );
++
+       if (!fp || (fp->Base.InputsRead & FRAG_BIT_COL0)) {
+          RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0 );
+       }
++
++      if (NEED_SECONDARY_COLOR(ctx))
++         RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 );
++
+       for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
+-         if (ctx->Texture._EnabledCoordUnits & (1 << i)) {
++         if (ctx->Texture._EnabledCoordUnits & (1 << i) ||
++             (fp && fp->Base.InputsRead & FRAG_BIT_TEX(i))) {
+             RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) );
+          }
+       }
+-
+-      if (NEED_SECONDARY_COLOR(ctx))
+-         RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 );
+    }
+    else {
+       RENDERINPUTS_SET( tnl->render_inputs_bitset, _TNL_ATTRIB_POS );
+diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
+index d48f523..f6daa25 100644
+--- a/src/mesa/vbo/vbo_exec_api.c
++++ b/src/mesa/vbo/vbo_exec_api.c
+@@ -148,11 +148,14 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
+          /* Note: the exec->vtx.current[i] pointers point into the
+           * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
+           */
++         if (exec->vtx.attrptr[i]) {
++
+ 	 COPY_CLEAN_4V(current, 
+ 		       exec->vtx.attrsz[i], 
+ 		       exec->vtx.attrptr[i]);
+ 
+-	 
++	 }
++
+ 	 /* Given that we explicitly state size here, there is no need
+ 	  * for the COPY_CLEAN above, could just copy 16 bytes and be
+ 	  * done.  The only problem is when Mesa accesses ctx->Current
+diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
+index 92356ba..ad8b6e8 100644
+--- a/src/mesa/vbo/vbo_exec_draw.c
++++ b/src/mesa/vbo/vbo_exec_draw.c
+@@ -175,7 +175,20 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
+          exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr];
+       }
+       map = vbo->map_vp_arb;
++
++      /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
++       * In that case we effectively need to route the data from
++       * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
++       */
++      if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
++          (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
++         exec->vtx.inputs[16] = exec->vtx.inputs[0];
++         exec->vtx.attrsz[16] = exec->vtx.attrsz[0];
++         exec->vtx.attrsz[0] = 0;
++      }
+       break;
++   default:
++      assert(0);
+    }
+ 
+    /* Make all active attributes (including edgeflag) available as
+diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
+index ed82f09..ed40b5c 100644
+--- a/src/mesa/vbo/vbo_save_draw.c
++++ b/src/mesa/vbo/vbo_save_draw.c
+@@ -110,6 +110,9 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
+    GLuint data = node->buffer_offset;
+    const GLuint *map;
+    GLuint attr;
++   GLubyte node_attrsz[VBO_ATTRIB_MAX];  /* copy of node->attrsz[] */
++
++   memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
+ 
+    /* Install the default (ie Current) attributes first, then overlay
+     * all active ones.
+@@ -135,13 +138,26 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
+          save->inputs[attr + 16] = &vbo->generic_currval[attr];
+       }
+       map = vbo->map_vp_arb;
++
++      /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
++       * In that case we effectively need to route the data from
++       * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
++       */
++      if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
++          (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
++         save->inputs[16] = save->inputs[0];
++         node_attrsz[16] = node_attrsz[0];
++         node_attrsz[0] = 0;
++      }
+       break;
++   default:
++      assert(0);
+    }
+ 
+    for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+       GLuint src = map[attr];
+ 
+-      if (node->attrsz[src]) {
++      if (node_attrsz[src]) {
+          /* override the default array set above */
+          save->inputs[attr] = &arrays[attr];
+ 
+diff --git a/src/mesa/x86/gen_matypes.c b/src/mesa/x86/gen_matypes.c
+index afb4b11..8c690b4 100644
+--- a/src/mesa/x86/gen_matypes.c
++++ b/src/mesa/x86/gen_matypes.c
+@@ -61,7 +61,7 @@ do {									\
+    printf( "\n" );							\
+ } while (0)
+ 
+-#if defined(__BEOS__) || defined(_LP64)
++#if defined(__BEOS__) || defined(__HAIKU__) || defined(_LP64)
+ #define OFFSET( s, t, m )						\
+    printf( "#define %s\t%ld\n", s, offsetof( t, m ) );
+ #else
+@@ -69,7 +69,7 @@ do {									\
+    printf( "#define %s\t%d\n", s, offsetof( t, m ) );
+ #endif
+ 
+-#if defined(__BEOS__) || defined(_LP64)
++#if defined(__BEOS__) || defined(__HAIKU__) || defined(_LP64)
+ #define SIZEOF( s, t )							\
+    printf( "#define %s\t%ld\n", s, sizeof(t) );
+ #else
diff --git a/mesa.spec b/mesa.spec
index 39a935e..af182f9 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -20,7 +20,7 @@
 Summary: Mesa graphics libraries
 Name: mesa
 Version: 7.3
-Release: 8%{?dist}
+Release: 9%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
@@ -36,11 +36,13 @@ Source3: make-git-snapshot.sh
 
 Source5: http://www.x.org/pub/individual/app/%{xdriinfo}.tar.bz2
 
-Patch0: mesa-7.1-osmesa-version.patch
+Patch0: mesa-7.3-fixes-from-7.4-branch.patch
+Patch1: mesa-7.1-osmesa-version.patch
 Patch2: mesa-7.1-nukeglthread-debug.patch
 Patch3: mesa-no-mach64.patch
 
-Patch5: radeon-rewrite.patch
+Patch5: mesa-7.3-dri-configs-fixes.patch
+Patch6: radeon-rewrite.patch
 
 Patch7: mesa-7.1-link-shared.patch
 Patch9: intel-revert-vbl.patch
@@ -167,10 +169,12 @@ This package provides some demo applications for testing Mesa.
 %prep
 %setup -q -n Mesa-%{version}%{?snapshot} -b0 -b1 -b2 -b5
 #%setup -q -n mesa-%{gitdate} -b2 -b5
-%patch0 -p1 -b .osmesa
+%patch0 -p1 -b .mesa74
+%patch1 -p1 -b .osmesa
 %patch2 -p1 -b .intel-glthread
 %patch3 -p0 -b .no-mach64
-%patch5 -p1 -b .radeon-rewrite
+%patch5 -p1 -b .driconfigs
+%patch6 -p1 -b .radeon-rewrite
 %patch7 -p1 -b .dricore
 %patch9 -p1 -b .intel-vbl
 %patch12 -p1 -b .intel-nowarn
@@ -423,6 +427,9 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/mesa-demos-data
 
 %changelog
+* Wed Mar 04 2009 Dave Airlie <airlied@redhat.com> 7.3-9
+- try again: pull in 7.4 fixes, dri configs changes, new radeon-rewrite
+
 * Fri Feb 27 2009 Dave Airlie <airlied@redhat.com> 7.3-8
 - reset whole place back to 7.3-6 - bad plan
 
diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch
index 9a86cc4..0edb095 100644
--- a/radeon-rewrite.patch
+++ b/radeon-rewrite.patch
@@ -1,18848 +1,47 @@
-diff --git a/configs/autoconf.in b/configs/autoconf.in
-index b352974..d786029 100644
---- a/configs/autoconf.in
-+++ b/configs/autoconf.in
-@@ -20,6 +20,8 @@ CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ \
- 	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
- LDFLAGS = @LDFLAGS@
- EXTRA_LIB_PATH = @EXTRA_LIB_PATH@
-+RADEON_CFLAGS = @RADEON_CFLAGS@
-+RADEON_LDFLAGS = @RADEON_LDFLAGS@
- 
- # Assembler
- MESA_ASM_SOURCES = @MESA_ASM_SOURCES@
-diff --git a/configure.ac b/configure.ac
-index a9a8d5a..ea2992d 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -573,6 +575,13 @@ dri)
-     GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED"
-     DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED"
- 
-+    PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
-+
-+    if test "$HAVE_LIBDRM_RADEON" = yes; then
-+	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
-+	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
-+    fi
-+
-     # find the DRI deps for libGL
-     if test "$x11_pkgconfig" = yes; then
-         # add xcb modules if necessary
-@@ -578,6 +585,8 @@ AC_SUBST([GL_PC_REQ_PRIV])
- AC_SUBST([GL_PC_LIB_PRIV])
- AC_SUBST([GL_PC_CFLAGS])
- AC_SUBST([DRI_PC_REQ_PRIV])
-+AC_SUBST([RADEON_CFLAGS])
-+AC_SUBST([RADEON_LDFLAGS])
- 
- dnl
- dnl More X11 setup
-diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
-index e9144ac..e593ed9 100644
---- a/src/mesa/drivers/dri/r200/Makefile
-+++ b/src/mesa/drivers/dri/r200/Makefile
-@@ -3,6 +3,8 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = r200_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c 
-@@ -11,25 +13,35 @@ ifeq ($(USING_EGL), 1)
- EGL_SOURCES = server/radeon_egl.c
- endif
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c
-+
-+
- DRIVER_SOURCES = r200_context.c \
- 		 r200_ioctl.c \
--		 r200_lock.c \
- 		 r200_state.c \
- 		 r200_state_init.c \
- 		 r200_cmdbuf.c \
- 		 r200_pixel.c \
- 		 r200_tex.c \
--		 r200_texmem.c \
- 		 r200_texstate.c \
- 		 r200_tcl.c \
- 		 r200_swtcl.c \
--		 r200_span.c \
- 		 r200_maos.c \
- 		 r200_sanity.c \
- 		 r200_fragshader.c \
- 		 r200_vertprog.c \
- 		 radeon_screen.c \
--		 $(EGL_SOURCES)
-+		 $(EGL_SOURCES) \
-+		 $(RADEON_COMMON_SOURCES)
- 
- C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
- 
-@@ -48,7 +60,29 @@ SYMLINKS = \
- COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
--	radeon_screen.h
-+	radeon_screen.h \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_bo_legacy.h \
-+	radeon_cs_legacy.h \
-+	radeon_bocs_wrapper.h \
-+	radeon_span.h \
-+	radeon_span.c \
-+	radeon_lock.c \
-+	radeon_lock.h \
-+	radeon_common.c \
-+	radeon_common_context.c \
-+	radeon_common_context.h \
-+	radeon_common.h \
-+	radeon_cmdbuf.h \
-+	radeon_mipmap_tree.c \
-+	radeon_mipmap_tree.h \
-+	radeon_texture.c \
-+	radeon_texture.h \
-+	radeon_dma.c \
-+	radeon_dma.h
-+
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-index e163377..ae31bcb 100644
---- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
-@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- #include "main/simple_list.h"
- 
-+#include "radeon_common.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_sanity.h"
- #include "radeon_reg.h"
- 
--static void print_state_atom( struct r200_state_atom *state )
--{
--   int i;
--
--   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
--
--   if (0 & R200_DEBUG & DEBUG_VERBOSE) 
--      for (i = 0 ; i < state->cmd_size ; i++) 
--	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
--
--}
--
- /* The state atoms will be emitted in the order they appear in the atom list,
-  * so this step is important.
-  */
-@@ -64,141 +53,56 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
- {
-    int i, mtu;
- 
--   mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   make_empty_list(&rmesa->hw.atomlist);
--   rmesa->hw.atomlist.name = "atom-list";
--
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
-+   mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
-+
-+   make_empty_list(&rmesa->radeon.hw.atomlist);
-+   rmesa->radeon.hw.atomlist.name = "atom-list";
-+
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
-    for (i = 0; i < mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
-    for (i = 0; i < mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
-    for (i = 0; i < 6; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
-    for (i = 0; i < 8; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
-    for (i = 0; i < 3 + mtu; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
-    for (i = 0; i < 2; ++i)
--      insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
-+      insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
-    for (i = 0; i < 6; ++i)
--       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
--   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
--}
--
--static void r200SaveHwState( r200ContextPtr rmesa )
--{
--   struct r200_state_atom *atom;
--   char * dest = rmesa->backup_store.cmd_buf;
--
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->backup_store.cmd_used = 0;
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( atom->check( rmesa->glCtx, atom->idx ) ) {
--	 int size = atom->cmd_size * 4;
--	 memcpy( dest, atom->cmd, size);
--	 dest += size;
--	 rmesa->backup_store.cmd_used += size;
--	 if (R200_DEBUG & DEBUG_STATE)
--	    print_state_atom( atom );
--      }
--   }
--
--   assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Returning to r200EmitState\n");
--}
--
--void r200EmitState( r200ContextPtr rmesa )
--{
--   char *dest;
--   int mtu;
--   struct r200_state_atom *atom;
--
--   if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (rmesa->save_on_next_emit) {
--      r200SaveHwState(rmesa);
--      rmesa->save_on_next_emit = GL_FALSE;
--   }
--
--   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
--      return;
--
--   mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   /* To avoid going across the entire set of states multiple times, just check
--    * for enough space for the case of emitting all state, and inline the
--    * r200AllocCmdBuf code here without all the checks.
--    */
--   r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
--
--   /* we need to calculate dest after EnsureCmdBufSpace
--      as we may flush the buffer - airlied */
--   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--   if (R200_DEBUG & DEBUG_STATE) {
--      foreach( atom, &rmesa->hw.atomlist ) {
--	 if ( atom->dirty || rmesa->hw.all_dirty ) {
--	    if ( atom->check( rmesa->glCtx, atom->idx ) )
--	       print_state_atom( atom );
--	    else
--	       fprintf(stderr, "skip state %s\n", atom->name);
--	 }
--      }
--   }
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( rmesa->hw.all_dirty )
--	 atom->dirty = GL_TRUE;
--      if ( atom->dirty ) {
--	 if ( atom->check( rmesa->glCtx, atom->idx ) ) {
--	    int size = atom->cmd_size * 4;
--	    memcpy( dest, atom->cmd, size);
--	    dest += size;
--	    rmesa->store.cmd_used += size;
--	    atom->dirty = GL_FALSE;
--	 }
--      }
--   }
--
--   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
--
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
-+       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
-+   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
- }
- 
- /* Fire a section of the retained (indexed_verts) buffer as a regular
-@@ -209,50 +113,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa,
-                        GLuint vertex_nr )
- {
-    drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    assert(!(primitive & R200_VF_PRIM_WALK_IND));
-    
--   r200EmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
-    
-    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
-       fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
- 	      rmesa->store.cmd_used/4, primitive, vertex_nr);
--   
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ,
--						  __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
--   cmd[2].i = (primitive | 
--	       R200_VF_PRIM_WALK_LIST |
--	       R200_VF_COLOR_ORDER_RGBA |
--	       (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
-+ 
-+   BEGIN_BATCH(3);
-+   OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
-+   OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
-+	     (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
-+   END_BATCH();
- }
- 
-+static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
-+{
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	if (vertex_count > 0) {
-+		BEGIN_BATCH(8+2);
-+		OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
-+		OUT_BATCH(R200_VF_PRIM_WALK_IND |
-+			  ((vertex_count + 0) << 16) |
-+			  type);
-+		
-+		if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
-+			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
-+			OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
-+					rmesa->tcl.elt_dma_bo,
-+					rmesa->tcl.elt_dma_offset,
-+					RADEON_GEM_DOMAIN_GTT, 0, 0);
-+			OUT_BATCH(vertex_count/2);
-+		} else {
-+			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
-+			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
-+			OUT_BATCH(rmesa->tcl.elt_dma_offset);
-+			OUT_BATCH(vertex_count/2);
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->tcl.elt_dma_bo,
-+					      RADEON_GEM_DOMAIN_GTT, 0, 0);
-+		}
-+		END_BATCH();
-+	}
-+}
- 
--void r200FlushElts( r200ContextPtr rmesa )
-+void r200FlushElts(GLcontext *ctx)
- {
--   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
-+  r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    int dwords;
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
-+   int nr, elt_used = rmesa->tcl.elt_used;
- 
-    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
-+     fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
-+
-+   assert( rmesa->radeon.dma.flush == r200FlushElts );
-+   rmesa->radeon.dma.flush = NULL;
-+
-+   elt_used = (elt_used + 2) & ~2;
- 
--   assert( rmesa->dma.flush == r200FlushElts );
--   rmesa->dma.flush = NULL;
-+   nr = elt_used / 2;
- 
--   /* Cope with odd number of elts:
--    */
--   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
--   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
-+   radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
- 
--   cmd[1] |= (dwords - 3) << 16;
--   cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT;
-+   r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
-+
-+   radeon_bo_unref(rmesa->tcl.elt_dma_bo);
-+   rmesa->tcl.elt_dma_bo = NULL;
- 
-    if (R200_DEBUG & DEBUG_SYNC) {
-       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--      r200Finish( rmesa->glCtx );
-+      radeonFinish( rmesa->radeon.glCtx );
-    }
- }
- 
-@@ -261,7 +196,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 				    GLuint primitive,
- 				    GLuint min_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
-    GLushort *retval;
- 
-    if (R200_DEBUG & DEBUG_IOCTL)
-@@ -269,30 +203,25 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 
-    assert((primitive & R200_VF_PRIM_WALK_IND));
-    
--   r200EmitState( rmesa );
--   
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr),
--						__FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
--   cmd[2].i = (primitive | 
--	       R200_VF_PRIM_WALK_IND |
--	       R200_VF_COLOR_ORDER_RGBA);
-+   radeonEmitState(&rmesa->radeon);
- 
-+   rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+					  0, R200_ELT_BUF_SZ, 4,
-+					  RADEON_GEM_DOMAIN_GTT, 0);
-+   rmesa->tcl.elt_dma_offset = 0;
-+   rmesa->tcl.elt_used = min_nr * 2;
-+
-+   radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
-+   retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
-    
--   retval = (GLushort *)(cmd+3);
- 
-    if (R200_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x prim %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, primitive);
--
--   assert(!rmesa->dma.flush);
--   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--   rmesa->dma.flush = r200FlushElts;
-+      fprintf(stderr, "%s: header prim %x \n",
-+	      __FUNCTION__, primitive);
- 
--   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
-+   assert(!rmesa->radeon.dma.flush);
-+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+   rmesa->radeon.dma.flush = r200FlushElts;
- 
-    return retval;
- }
-@@ -300,129 +229,130 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 
- 
- void r200EmitVertexAOS( r200ContextPtr rmesa,
--			  GLuint vertex_size,
--			  GLuint offset )
-+			GLuint vertex_size,
-+ 			struct radeon_bo *bo,
-+			GLuint offset )
- {
--   drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
-       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
- 	      __FUNCTION__, vertex_size, offset);
- 
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
--						  __FUNCTION__ );
- 
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
--   cmd[2].i = 1;
--   cmd[3].i = vertex_size | (vertex_size << 8);
--   cmd[4].i = offset;
-+   BEGIN_BATCH(5);
-+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
-+   OUT_BATCH(1);
-+   OUT_BATCH(vertex_size | (vertex_size << 8));
-+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   END_BATCH();
- }
--		       
- 
--void r200EmitAOS( r200ContextPtr rmesa,
--		    struct r200_dma_region **component,
--		    GLuint nr,
--		    GLuint offset )
-+void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
- {
--   drm_radeon_cmd_header_t *cmd;
--   int sz = AOS_BUFSZ(nr);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   uint32_t voffset;
-+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
-    int i;
--   int *tmp;
--
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16);
--   cmd[2].i = nr;
--   tmp = &cmd[0].i;
--   cmd += 3;
--
--   for (i = 0 ; i < nr ; i++) {
--      if (i & 1) {
--	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
--		      (component[i]->aos_size << 16));
--	 cmd[2].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
--	 cmd += 3;
-+   
-+   if (RADEON_DEBUG & DEBUG_VERTS)
-+      fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
-+	      offset);
-+
-+   BEGIN_BATCH(sz+2+ (nr*2));
-+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
-+   OUT_BATCH(nr);
-+
-+    
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
-+		   (rmesa->tcl.aos[i].stride << 8) |
-+		   (rmesa->tcl.aos[i + 1].components << 16) |
-+		   (rmesa->tcl.aos[i + 1].stride << 24));
-+			
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[i].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[i+1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--      else {
--	 cmd[0].i = ((component[i]->aos_stride << 8) | 
--		     (component[i]->aos_size << 0));
--	 cmd[1].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
-+      
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[nr - 1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+      }
-+   } else {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
-+		   (rmesa->tcl.aos[i].stride << 8) |
-+		   (rmesa->tcl.aos[i + 1].components << 16) |
-+		   (rmesa->tcl.aos[i + 1].stride << 24));
-+	 
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 OUT_BATCH(voffset);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 OUT_BATCH(voffset);
-+      }
-+      
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 OUT_BATCH(voffset);
-+      }
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[i+0].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[i+1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+      }
-+      if (nr & 1) {
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[nr-1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
-    }
--
--   if (R200_DEBUG & DEBUG_VERTS) {
--      fprintf(stderr, "%s:\n", __FUNCTION__);
--      for (i = 0 ; i < sz ; i++)
--	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
--   }
-+   END_BATCH();
- }
- 
--void r200EmitBlit( r200ContextPtr rmesa,
--		   GLuint color_fmt,
--		   GLuint src_pitch,
--		   GLuint src_offset,
--		   GLuint dst_pitch,
--		   GLuint dst_offset,
--		   GLint srcx, GLint srcy,
--		   GLint dstx, GLint dsty,
--		   GLuint w, GLuint h )
-+void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
- {
--   drm_radeon_cmd_header_t *cmd;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--	      __FUNCTION__, 
--	      src_pitch, src_offset, srcx, srcy,
--	      dst_pitch, dst_offset, dstx, dsty,
--	      w, h);
--
--   assert( (src_pitch & 63) == 0 );
--   assert( (dst_pitch & 63) == 0 );
--   assert( (src_offset & 1023) == 0 );
--   assert( (dst_offset & 1023) == 0 );
--   assert( w < (1<<16) );
--   assert( h < (1<<16) );
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
--						  __FUNCTION__ );
--
--
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
--   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_BRUSH_NONE |
--	       (color_fmt << 8) |
--	       RADEON_GMC_SRC_DATATYPE_COLOR |
--	       RADEON_ROP3_S |
--	       RADEON_DP_SRC_SOURCE_MEMORY |
--	       RADEON_GMC_CLR_CMP_CNTL_DIS |
--	       RADEON_GMC_WR_MSK_DIS );
--
--   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
--   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
--   cmd[5].i = (srcx << 16) | srcy;
--   cmd[6].i = (dstx << 16) | dsty; /* dst */
--   cmd[7].i = (w << 16) | h;
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type);
-+	END_BATCH();
- }
- 
--
--void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
--
--   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
--					   __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
--   cmd[0].wait.flags = flags;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
-index c067515..a744469 100644
---- a/src/mesa/drivers/dri/r200/r200_context.c
-+++ b/src/mesa/drivers/dri/r200/r200_context.c
-@@ -52,9 +52,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DE
- #include "drivers/common/driverfuncs.h"
- 
- #include "r200_context.h"
-+#include "radeon_span.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
--#include "r200_span.h"
- #include "r200_pixel.h"
- #include "r200_tex.h"
- #include "r200_swtcl.h"
-@@ -78,9 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vblank.h"
- #include "utils.h"
- #include "xmlpool.h" /* for symbolic values of enum-type options */
--#ifndef R200_DEBUG
--int R200_DEBUG = (0);
--#endif
- 
- /* Return various strings for glGetString().
-  */
-@@ -89,8 +87,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    static char buffer[128];
-    unsigned   offset;
--   GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 :
--      rmesa->r200Screen->AGPMode;
-+   GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
-+      rmesa->radeon.radeonScreen->AGPMode;
- 
-    switch ( name ) {
-    case GL_VENDOR:
-@@ -101,7 +99,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
- 				     agp_mode );
- 
-       sprintf( & buffer[ offset ], " %sTCL",
--	       !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
-+	       !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
- 	       ? "" : "NO-" );
- 
-       return (GLubyte *)buffer;
-@@ -234,6 +232,40 @@ static const struct dri_debug_control debug_control[] =
-     { NULL,    0 }
- };
- 
-+static void r200_get_lock(radeonContextPtr radeon)
-+{
-+   r200ContextPtr rmesa = (r200ContextPtr)radeon;
-+   drm_radeon_sarea_t *sarea = radeon->sarea;
-+   int i;
-+
-+   R200_STATECHANGE( rmesa, ctx );
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-+   }
-+   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
-+
-+   if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
-+      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
-+      if (!radeon->radeonScreen->kernel_mm)
-+         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
-+   }
-+
-+}
-+
-+static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+}
-+
-+
-+static void r200_init_vtbl(radeonContextPtr radeon)
-+{
-+   radeon->vtbl.get_lock = r200_get_lock;
-+   radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
-+   radeon->vtbl.update_draw_buffer = r200UpdateDrawBuffer;
-+   radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
-+   radeon->vtbl.swtcl_flush = r200_swtcl_flush;
-+}
-+
- 
- /* Create the device specific rendering context.
-  */
-@@ -245,9 +277,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
-    struct dd_function_table functions;
-    r200ContextPtr rmesa;
--   GLcontext *ctx, *shareCtx;
-+   GLcontext *ctx;
-    int i;
--   int tcl_mode, fthrottle_mode;
-+   int tcl_mode;
- 
-    assert(glVisual);
-    assert(driContextPriv);
-@@ -257,7 +289,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
-    if ( !rmesa )
-       return GL_FALSE;
--      
-+
-+   r200_init_vtbl(&rmesa->radeon);
-    /* init exp fog table data */
-    r200InitStaticFogData();
- 
-@@ -265,12 +298,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-     * Do this here so that initialMaxAnisotropy is set before we create
-     * the default textures.
-     */
--   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
-+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
- 			screen->driScreen->myNum, "r200");
--   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
--                                                 "def_max_anisotropy");
-+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
-+							"def_max_anisotropy");
- 
--   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
-+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-       if ( sPriv->drm_version.minor < 13 )
- 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- 			  "disabling.\n", sPriv->drm_version.minor );
-@@ -291,59 +324,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    r200InitTextureFuncs(&functions);
-    r200InitShaderFuncs(&functions); 
- 
--   /* Allocate and initialize the Mesa context */
--   if (sharedContextPrivate)
--      shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
--   else
--      shareCtx = NULL;
--   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
--                                       &functions, (void *) rmesa);
--   if (!rmesa->glCtx) {
--      FREE(rmesa);
--      return GL_FALSE;
--   }
--   driContextPriv->driverPrivate = rmesa;
--
--   /* Init r200 context data */
--   rmesa->dri.context = driContextPriv;
--   rmesa->dri.screen = sPriv;
--   rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
--   rmesa->dri.hwContext = driContextPriv->hHWContext;
--   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
--   rmesa->dri.fd = sPriv->fd;
--   rmesa->dri.drmMinor = sPriv->drm_version.minor;
--
--   rmesa->r200Screen = screen;
--   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
--				       screen->sarea_priv_offset);
--
--
--   rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
--
--   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
--   make_empty_list( & rmesa->swapped );
--
--   rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
--   assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS);
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
--	    screen->texSize[i],
--	    12,
--	    RADEON_NR_TEX_REGIONS,
--	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
--	    & rmesa->sarea->tex_age[i],
--	    & rmesa->swapped,
--	    sizeof( r200TexObj ),
--	    (destroy_texture_object_t *) r200DestroyTexObj );
-+   if (!radeonInitContext(&rmesa->radeon, &functions,
-+			  glVisual, driContextPriv,
-+			  sharedContextPrivate)) {
-+     FREE(rmesa);
-+     return GL_FALSE;
-    }
--   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
-+
-+   rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
- 					   "texture_depth");
--   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--      rmesa->texture_depth = ( screen->cpp == 4 ) ?
-+   if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-+      rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
- 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- 
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->hw.all_dirty = 1;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.hw.all_dirty = 1;
- 
-    /* Set the maximum texture size small enough that we can guarentee that
-     * all texture units can bind a maximal texture and have all of them in
-@@ -351,29 +346,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-     * setting allow larger textures.
-     */
- 
--   ctx = rmesa->glCtx;
--   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
-+   ctx = rmesa->radeon.glCtx;
-+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
- 						 "texture_units");
-    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
-    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- 
--   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
--
--   driCalculateMaxTextureLevels( rmesa->texture_heaps,
--				 rmesa->nr_heaps,
--				 & ctx->Const,
--				 4,
--				 11, /* max 2D texture size is 2048x2048 */
--#if ENABLE_HW_3D_TEXTURE
--				 8,  /* max 3D texture size is 256^3 */
--#else
--				 0,  /* 3D textures unsupported */
--#endif
--				 11, /* max cube texture size is 2048x2048 */
--				 11, /* max texture rectangle size is 2048x2048 */
--				 12,
--				 GL_FALSE,
--				 i );
-+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
- 
-    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- 
-@@ -383,7 +362,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    ctx->Const.MinPointSizeAA = 1.0;
-    ctx->Const.MaxPointSizeAA = 1.0;
-    ctx->Const.PointSizeGranularity = 0.0625;
--   if (rmesa->r200Screen->drmSupportsPointSprites)
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
-       ctx->Const.MaxPointSize = 2047.0;
-    else
-       ctx->Const.MaxPointSize = 1.0;
-@@ -439,32 +418,32 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
-    _math_matrix_set_identity( &rmesa->tmpmat );
- 
-    driInitExtensions( ctx, card_extensions, GL_TRUE );
--   if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
-      /* yuv textures don't work with some chips - R200 / rv280 okay so far
- 	others get the bit ordering right but don't actually do YUV-RGB conversion */
-       _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
-    }
--   if (rmesa->glCtx->Mesa_DXTn) {
-+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-    }
--   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
-+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-    }
- 
--   if (rmesa->r200Screen->drmSupportsCubeMapsR200)
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
-       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-        driInitExtensions( ctx, blend_extensions, GL_FALSE );
-    }
--   if(rmesa->r200Screen->drmSupportsVertexProgram)
-+   if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
-       driInitSingleExtension( ctx, ARB_vp_extension );
--   if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
-+   if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
-       driInitSingleExtension( ctx, NV_vp_extension );
- 
--   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
-+   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
-       driInitSingleExtension( ctx, ATI_fs_extension );
--   if (rmesa->r200Screen->drmSupportsPointSprites)
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
-       driInitExtensions( ctx, point_extensions, GL_FALSE );
- #if 0
-    r200InitDriverFuncs( ctx );
-@@ -474,33 +453,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- #endif
-    /* plug in a few more device driver functions */
-    /* XXX these should really go right after _mesa_init_driver_functions() */
-+   radeonInitSpanFuncs( ctx );
-    r200InitPixelFuncs( ctx );
--   r200InitSpanFuncs( ctx );
-    r200InitTnlFuncs( ctx );
-    r200InitState( rmesa );
-    r200InitSwtcl( ctx );
- 
--   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
--   rmesa->iw.irq_seq = -1;
--   rmesa->irqsEmitted = 0;
--   rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
--		     rmesa->r200Screen->irq);
--
--   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
--
--   if (!rmesa->do_irqs)
--      fprintf(stderr,
--	      "IRQ's not enabled, falling back to %s: %d %d\n",
--	      rmesa->do_usleeps ? "usleeps" : "busy waits",
--	      fthrottle_mode,
--	      rmesa->r200Screen->irq);
--
-    rmesa->prefer_gart_client_texturing = 
-       (getenv("R200_GART_CLIENT_TEXTURES") != 0);
- 
--   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
--
--
- #if DO_DEBUG
-    R200_DEBUG  = driParseDebugString( getenv( "R200_DEBUG" ),
- 				      debug_control );
-@@ -508,18 +469,18 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- 				      debug_control );
- #endif
- 
--   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
--   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
-+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
-+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
-       fprintf(stderr, "disabling 3D acceleration\n");
-       FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
-    }
-    else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
--	    !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
--      if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) {
--	 rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL;
-+	    !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
- 	 fprintf(stderr, "Disabling HW TCL support\n");
-       }
--      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
-+      TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
-    }
- 
-    return GL_TRUE;
-@@ -538,55 +499,33 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
- 
-    /* check if we're deleting the currently bound context */
-    if (rmesa == current) {
--      R200_FIREVERTICES( rmesa );
-+      radeon_firevertices(&rmesa->radeon);
-       _mesa_make_current(NULL, NULL, NULL);
-    }
- 
-    /* Free r200 context resources */
-    assert(rmesa); /* should never be null */
-    if ( rmesa ) {
--      GLboolean   release_texture_heaps;
--
- 
--      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
--      _swsetup_DestroyContext( rmesa->glCtx );
--      _tnl_DestroyContext( rmesa->glCtx );
--      _vbo_DestroyContext( rmesa->glCtx );
--      _swrast_DestroyContext( rmesa->glCtx );
-+      _swsetup_DestroyContext( rmesa->radeon.glCtx );
-+      _tnl_DestroyContext( rmesa->radeon.glCtx );
-+      _vbo_DestroyContext( rmesa->radeon.glCtx );
-+      _swrast_DestroyContext( rmesa->radeon.glCtx );
- 
--      r200DestroySwtcl( rmesa->glCtx );
--      r200ReleaseArrays( rmesa->glCtx, ~0 );
-+      r200DestroySwtcl( rmesa->radeon.glCtx );
-+      r200ReleaseArrays( rmesa->radeon.glCtx, ~0 );
- 
--      if (rmesa->dma.current.buf) {
--	 r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--	 r200FlushCmdBuf( rmesa, __FUNCTION__ );
-+      if (rmesa->radeon.dma.current) {
-+	 radeonReleaseDmaRegion( &rmesa->radeon );
-+	 rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
-       }
- 
--      if (rmesa->state.scissor.pClipRects) {
--	 FREE(rmesa->state.scissor.pClipRects);
--	 rmesa->state.scissor.pClipRects = NULL;
--      }
--
--      if ( release_texture_heaps ) {
--         /* This share group is about to go away, free our private
--          * texture object data.
--          */
--         int i;
--
--         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
--	    rmesa->texture_heaps[ i ] = NULL;
--         }
--
--	 assert( is_empty_list( & rmesa->swapped ) );
-+      if (rmesa->radeon.state.scissor.pClipRects) {
-+	 FREE(rmesa->radeon.state.scissor.pClipRects);
-+	 rmesa->radeon.state.scissor.pClipRects = NULL;
-       }
- 
--      /* free the Mesa context */
--      rmesa->glCtx->DriverCtx = NULL;
--      _mesa_destroy_context( rmesa->glCtx );
--
--      /* free the option cache */
--      driDestroyOptionCache (&rmesa->optionCache);
-+      radeonCleanupContext(&rmesa->radeon);
- 
-       FREE( rmesa );
-    }
-@@ -594,107 +533,6 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
- 
- 
- 
--
--void
--r200SwapBuffers( __DRIdrawablePrivate *dPriv )
--{
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      r200ContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--         if ( rmesa->doPageFlip ) {
--            r200PageFlip( dPriv );
--         }
--         else {
--	     r200CopyBuffer( dPriv, NULL );
--         }
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--void
--r200CopySubBuffer( __DRIdrawablePrivate *dPriv,
--		   int x, int y, int w, int h )
--{
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      r200ContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--	 drm_clip_rect_t rect;
--	 rect.x1 = x + dPriv->x;
--	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	 rect.x2 = rect.x1 + w;
--	 rect.y2 = rect.y1 + h;
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--	 r200CopyBuffer( dPriv, &rect );
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--/* Force the context `c' to be the current context and associate with it
-- * buffer `b'.
-- */
--GLboolean
--r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
--                   __DRIdrawablePrivate *driDrawPriv,
--                   __DRIdrawablePrivate *driReadPriv )
--{
--   if ( driContextPriv ) {
--      r200ContextPtr newCtx = 
--	 (r200ContextPtr) driContextPriv->driverPrivate;
--
--      if (R200_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
--
--      newCtx->dri.readable = driReadPriv;
--
--      if ( newCtx->dri.drawable != driDrawPriv ||
--           newCtx->lastStamp != driDrawPriv->lastStamp ) {
--	 if (driDrawPriv->swap_interval == (unsigned)-1) {
--	    driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
--	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
--	       : VBLANK_FLAG_NO_IRQ;
--
--	    driDrawableInitVBlank( driDrawPriv );
--	 }
--
--	 newCtx->dri.drawable = driDrawPriv;
--
--	 r200SetCliprects(newCtx);
--	 r200UpdateViewportOffset( newCtx->glCtx );
--      }
--
--      _mesa_make_current( newCtx->glCtx,
--			  (GLframebuffer *) driDrawPriv->driverPrivate,
--			  (GLframebuffer *) driReadPriv->driverPrivate );
--
--      _mesa_update_state( newCtx->glCtx );
--      r200ValidateState( newCtx->glCtx );
--
--   } else {
--      if (R200_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--      _mesa_make_current( NULL, NULL, NULL );
--   }
--
--   if (R200_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "End %s\n", __FUNCTION__);
--   return GL_TRUE;
--}
--
- /* Force the context `c' to be unbound from its buffer.
-  */
- GLboolean
-@@ -703,7 +541,7 @@ r200UnbindContext( __DRIcontextPrivate *driContextPriv )
-    r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
- 
-    if (R200_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx);
-+      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->radeon.glCtx);
- 
-    return GL_TRUE;
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
-index 14a1dda..fcbe725 100644
---- a/src/mesa/drivers/dri/r200/r200_context.h
-+++ b/src/mesa/drivers/dri/r200/r200_context.h
-@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #error This driver requires a newer libdrm to compile
- #endif
- 
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
-+
-+#include "radeon_lock.h"
-+
- struct r200_context;
- typedef struct r200_context r200ContextRec;
- typedef struct r200_context *r200ContextPtr;
- 
--/* This union is used to avoid warnings/miscompilation
--   with float to uint32_t casts due to strict-aliasing */
--typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
--
--#include "r200_lock.h"
--#include "radeon_screen.h"
- #include "main/mm.h"
- 
--/* Flags for software fallback cases */
--/* See correponding strings in r200_swtcl.c */
--#define R200_FALLBACK_TEXTURE           0x01
--#define R200_FALLBACK_DRAW_BUFFER       0x02
--#define R200_FALLBACK_STENCIL           0x04
--#define R200_FALLBACK_RENDER_MODE       0x08
--#define R200_FALLBACK_DISABLE           0x10
--#define R200_FALLBACK_BORDER_MODE       0x20
--
--/* The blit width for texture uploads
-- */
--#define BLIT_WIDTH_BYTES 1024
--
--/* Use the templated vertex format:
-- */
--#define COLOR_IS_RGBA
--#define TAG(x) r200##x
--#include "tnl_dd/t_dd_vertex.h"
--#undef TAG
--
--typedef void (*r200_tri_func)( r200ContextPtr,
--				 r200Vertex *,
--				 r200Vertex *,
--				 r200Vertex * );
--
--typedef void (*r200_line_func)( r200ContextPtr,
--				  r200Vertex *,
--				  r200Vertex * );
--
--typedef void (*r200_point_func)( r200ContextPtr,
--				   r200Vertex * );
--
--
- struct r200_vertex_program {
-         struct gl_vertex_program mesa_program; /* Must be first */
-         int translated;
-@@ -112,93 +78,11 @@ struct r200_vertex_program {
-         int fogmode;
- };
- 
--struct r200_colorbuffer_state {
--   GLuint clear;
--#if 000
--   GLint drawOffset, drawPitch;
--#endif
--   int roundEnable;
--};
--
--
--struct r200_depthbuffer_state {
--   GLuint clear;
--   GLfloat scale;
--};
--
--#if 000
--struct r200_pixel_state {
--   GLint readOffset, readPitch;
--};
--#endif
--
--struct r200_scissor_state {
--   drm_clip_rect_t rect;
--   GLboolean enabled;
--
--   GLuint numClipRects;			/* Cliprects active */
--   GLuint numAllocedClipRects;		/* Cliprects available */
--   drm_clip_rect_t *pClipRects;
--};
--
--struct r200_stencilbuffer_state {
--   GLboolean hwBuffer;
--   GLuint clear;			/* rb3d_stencilrefmask value */
--};
--
--struct r200_stipple_state {
--   GLuint mask[32];
--};
--
--
--
--#define TEX_0   0x1
--#define TEX_1   0x2
--#define TEX_2	0x4
--#define TEX_3	0x8
--#define TEX_4	0x10
--#define TEX_5	0x20
--#define TEX_ALL 0x3f
--
--typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
--
--/* Texture object in locally shared texture space.
-- */
--struct r200_tex_obj {
--   driTextureObject   base;
--
--   GLuint bufAddr;			/* Offset to start of locally
--					   shared texture block */
--
--   GLuint dirty_state;		        /* Flags (1 per texunit) for
--					   whether or not this texobj
--					   has dirty hardware state
--					   (pp_*) that needs to be
--					   brought into the
--					   texunit. */
--
--   drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--					/* Six, for the cube faces */
--   GLboolean image_override;		/* Image overridden by GLX_EXT_tfp */
--
--   GLuint pp_txfilter;		        /* hardware register values */
--   GLuint pp_txformat;
--   GLuint pp_txformat_x;
--   GLuint pp_txoffset;		        /* Image location in texmem.
--					   All cube faces follow. */
--   GLuint pp_txsize;		        /* npot only */
--   GLuint pp_txpitch;		        /* npot only */
--   GLuint pp_border_color;
--   GLuint pp_cubic_faces;	        /* cube face 1,2,3,4 log2 sizes */
--
--   GLboolean  border_fallback;
--
--   GLuint tile_bits;			/* hw texture tile bits used on this texture */
--};
-+#define R200_TEX_ALL 0x3f
- 
- 
- struct r200_texture_env_state {
--   r200TexObjPtr texobj;
-+   radeonTexObjPtr texobj;
-    GLuint outputreg;
-    GLuint unitneeded;
- };
-@@ -210,19 +94,6 @@ struct r200_texture_state {
- };
- 
- 
--struct r200_state_atom {
--   struct r200_state_atom *next, *prev;
--   const char *name;		         /* for debug */
--   int cmd_size;		         /* size in bytes */
--   GLuint idx;
--   int *cmd;			         /* one or more cmd's */
--   int *lastcmd;			 /* one or more cmd's */
--   GLboolean dirty;
--   GLboolean (*check)( GLcontext *, int );    /* is this state active? */
--};
--   
--
--
- /* Trying to keep these relatively short as the variables are becoming
-  * extravagently long.  Drop the driver name prefix off the front of
-  * everything - I think we know which driver we're in by now, and keep the
-@@ -597,181 +468,85 @@ struct r200_state_atom {
- 
- 
- struct r200_hw_state {
--   /* Head of the linked list of state atoms. */
--   struct r200_state_atom atomlist;
--
-    /* Hardware state, stored as cmdbuf commands:  
-     *   -- Need to doublebuffer for
-     *           - reviving state after loss of context
-     *           - eliding noop statechange loops? (except line stipple count)
-     */
--   struct r200_state_atom ctx;
--   struct r200_state_atom set;
--   struct r200_state_atom vte;
--   struct r200_state_atom lin;
--   struct r200_state_atom msk;
--   struct r200_state_atom vpt;
--   struct r200_state_atom vap;
--   struct r200_state_atom vtx;
--   struct r200_state_atom tcl;
--   struct r200_state_atom msl;
--   struct r200_state_atom tcg;
--   struct r200_state_atom msc;
--   struct r200_state_atom cst;
--   struct r200_state_atom tam;
--   struct r200_state_atom tf;
--   struct r200_state_atom tex[6];
--   struct r200_state_atom cube[6];
--   struct r200_state_atom zbs;
--   struct r200_state_atom mtl[2];
--   struct r200_state_atom mat[9];
--   struct r200_state_atom lit[8]; /* includes vec, scl commands */
--   struct r200_state_atom ucp[6];
--   struct r200_state_atom pix[6]; /* pixshader stages */
--   struct r200_state_atom eye; /* eye pos */
--   struct r200_state_atom grd; /* guard band clipping */
--   struct r200_state_atom fog;
--   struct r200_state_atom glt;
--   struct r200_state_atom prf;
--   struct r200_state_atom afs[2];
--   struct r200_state_atom pvs;
--   struct r200_state_atom vpi[2];
--   struct r200_state_atom vpp[2];
--   struct r200_state_atom atf;
--   struct r200_state_atom spr;
--   struct r200_state_atom ptp;
--
--   int max_state_size;	/* Number of bytes necessary for a full state emit. */
--   GLboolean is_dirty, all_dirty;
-+   struct radeon_state_atom ctx;
-+   struct radeon_state_atom set;
-+   struct radeon_state_atom vte;
-+   struct radeon_state_atom lin;
-+   struct radeon_state_atom msk;
-+   struct radeon_state_atom vpt;
-+   struct radeon_state_atom vap;
-+   struct radeon_state_atom vtx;
-+   struct radeon_state_atom tcl;
-+   struct radeon_state_atom msl;
-+   struct radeon_state_atom tcg;
-+   struct radeon_state_atom msc;
-+   struct radeon_state_atom cst;
-+   struct radeon_state_atom tam;
-+   struct radeon_state_atom tf;
-+   struct radeon_state_atom tex[6];
-+   struct radeon_state_atom cube[6];
-+   struct radeon_state_atom zbs;
-+   struct radeon_state_atom mtl[2];
-+   struct radeon_state_atom mat[9];
-+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
-+   struct radeon_state_atom ucp[6];
-+   struct radeon_state_atom pix[6]; /* pixshader stages */
-+   struct radeon_state_atom eye; /* eye pos */
-+   struct radeon_state_atom grd; /* guard band clipping */
-+   struct radeon_state_atom fog;
-+   struct radeon_state_atom glt;
-+   struct radeon_state_atom prf;
-+   struct radeon_state_atom afs[2];
-+   struct radeon_state_atom pvs;
-+   struct radeon_state_atom vpi[2];
-+   struct radeon_state_atom vpp[2];
-+   struct radeon_state_atom atf;
-+   struct radeon_state_atom spr;
-+   struct radeon_state_atom ptp;
- };
- 
- struct r200_state {
-    /* Derived state for internal purposes:
-     */
--   struct r200_colorbuffer_state color;
--   struct r200_depthbuffer_state depth;
--#if 00
--   struct r200_pixel_state pixel;
--#endif
--   struct r200_scissor_state scissor;
--   struct r200_stencilbuffer_state stencil;
--   struct r200_stipple_state stipple;
-+   struct radeon_stipple_state stipple;
-    struct r200_texture_state texture;
-    GLuint envneeded;
- };
- 
--/* Need refcounting on dma buffers:
-- */
--struct r200_dma_buffer {
--   int refcount;		/* the number of retained regions in buf */
--   drmBufPtr buf;
--};
--
--#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r200_dma_region {
--   struct r200_dma_buffer *buf;
--   char *address;		/* == buf->address */
--   int start, end, ptr;		/* offsets from start of buf */
--   int aos_start;
--   int aos_stride;
--   int aos_size;
--};
--
--
--struct r200_dma {
--   /* Active dma region.  Allocations for vertices and retained
--    * regions come from here.  Also used for emitting random vertices,
--    * these may be flushed by calling flush_current();
--    */
--   struct r200_dma_region current;
--   
--   void (*flush)( r200ContextPtr );
--
--   char *buf0_address;		/* start of buf[0], for index calcs */
--   GLuint nr_released_bufs;	/* flush after so many buffers released */
--};
--
--struct r200_dri_mirror {
--   __DRIcontextPrivate	*context;	/* DRI context */
--   __DRIscreenPrivate	*screen;	/* DRI screen */
--   __DRIdrawablePrivate	*drawable;	/* DRI drawable bound to this ctx */
--   __DRIdrawablePrivate	*readable;	/* DRI readable bound to this ctx */
--
--   drm_context_t hwContext;
--   drm_hw_lock_t *hwLock;
--   int fd;
--   int drmMinor;
--};
--
--
- #define R200_CMD_BUF_SZ  (16*1024) 
- 
--struct r200_store {
--   GLuint statenr;
--   GLuint primnr;
--   char cmd_buf[R200_CMD_BUF_SZ];
--   int cmd_used;   
--   int elts_start;
--};
--
--
-+#define R200_ELT_BUF_SZ  (16*1024) 
- /* r200_tcl.c
-  */
- struct r200_tcl_info {
-    GLuint hw_primitive;
- 
- /* hw can handle 12 components max */
--   struct r200_dma_region *aos_components[12];
-+  struct radeon_aos aos[12];
-    GLuint nr_aos_components;
- 
-    GLuint *Elts;
- 
--   struct r200_dma_region indexed_verts;
--   struct r200_dma_region vertex_data[15];
-+   struct radeon_bo *elt_dma_bo;
-+   int elt_dma_offset; /** Offset into this buffer object, in bytes */
-+   int elt_used;
-+
- };
- 
- 
- /* r200_swtcl.c
-  */
- struct r200_swtcl_info {
--   GLuint RenderIndex;
--   
--   /**
--    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
--    * installed in the Mesa state vector.
--    */
--   GLuint vertex_size;
- 
--   /**
--    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
--    * data in the hardware buffer.
--    */
--   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
- 
--   /**
--    * Number of elements of \c ::vertex_attrs that are actually used.
--    */
--   GLuint vertex_attr_count;
--
--   /**
--    * Cached pointer to the buffer where Mesa will store vertex data.
--    */
--   GLubyte *verts;
--
--   /* Fallback rasterization functions
--    */
--   r200_point_func draw_point;
--   r200_line_func draw_line;
--   r200_tri_func draw_tri;
--
--   GLuint hw_primitive;
--   GLenum render_primitive;
--   GLuint numverts;
-+   radeon_point_func draw_point;
-+   radeon_line_func draw_line;
-+   radeon_tri_func draw_tri;
- 
-    /**
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-@@ -787,27 +562,10 @@ struct r200_swtcl_info {
-     * Should Mesa project vertex data or will the hardware do it?
-     */
-    GLboolean needproj;
--
--   struct r200_dma_region indexed_verts;
--};
--
--
--struct r200_ioctl {
--   GLuint vertex_offset;
--   GLuint vertex_size;
- };
- 
- 
- 
--#define R200_MAX_PRIMS 64
--
--
--
--struct r200_prim {
--   GLuint start;
--   GLuint end;
--   GLuint prim;
--};
- 
-    /* A maximum total of 29 elements per vertex:  3 floats for position, 3
-     * floats for normal, 4 floats for color, 4 bytes for secondary color,
-@@ -822,9 +580,8 @@ struct r200_prim {
- 
- #define R200_MAX_VERTEX_SIZE ((3*6)+11)
- 
--
- struct r200_context {
--   GLcontext *glCtx;			/* Mesa context */
-+   struct radeon_context radeon;
- 
-    /* Driver and hardware state management
-     */
-@@ -832,56 +589,15 @@ struct r200_context {
-    struct r200_state state;
-    struct r200_vertex_program *curr_vp_hw;
- 
--   /* Texture object bookkeeping
--    */
--   unsigned              nr_heaps;
--   driTexHeap          * texture_heaps[ RADEON_NR_TEX_HEAPS ];
--   driTextureObject      swapped;
--   int                   texture_depth;
--   float                 initialMaxAnisotropy;
--
--   /* Rasterization and vertex state:
--    */
--   GLuint TclFallback;
--   GLuint Fallback;
--   GLuint NewGLState;
--   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
--
-    /* Vertex buffers
-     */
--   struct r200_ioctl ioctl;
--   struct r200_dma dma;
--   struct r200_store store;
--   /* A full state emit as of the first state emit in the main store, in case
--    * the context is lost.
--    */
--   struct r200_store backup_store;
--
--   /* Page flipping
--    */
--   GLuint doPageFlip;
--
--   /* Busy waiting
--    */
--   GLuint do_usleeps;
--   GLuint do_irqs;
--   GLuint irqsEmitted;
--   drm_radeon_irq_wait_t iw;
-+   struct radeon_ioctl ioctl;
-+   struct radeon_store store;
- 
-    /* Clientdata textures;
-     */
-    GLuint prefer_gart_client_texturing;
- 
--   /* Drawable, cliprect and scissor information
--    */
--   GLuint numClipRects;			/* Cliprects for the draw buffer */
--   drm_clip_rect_t *pClipRects;
--   unsigned int lastStamp;
--   GLboolean lost_context;
--   GLboolean save_on_next_emit;
--   radeonScreenPtr r200Screen;	/* Screen private DRI data */
--   drm_radeon_sarea_t *sarea;		/* Private SAREA data */
--
-    /* TCL stuff
-     */
-    GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
-@@ -893,15 +609,6 @@ struct r200_context {
-    GLuint TexGenCompSel;
-    GLmatrix tmpmat;
- 
--   /* buffer swap
--    */
--   int64_t swap_ust;
--   int64_t swap_missed_ust;
--
--   GLuint swap_count;
--   GLuint swap_missed_count;
--
--
-    /* r200_tcl.c
-     */
-    struct r200_tcl_info tcl;
-@@ -910,14 +617,6 @@ struct r200_context {
-     */
-    struct r200_swtcl_info swtcl;
- 
--   /* Mirrors of some DRI state
--    */
--   struct r200_dri_mirror dri;
--
--   /* Configuration cache
--    */
--   driOptionCache optionCache;
--
-    GLboolean using_hyperz;
-    GLboolean texmicrotile;
- 
-@@ -927,28 +626,10 @@ struct r200_context {
- #define R200_CONTEXT(ctx)		((r200ContextPtr)(ctx->DriverCtx))
- 
- 
--static INLINE GLuint r200PackColor( GLuint cpp,
--					GLubyte r, GLubyte g,
--					GLubyte b, GLubyte a )
--{
--   switch ( cpp ) {
--   case 2:
--      return PACK_COLOR_565( r, g, b );
--   case 4:
--      return PACK_COLOR_8888( a, r, g, b );
--   default:
--      return 0;
--   }
--}
--
--
- extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
- extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
- 				    __DRIcontextPrivate *driContextPriv,
- 				    void *sharedContextPrivate);
--extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
--extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv,
--			       int x, int y, int w, int h );
- extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
- 				  __DRIdrawablePrivate *driDrawPriv,
- 				  __DRIdrawablePrivate *driReadPriv );
-@@ -957,28 +638,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
- /* ================================================================
-  * Debugging:
-  */
--#define DO_DEBUG		1
- 
--#if DO_DEBUG
--extern int R200_DEBUG;
--#else
--#define R200_DEBUG		0
--#endif
-+#define R200_DEBUG RADEON_DEBUG
-+
- 
--#define DEBUG_TEXTURE	0x001
--#define DEBUG_STATE	0x002
--#define DEBUG_IOCTL	0x004
--#define DEBUG_PRIMS	0x008
--#define DEBUG_VERTS	0x010
--#define DEBUG_FALLBACKS	0x020
--#define DEBUG_VFMT	0x040
--#define DEBUG_CODEGEN	0x080
--#define DEBUG_VERBOSE	0x100
--#define DEBUG_DRI       0x200
--#define DEBUG_DMA       0x400
--#define DEBUG_SANITY    0x800
--#define DEBUG_SYNC      0x1000
--#define DEBUG_PIXEL     0x2000
--#define DEBUG_MEMORY    0x4000
- 
- #endif /* __R200_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
-index d514b28..85c1b7b 100644
---- a/src/mesa/drivers/dri/r200/r200_fragshader.c
-+++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
-@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx )
- 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
- 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
-       }
--      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor (
-+      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
- 	 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
-    }
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
-index 0741e57..c08968f 100644
---- a/src/mesa/drivers/dri/r200/r200_ioctl.c
-+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
-@@ -41,6 +41,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "swrast/swrast.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -54,635 +56,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define R200_TIMEOUT             512
- #define R200_IDLE_RETRY           16
- 
--
--static void r200WaitForIdle( r200ContextPtr rmesa );
--
--
--/* At this point we were in FlushCmdBufLocked but we had lost our context, so
-- * we need to unwire our current cmdbuf, hook the one with the saved state in
-- * it, flush it, and then put the current one back.  This is so commands at the
-- * start of a cmdbuf can rely on the state being kept from the previous one.
-- */
--static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
--{
--   GLuint nr_released_bufs;
--   struct r200_store saved_store;
--
--   if (rmesa->backup_store.cmd_used == 0)
--      return;
--
--   if (R200_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Emitting backup state on lost context\n");
--
--   rmesa->lost_context = GL_FALSE;
--
--   nr_released_bufs = rmesa->dma.nr_released_bufs;
--   saved_store = rmesa->store;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->store = rmesa->backup_store;
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--   rmesa->dma.nr_released_bufs = nr_released_bufs;
--   rmesa->store = saved_store;
--}
--
--int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
--{
--   int ret, i;
--   drm_radeon_cmd_buffer_t cmd;
--
--   if (rmesa->lost_context)
--      r200BackUpAndEmitLostStateLocked( rmesa );
--
--   if (R200_DEBUG & DEBUG_IOCTL) {
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--
--      if (0 & R200_DEBUG & DEBUG_VERBOSE) 
--	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
--	    fprintf(stderr, "%d: %x\n", i/4, 
--		    *(int *)(&rmesa->store.cmd_buf[i]));
--   }
--
--   if (R200_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
--	      rmesa->dma.nr_released_bufs);
--
--
--   if (R200_DEBUG & DEBUG_SANITY) {
--      if (rmesa->state.scissor.enabled) 
--	 ret = r200SanityCmdBuffer( rmesa, 
--				    rmesa->state.scissor.numClipRects,
--				    rmesa->state.scissor.pClipRects);
--      else
--	 ret = r200SanityCmdBuffer( rmesa, 
--				    rmesa->numClipRects,
--				    rmesa->pClipRects);
--      if (ret) {
--	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
--	 goto out;
--      }
--   }
--
--
--   if (R200_DEBUG & DEBUG_MEMORY) {
--      if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
--				     & rmesa->swapped ) ) {
--	 fprintf( stderr, "%s: texture memory is inconsistent - expect "
--		  "mangled textures\n", __FUNCTION__ );
--      }
--   }
--
--
--   cmd.bufsz = rmesa->store.cmd_used;
--   cmd.buf = rmesa->store.cmd_buf;
--
--   if (rmesa->state.scissor.enabled) {
--      cmd.nbox = rmesa->state.scissor.numClipRects;
--      cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects;
--   } else {
--      cmd.nbox = rmesa->numClipRects;
--      cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects;
--   }
--
--   ret = drmCommandWrite( rmesa->dri.fd,
--			  DRM_RADEON_CMDBUF,
--			  &cmd, sizeof(cmd) );
--
--   if (ret)
--      fprintf(stderr, "drmCommandWrite: %d\n", ret);
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
--      r200WaitForIdleLocked( rmesa );
--   }
--
--
-- out:
--   rmesa->store.primnr = 0;
--   rmesa->store.statenr = 0;
--   rmesa->store.cmd_used = 0;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->save_on_next_emit = 1;
--
--   return ret;
--}
--
--
--/* Note: does not emit any commands to avoid recursion on
-- * r200AllocCmdBuf.
-- */
--void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
--{
--   int ret;
--
--   LOCK_HARDWARE( rmesa );
--
--   ret = r200FlushCmdBufLocked( rmesa, caller );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if (ret) {
--      fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
--      exit(ret);
--   }
--}
--
--
--/* =============================================================
-- * Hardware vertex buffer handling
-- */
--
--
--void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
--{
--   struct r200_dma_buffer *dmabuf;
--   int fd = rmesa->dri.fd;
--   int index = 0;
--   int size = 0;
--   drmDMAReq dma;
--   int ret;
--
--   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--      fprintf(stderr, "%s\n", __FUNCTION__);  
--
--   if (rmesa->dma.flush) {
--      rmesa->dma.flush( rmesa );
--   }
--
--   if (rmesa->dma.current.buf)
--      r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--
--   if (rmesa->dma.nr_released_bufs > 4)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--
--   dma.context = rmesa->dri.hwContext;
--   dma.send_count = 0;
--   dma.send_list = NULL;
--   dma.send_sizes = NULL;
--   dma.flags = 0;
--   dma.request_count = 1;
--   dma.request_size = RADEON_BUFFER_SIZE;
--   dma.request_list = &index;
--   dma.request_sizes = &size;
--   dma.granted_count = 0;
--
--   LOCK_HARDWARE(rmesa);	/* no need to validate */
--
--   while (1) {
--      ret = drmDMA( fd, &dma );
--      if (ret == 0)
--	 break;
--   
--      if (rmesa->dma.nr_released_bufs) {
--	 r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--      }
--
--      if (rmesa->do_usleeps) {
--	 UNLOCK_HARDWARE( rmesa );
--	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
--      }
--   }
--
--   UNLOCK_HARDWARE(rmesa);
--
--   if (R200_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "Allocated buffer %d\n", index);
--
--   dmabuf = CALLOC_STRUCT( r200_dma_buffer );
--   dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
--   dmabuf->refcount = 1;
--
--   rmesa->dma.current.buf = dmabuf;
--   rmesa->dma.current.address = dmabuf->buf->address;
--   rmesa->dma.current.end = dmabuf->buf->total;
--   rmesa->dma.current.start = 0;
--   rmesa->dma.current.ptr = 0;
--}
--
--void r200ReleaseDmaRegion( r200ContextPtr rmesa,
--			     struct r200_dma_region *region,
--			     const char *caller )
--{
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--   
--   if (!region->buf)
--      return;
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (--region->buf->refcount == 0) {
--      drm_radeon_cmd_header_t *cmd;
--
--      if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
--		 region->buf->buf->idx);  
--      
--      cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), 
--						     __FUNCTION__ );
--      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
--      cmd->dma.buf_idx = region->buf->buf->idx;
--      FREE(region->buf);
--      rmesa->dma.nr_released_bufs++;
--   }
--
--   region->buf = NULL;
--   region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r200AllocDmaRegion( r200ContextPtr rmesa, 
--			   struct r200_dma_region *region,
--			   int bytes,
--			   int alignment )
--{
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (region->buf)
--      r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
--
--   alignment--;
--   rmesa->dma.current.start = rmesa->dma.current.ptr = 
--      (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      r200RefillCurrentDmaRegion( rmesa );
--
--   region->start = rmesa->dma.current.start;
--   region->ptr = rmesa->dma.current.start;
--   region->end = rmesa->dma.current.start + bytes;
--   region->address = rmesa->dma.current.address;
--   region->buf = rmesa->dma.current.buf;
--   region->buf->refcount++;
--
--   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
--   rmesa->dma.current.start = 
--      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
--
--   assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
--}
--
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t r200GetLastFrame(r200ContextPtr rmesa)
--{
--   drm_radeon_getparam_t gp;
--   int ret;
--   uint32_t frame;
--
--   gp.param = RADEON_PARAM_LAST_FRAME;
--   gp.value = (int *)&frame;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
--			      &gp, sizeof(gp) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--
--   return frame;
--}
--
--static void r200EmitIrqLocked( r200ContextPtr rmesa )
--{
--   drm_radeon_irq_emit_t ie;
--   int ret;
--
--   ie.irq_seq = &rmesa->iw.irq_seq;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
--			      &ie, sizeof(ie) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void r200WaitIrq( r200ContextPtr rmesa )
--{
--   int ret;
--
--   do {
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
--			     &rmesa->iw, sizeof(rmesa->iw) );
--   } while (ret && (errno == EINTR || errno == EBUSY));
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--
--   if (rmesa->do_irqs) {
--      if (r200GetLastFrame(rmesa) < sarea->last_frame) {
--	 if (!rmesa->irqsEmitted) {
--	    while (r200GetLastFrame (rmesa) < sarea->last_frame)
--	       ;
--	 }
--	 else {
--	    UNLOCK_HARDWARE( rmesa ); 
--	    r200WaitIrq( rmesa );	
--	    LOCK_HARDWARE( rmesa ); 
--	 }
--	 rmesa->irqsEmitted = 10;
--      }
--
--      if (rmesa->irqsEmitted) {
--	 r200EmitIrqLocked( rmesa );
--	 rmesa->irqsEmitted--;
--      }
--   } 
--   else {
--      while (r200GetLastFrame (rmesa) < sarea->last_frame) {
--	 UNLOCK_HARDWARE( rmesa ); 
--	 if (rmesa->do_usleeps) 
--	    DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa ); 
--      }
--   }
--}
--
--
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
--		      const drm_clip_rect_t	 *rect)
--{
--   r200ContextPtr rmesa;
--   GLint nbox, i, ret;
--   GLboolean   missed_target;
--   int64_t ust;
--   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
--
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx );
--   }
--
--   R200_FIREVERTICES( rmesa );
--
--   LOCK_HARDWARE( rmesa );
--
--
--   /* Throttle the frame rate -- only allow one pending swap buffers
--    * request at a time.
--    */
--   r200WaitForFrameCompletion( rmesa );
--   if (!rect)
--   {
--       UNLOCK_HARDWARE( rmesa );
--       driWaitForVBlank( dPriv, & missed_target );
--       LOCK_HARDWARE( rmesa );
--   }
--
--   nbox = dPriv->numClipRects; /* must be in locked region */
--
--   for ( i = 0 ; i < nbox ; ) {
--      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      GLint n = 0;
--
--      for ( ; i < nr ; i++ ) {
--
--	  *b = box[i];
--
--	  if (rect)
--	  {
--	     if (rect->x1 > b->x1)
--		 b->x1 = rect->x1;
--	     if (rect->y1 > b->y1)
--		 b->y1 = rect->y1;
--	     if (rect->x2 < b->x2)
--		 b->x2 = rect->x2;
--	     if (rect->y2 < b->y2)
--		 b->y2 = rect->y2;
--
--	     if (b->x1 >= b->x2 || b->y1 >= b->y2)
--		 continue;
--	  }
--
--	  b++;
--	  n++;
--      }
--      rmesa->sarea->nbox = n;
--
--      if (!n)
--	 continue;
--
--      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
--
--      if ( ret ) {
--	 fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
--	 UNLOCK_HARDWARE( rmesa );
--	 exit( 1 );
--      }
--   }
--
--   UNLOCK_HARDWARE( rmesa );
--   if (!rect)
--   {
--       rmesa->hw.all_dirty = GL_TRUE;
--
--       rmesa->swap_count++;
--       (*psp->systemTime->getUST)( & ust );
--       if ( missed_target ) {
--	   rmesa->swap_missed_count++;
--	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
--       }
--
--       rmesa->swap_ust = ust;
--
--       sched_yield();
--   }
--}
--
--void r200PageFlip( __DRIdrawablePrivate *dPriv )
-+static void r200UserClear(GLcontext *ctx, GLuint flags)
- {
--   r200ContextPtr rmesa;
--   GLint ret;
--   GLboolean   missed_target;
--   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
-+   if (flags & (RADEON_FRONT | RADEON_BACK)) {
- 
--   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
- 
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--	      rmesa->sarea->pfCurrentPage);
--   }
--
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   if (!dPriv->numClipRects) {
--      UNLOCK_HARDWARE( rmesa );
--      usleep( 10000 );		/* throttle invisible client 10ms */
--      return;
-    }
-+	  
-+   if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
-+       && (flags & RADEON_CLEAR_FASTZ)) {
- 
--   /* Need to do this for the perf box placement:
--    */
--   {
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      b[0] = box[0];
--      rmesa->sarea->nbox = 1;
--   }
--
--   /* Throttle the frame rate -- only allow a few pending swap buffers
--    * request at a time.
--    */
--   r200WaitForFrameCompletion( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   driWaitForVBlank( dPriv, & missed_target );
--   if ( missed_target ) {
--      rmesa->swap_missed_count++;
--      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
-    }
--   LOCK_HARDWARE( rmesa );
- 
--   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
--      exit( 1 );
--   }
--
--   rmesa->swap_count++;
--   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
--
--#if 000
--   if ( rmesa->sarea->pfCurrentPage == 1 ) {
--	 rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
--	 rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
--   } else {
--	 rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
--	 rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
--   }
--
--   R200_STATECHANGE( rmesa, ctx );
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
--					   + rmesa->r200Screen->fbLocation;
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
--   }
--#else
--   /* Get ready for drawing next frame.  Update the renderbuffers'
--    * flippedOffset/Pitch fields so we draw into the right place.
--    */
--   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                        rmesa->sarea->pfCurrentPage);
--
--
--   r200UpdateDrawBuffer(rmesa->glCtx);
--#endif
- }
- 
--
--/* ================================================================
-- * Buffer clear
-- */
--static void r200Clear( GLcontext *ctx, GLbitfield mask )
-+static void r200KernelClear(GLcontext *ctx, GLuint flags)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLuint flags = 0;
--   GLuint color_mask = 0;
--   GLint ret, i;
--   GLint cx, cy, cw, ch;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLint cx, cy, cw, ch, ret;
-+   GLuint i;
- 
--   if ( R200_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "r200Clear\n");
--   }
--
--   {
--      LOCK_HARDWARE( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      if ( dPriv->numClipRects == 0 ) 
--	 return;
--   }
--
--   r200Flush( ctx );
--
--   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
--      flags |= RADEON_FRONT;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_FRONT_LEFT;
--   }
--
--   if ( mask & BUFFER_BIT_BACK_LEFT ) {
--      flags |= RADEON_BACK;
--      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      mask &= ~BUFFER_BIT_BACK_LEFT;
--   }
--
--   if ( mask & BUFFER_BIT_DEPTH ) {
--      flags |= RADEON_DEPTH;
--      mask &= ~BUFFER_BIT_DEPTH;
--   }
--
--   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
--      flags |= RADEON_STENCIL;
--      mask &= ~BUFFER_BIT_STENCIL;
--   }
--
--   if ( mask ) {
--      if (R200_DEBUG & DEBUG_FALLBACKS)
--	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
--      _swrast_Clear( ctx, mask );
--   }
--
--   if ( !flags ) 
--      return;
--
--   if (rmesa->using_hyperz) {
--      flags |= RADEON_USE_COMP_ZBUF;
--/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
--	 flags |= RADEON_USE_HIERZ; */
--      if (!(rmesa->state.stencil.hwBuffer) ||
--	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
--	    ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
--	  flags |= RADEON_CLEAR_FASTZ;
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   /* compute region after locking: */
--   cx = ctx->DrawBuffer->_Xmin;
--   cy = ctx->DrawBuffer->_Ymin;
--   cw = ctx->DrawBuffer->_Xmax - cx;
--   ch = ctx->DrawBuffer->_Ymax - cy;
--
--   /* Flip top to bottom */
--   cx += dPriv->x;
--   cy  = dPriv->y + dPriv->h - cy - ch;
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* Throttle the number of clear ioctls we do.
-     */
-@@ -693,7 +88,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 
-       gp.param = RADEON_PARAM_LAST_CLEAR;
-       gp.value = (int *)&clear;
--      ret = drmCommandWriteRead( rmesa->dri.fd,
-+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
- 		      DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
- 
-       if ( ret ) {
-@@ -703,24 +98,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 
-       /* Clear throttling needs more thought.
-        */
--      if ( rmesa->sarea->last_clear - clear <= 25 ) {
-+      if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
- 	 break;
-       }
-       
--      if (rmesa->do_usleeps) {
--	 UNLOCK_HARDWARE( rmesa );
-+      if (rmesa->radeon.do_usleeps) {
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
-+	 LOCK_HARDWARE( &rmesa->radeon );
-       }
-    }
- 
-    /* Send current state to the hardware */
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-+
-+
-+  /* compute region after locking: */
-+   cx = ctx->DrawBuffer->_Xmin;
-+   cy = ctx->DrawBuffer->_Ymin;
-+   cw = ctx->DrawBuffer->_Xmax - cx;
-+   ch = ctx->DrawBuffer->_Ymax - cy;
- 
-+   /* Flip top to bottom */
-+   cx += dPriv->x;
-+   cy  = dPriv->y + dPriv->h - cy - ch;
-    for ( i = 0 ; i < dPriv->numClipRects ; ) {
-       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-       drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
-+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
-       drm_radeon_clear_t clear;
-       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
-       GLint n = 0;
-@@ -755,17 +160,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 	 }
-       }
- 
--      rmesa->sarea->nbox = n;
-+      rmesa->radeon.sarea->nbox = n;
- 
-       clear.flags       = flags;
--      clear.clear_color = rmesa->state.color.clear;
--      clear.clear_depth = rmesa->state.depth.clear;	/* needed for hyperz */
-+      clear.clear_color = rmesa->radeon.state.color.clear;
-+      clear.clear_depth = rmesa->radeon.state.depth.clear;	/* needed for hyperz */
-       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      clear.depth_mask  = rmesa->state.stencil.clear;
-+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
-       clear.depth_boxes = depth_boxes;
- 
-       n--;
--      b = rmesa->sarea->boxes;
-+      b = rmesa->radeon.sarea->boxes;
-       for ( ; n >= 0 ; n-- ) {
- 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
- 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
-@@ -774,83 +179,91 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
- 	 depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
-       }
- 
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
- 			     &clear, sizeof(clear));
- 
- 
-       if ( ret ) {
--	 UNLOCK_HARDWARE( rmesa );
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
- 	 exit( 1 );
-       }
-    }
--
--   UNLOCK_HARDWARE( rmesa );
--   rmesa->hw.all_dirty = GL_TRUE;
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
--
--
--void r200WaitForIdleLocked( r200ContextPtr rmesa )
-+/* ================================================================
-+ * Buffer clear
-+ */
-+static void r200Clear( GLcontext *ctx, GLbitfield mask )
- {
--    int ret;
--    int i = 0;
--    
--    do {
--       ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
--       if (ret) 
--	  DO_USLEEP( 1 );
--    } while (ret && ++i < 100);
--    
--    if ( ret < 0 ) {
--       UNLOCK_HARDWARE( rmesa );
--       fprintf( stderr, "Error: R200 timed out... exiting\n" );
--       exit( -1 );
--    }
--}
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLuint flags = 0;
-+   GLuint color_mask = 0;
-+   GLint ret;
- 
-+   if ( R200_DEBUG & DEBUG_IOCTL ) {
-+      fprintf( stderr, "r200Clear\n");
-+   }
- 
--static void r200WaitForIdle( r200ContextPtr rmesa )
--{
--   LOCK_HARDWARE(rmesa);
--   r200WaitForIdleLocked( rmesa );
--   UNLOCK_HARDWARE(rmesa);
--}
-+   {
-+      LOCK_HARDWARE( &rmesa->radeon );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-+      if ( dPriv->numClipRects == 0 ) 
-+	 return;
-+   }
- 
-+   radeonFlush( ctx );
- 
--void r200Flush( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
-+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
-+      flags |= RADEON_FRONT;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_FRONT_LEFT;
-+   }
- 
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
-+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
-+      flags |= RADEON_BACK;
-+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-+      mask &= ~BUFFER_BIT_BACK_LEFT;
-+   }
- 
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
-+   if ( mask & BUFFER_BIT_DEPTH ) {
-+      flags |= RADEON_DEPTH;
-+      mask &= ~BUFFER_BIT_DEPTH;
-+   }
- 
--   r200EmitState( rmesa );
--   
--   if (rmesa->store.cmd_used)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--}
-+   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
-+      flags |= RADEON_STENCIL;
-+      mask &= ~BUFFER_BIT_STENCIL;
-+   }
- 
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void r200Finish( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   r200Flush( ctx );
-+   if ( mask ) {
-+      if (R200_DEBUG & DEBUG_FALLBACKS)
-+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
-+      _swrast_Clear( ctx, mask );
-+   }
-+
-+   if ( !flags ) 
-+      return;
- 
--   if (rmesa->do_irqs) {
--      LOCK_HARDWARE( rmesa );
--      r200EmitIrqLocked( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      r200WaitIrq( rmesa );
-+   if (rmesa->using_hyperz) {
-+      flags |= RADEON_USE_COMP_ZBUF;
-+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
-+	 flags |= RADEON_USE_HIERZ; */
-+      if (!(rmesa->radeon.state.stencil.hwBuffer) ||
-+	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-+	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
-+	  flags |= RADEON_CLEAR_FASTZ;
-+      }
-    }
--   else 
--      r200WaitForIdle( rmesa );
--}
- 
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+      r200UserClear(ctx, flags);
-+   else
-+      r200KernelClear(ctx, flags);
-+
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
-+}
- 
- /* This version of AllocateMemoryMESA allocates only GART memory, and
-  * only does so after the point at which the driver has been
-@@ -875,7 +288,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-       fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
- 	      writefreq, priority);
- 
--   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map)
-+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
-       return NULL;
- 
-    if (getenv("R200_NO_ALLOC"))
-@@ -886,7 +299,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-    alloc.size = size;
-    alloc.region_offset = &region_offset;
- 
--   ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
-+   ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
- 			      DRM_RADEON_ALLOC,
- 			      &alloc, sizeof(alloc));
-    
-@@ -896,7 +309,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
-    }
-    
-    {
--      char *region_start = (char *)rmesa->r200Screen->gartTextures.map;
-+      char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-       return (void *)(region_start + region_offset);
-    }
- }
-@@ -914,24 +327,24 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
-    if (R200_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
- 
--   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) {
-+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
-       fprintf(stderr, "%s: no context\n", __FUNCTION__);
-       return;
-    }
- 
--   region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- 
-    if (region_offset < 0 || 
--       region_offset > rmesa->r200Screen->gartTextures.size) {
-+       region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
-       fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
--	      rmesa->r200Screen->gartTextures.size);
-+	      rmesa->radeon.radeonScreen->gartTextures.size);
-       return;
-    }
- 
-    memfree.region = RADEON_MEM_REGION_GART;
-    memfree.region_offset = region_offset;
-    
--   ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
-+   ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
- 			  DRM_RADEON_FREE,
- 			  &memfree, sizeof(memfree));
-    
-@@ -956,16 +369,16 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
- 
-    card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
- 
--   return card_offset - rmesa->r200Screen->gart_base;
-+   return card_offset - rmesa->radeon.radeonScreen->gart_base;
- }
- 
- GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
- 			   GLint size )
- {
--   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-    int valid = (size >= 0 &&
- 		offset >= 0 &&
--		offset + size < rmesa->r200Screen->gartTextures.size);
-+		offset + size < rmesa->radeon.radeonScreen->gartTextures.size);
- 
-    if (R200_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
-@@ -976,12 +389,12 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
- 
- GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
- {
--   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
-+   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- 
--   if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size)
-+   if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size)
-       return ~0;
-    else
--      return rmesa->r200Screen->gart_texture_offset + offset;
-+      return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
- }
- 
- 
-@@ -989,7 +402,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
- void r200InitIoctlFuncs( struct dd_function_table *functions )
- {
-     functions->Clear = r200Clear;
--    functions->Finish = r200Finish;
--    functions->Flush = r200Flush;
-+    functions->Finish = radeonFinish;
-+    functions->Flush = radeonFlush;
- }
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
-index f7458e4..2a4b8a1 100644
---- a/src/mesa/drivers/dri/r200/r200_ioctl.h
-+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
-@@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/simple_list.h"
- #include "radeon_dri.h"
--#include "r200_lock.h"
-+
-+#include "radeon_bocs_wrapper.h"
- 
- #include "xf86drm.h"
- #include "drm.h"
- #include "radeon_drm.h"
- 
--extern void r200EmitState( r200ContextPtr rmesa );
- extern void r200EmitVertexAOS( r200ContextPtr rmesa,
--				 GLuint vertex_size,
--				 GLuint offset );
-+			       GLuint vertex_size,
-+			       struct radeon_bo *bo,
-+			       GLuint offset );
- 
- extern void r200EmitVbufPrim( r200ContextPtr rmesa,
- 				GLuint primitive,
- 				GLuint vertex_nr );
- 
--extern void r200FlushElts( r200ContextPtr rmesa );
-+extern void r200FlushElts(GLcontext *ctx);
- 
- extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
- 					   GLuint primitive,
- 					   GLuint min_nr );
- 
--extern void r200EmitAOS( r200ContextPtr rmesa,
--			   struct r200_dma_region **regions,
--			   GLuint n,
--			   GLuint offset );
--
--extern void r200EmitBlit( r200ContextPtr rmesa,
--			  GLuint color_fmt,
--			  GLuint src_pitch,
--			  GLuint src_offset,
--			  GLuint dst_pitch,
--			  GLuint dst_offset,
--			  GLint srcx, GLint srcy,
--			  GLint dstx, GLint dsty,
--			  GLuint w, GLuint h );
--
--extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
--
--extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
--extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
--
--extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
--
--extern void r200AllocDmaRegion( r200ContextPtr rmesa,
--				  struct r200_dma_region *region,
--				  int bytes, 
--				  int alignment );
--
--extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
--				    struct r200_dma_region *region,
--				    const char *caller );
--
--extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
--			    const drm_clip_rect_t      *rect);
--extern void r200PageFlip( __DRIdrawablePrivate *drawable );
--extern void r200Flush( GLcontext *ctx );
--extern void r200Finish( GLcontext *ctx );
--extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
--extern void r200WaitForVBlank( r200ContextPtr rmesa );
-+extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
-+
- extern void r200InitIoctlFuncs( struct dd_function_table *functions );
- 
- extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
-@@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa );
-  */
- #define R200_NEWPRIM( rmesa )			\
- do {						\
--   if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );	\
-+   if ( rmesa->radeon.dma.flush )			\
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
- } while (0)
- 
- /* Can accomodate several state changes and primitive changes without
-@@ -130,7 +95,7 @@ do {						\
- do {								\
-    R200_NEWPRIM( rmesa );					\
-    rmesa->hw.ATOM.dirty = GL_TRUE;				\
--   rmesa->hw.is_dirty = GL_TRUE;				\
-+   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
- } while (0)
- 
- #define R200_DB_STATE( ATOM )			        \
-@@ -139,13 +104,13 @@ do {								\
- 
- static INLINE int R200_DB_STATECHANGE( 
-    r200ContextPtr rmesa,
--   struct r200_state_atom *atom )
-+   struct radeon_state_atom *atom )
- {
-    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
--      int *tmp;
-+      GLuint *tmp;
-       R200_NEWPRIM( rmesa );
-       atom->dirty = GL_TRUE;
--      rmesa->hw.is_dirty = GL_TRUE;
-+      rmesa->radeon.hw.is_dirty = GL_TRUE;
-       tmp = atom->cmd; 
-       atom->cmd = atom->lastcmd;
-       atom->lastcmd = tmp;
-@@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE(
- }
- 
- 
--/* Fire the buffered vertices no matter what.
-- */
--#define R200_FIREVERTICES( rmesa )			\
--do {							\
--   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
--      r200Flush( rmesa->glCtx );			\
--   }							\
--} while (0)
--
- /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
-  * are available, you will also be adding an rmesa->state.max_state_size because
-  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
-@@ -174,36 +130,36 @@ do {							\
- #define ELTS_BUFSZ(nr)	(12 + nr * 2)
- #define VBUF_BUFSZ	(3 * sizeof(int))
- 
--/* Ensure that a minimum amount of space is available in the command buffer.
-- * This is used to ensure atomicity of state updates with the rendering requests
-- * that rely on them.
-- *
-- * An alternative would be to implement a "soft lock" such that when the buffer
-- * wraps at an inopportune time, we grab the lock, flush the current buffer,
-- * and hang on to the lock until the critical section is finished and we flush
-- * the buffer again and unlock.
-- */
--static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
-+static inline uint32_t cmdpacket3(int cmd_type)
- {
--   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
--      r200FlushCmdBuf( rmesa, __FUNCTION__ );
--   assert( bytes <= R200_CMD_BUF_SZ );
--}
-+  drm_radeon_cmd_header_t cmd;
- 
--/* Alloc space in the command buffer
-- */
--static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
--					 int bytes, const char *where )
--{
--   char * head;
-+  cmd.i = 0;
-+  cmd.header.cmd_type = cmd_type;
- 
--   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
--      r200FlushCmdBuf( rmesa, where );
-+  return (uint32_t)cmd.i;
- 
--   head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--   rmesa->store.cmd_used += bytes;
--   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
--   return head;
- }
- 
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+
- #endif /* __R200_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
-deleted file mode 100644
-index 99661a4..0000000
---- a/src/mesa/drivers/dri/r200/r200_lock.c
-+++ /dev/null
-@@ -1,116 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
-- 
--#include "r200_context.h"
--#include "r200_lock.h"
--#include "r200_tex.h"
--#include "r200_state.h"
--#include "r200_ioctl.h"
--
--#include "drirenderbuffer.h"
--
--
--#if DEBUG_LOCKING
--char *prevLockFile = NULL;
--int prevLockLine = 0;
--#endif
--
--/* Turn on/off page flipping according to the flags in the sarea:
-- */
--static void
--r200UpdatePageFlipping( r200ContextPtr rmesa )
--{
--   rmesa->doPageFlip = rmesa->sarea->pfState;
--   if (rmesa->glCtx->WinSysDrawBuffer) {
--      driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                           rmesa->sarea->pfCurrentPage);
--   }
--}
--
--
--
--/* Update the hardware state.  This is called if another main/context.has
-- * grabbed the hardware lock, which includes the X server.  This
-- * function also updates the driver's window state after the X server
-- * moves, resizes or restacks a window -- the change will be reflected
-- * in the drawable position and clip rects.  Since the X server grabs
-- * the hardware lock when it changes the window state, this routine will
-- * automatically be called after such a change.
-- */
--void r200GetLock( r200ContextPtr rmesa, GLuint flags )
--{
--   __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *readable = rmesa->dri.readable;
--   __DRIscreenPrivate *sPriv = rmesa->dri.screen;
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--   int i;
--
--   drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
--
--   /* The window might have moved, so we might need to get new clip
--    * rects.
--    *
--    * NOTE: This releases and regrabs the hw lock to allow the X server
--    * to respond to the DRI protocol request for new drawable info.
--    * Since the hardware state depends on having the latest drawable
--    * clip rects, all state checking must be done _after_ this call.
--    */
--   DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable );
--   if (drawable != readable) {
--      DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable );
--   }
--
--   if ( rmesa->lastStamp != drawable->lastStamp ) {
--      r200UpdatePageFlipping( rmesa );
--      r200SetCliprects( rmesa );
--      r200UpdateViewportOffset( rmesa->glCtx );
--      driUpdateFramebufferSize(rmesa->glCtx, drawable);
--   }
--
--   R200_STATECHANGE( rmesa, ctx );
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
--   }
--   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
--
--   if ( sarea->ctx_owner != rmesa->dri.hwContext ) {
--      sarea->ctx_owner = rmesa->dri.hwContext;
--   }
--
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
--   }
--
--   rmesa->lost_context = GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h
-deleted file mode 100644
-index 4ff9890..0000000
---- a/src/mesa/drivers/dri/r200/r200_lock.h
-+++ /dev/null
-@@ -1,106 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __R200_LOCK_H__
--#define __R200_LOCK_H__
--
--extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
--
--/* Turn DEBUG_LOCKING on to find locking conflicts.
-- */
--#define DEBUG_LOCKING	0
--
--#if DEBUG_LOCKING
--extern char *prevLockFile;
--extern int prevLockLine;
--
--#define DEBUG_LOCK()							\
--   do {									\
--      prevLockFile = (__FILE__);					\
--      prevLockLine = (__LINE__);					\
--   } while (0)
--
--#define DEBUG_RESET()							\
--   do {									\
--      prevLockFile = 0;							\
--      prevLockLine = 0;							\
--   } while (0)
--
--#define DEBUG_CHECK_LOCK()						\
--   do {									\
--      if ( prevLockFile ) {						\
--	 fprintf( stderr,						\
--		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
--		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
--	 exit( 1 );							\
--      }									\
--   } while (0)
--
--#else
--
--#define DEBUG_LOCK()
--#define DEBUG_RESET()
--#define DEBUG_CHECK_LOCK()
--
--#endif
--
--/*
-- * !!! We may want to separate locks from locks with validation.  This
-- * could be used to improve performance for those things commands that
-- * do not do any drawing !!!
-- */
--
--
--/* Lock the hardware and validate our state.
-- */
--#define LOCK_HARDWARE( rmesa )					\
--   do {								\
--      char __ret = 0;						\
--      DEBUG_CHECK_LOCK();					\
--      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,		\
--	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );	\
--      if ( __ret )						\
--	 r200GetLock( rmesa, 0 );				\
--      DEBUG_LOCK();						\
--   } while (0)
--
--#define UNLOCK_HARDWARE( rmesa )					\
--   do {									\
--      DRM_UNLOCK( rmesa->dri.fd,					\
--		  rmesa->dri.hwLock,					\
--		  rmesa->dri.hwContext );				\
--      DEBUG_RESET();							\
--   } while (0)
--
--#endif /* __R200_LOCK_H__ */
-diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-index 8512b9a..5dbc202 100644
---- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
-@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_maos.h"
- #include "r200_tcl.h"
- 
--
--#if 0
--/* Usage:
-- *   - from r200_tcl_render
-- *   - call r200EmitArrays to ensure uptodate arrays in dma
-- *   - emit primitives (new type?) which reference the data
-- *       -- need to use elts for lineloop, quads, quadstrip/flat
-- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
-- *
-- */
--static void emit_ubyte_rgba3( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p\n",
--	      __FUNCTION__, count, stride, (void *)out);
--
--   for (i = 0; i < count; i++) {
--      out->red   = *data;
--      out->green = *(data+1);
--      out->blue  = *(data+2);
--      out->alpha = 0xFF;
--      out++;
--      data += stride;
--   }
--}
--
--static void emit_ubyte_rgba4( GLcontext *ctx,
--			      struct r200_dma_region *rvb,
--			      char *data,
--			      int stride,
--			      int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4) {
--      for (i = 0; i < count; i++)
--	 ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]);
--   } else {
--      for (i = 0; i < count; i++) {
--	 *(int *)out++ = LE32_TO_CPU(*(int *)data);
--	 data += stride;
--      }
--   }
--}
--
--
--static void emit_ubyte_rgba( GLcontext *ctx,
--			     struct r200_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 3:
--      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--}
--#endif
--
--
- #if defined(USE_X86_ASM)
- #define COPY_DWORDS( dst, src, nr )					\
- do {									\
-@@ -174,204 +70,34 @@ do {						\
- } while (0)
- #endif
- 
--
--static void emit_vecfog( GLcontext *ctx,
--			 struct r200_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
-+			     GLvoid *data, int stride, int count)
- {
--   int i;
--   GLfloat *out;
--
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
--
--   /* Emit the data
--    */
--   out = (GLfloat *)(rvb->address + rvb->start);
--   for (i = 0; i < count; i++) {
--      out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
--      out++;
--      data += stride;
--   }
--
-+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	uint32_t *out;
-+	int i;
-+	int size = 1;
-+
-+	if (stride == 0) {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+		count = 1;
-+		aos->stride = 0;
-+	} else {
-+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+		aos->stride = size;
-+	}
-+
-+	aos->components = size;
-+	aos->count = count;
-+
-+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-+	for (i = 0; i < count; i++) {
-+	  out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
-+	  out++;
-+	  data += stride;
-+	}
- }
- 
--
--static void emit_vec4( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4)
--      COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out++;
--	 data += stride;
--      }
--}
--
--
--static void emit_vec8( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 8)
--      COPY_DWORDS( out, data, count*2 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out += 2;
--	 data += stride;
--      }
--}
--
--static void emit_vec12( GLcontext *ctx,
--		       struct r200_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--	      __FUNCTION__, count, stride, (void *)out, (void *)data);
--
--   if (stride == 12)
--      COPY_DWORDS( out, data, count*3 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out += 3;
--	 data += stride;
--      }
--}
--
--static void emit_vec16( GLcontext *ctx,
--			struct r200_dma_region *rvb,
--			char *data,
--			int stride,
--			int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 16)
--      COPY_DWORDS( out, data, count*4 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out[3] = *(int *)(data+12);
--	 out += 4;
--	 data += stride;
--      }
--}
--
--
--static void emit_vector( GLcontext *ctx,
--			 struct r200_dma_region *rvb,
--			 char *data,
--			 int size,
--			 int stride,
--			 int count )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (R200_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d size %d stride %d\n",
--	      __FUNCTION__, count, size, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      r200AllocDmaRegion( rmesa, rvb, size * 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = size;
--   }
--   else {
--      r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = size;
--      rvb->aos_size = size;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 1:
--      emit_vec4( ctx, rvb, data, stride, count );
--      break;
--   case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
--      break;
--   case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_vec16( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--
--}
--
--
--
- /* Emit any changed arrays to new GART memory, re-emit a packet to
-  * update the arrays.  
-  */
-@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- {
-    r200ContextPtr rmesa = R200_CONTEXT( ctx );
-    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
--   struct r200_dma_region **component = rmesa->tcl.aos_components;
-    GLuint nr = 0;
-    GLuint vfmt0 = 0, vfmt1 = 0;
-    GLuint count = VB->Count;
-    GLuint i, emitsize;
- 
-+   //   fprintf(stderr,"emit arrays\n");
-    for ( i = 0; i < 15; i++ ) {
-       GLubyte attrib = vimap_rev[i];
-       if (attrib != 255) {
-@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- 	 case 3:
- 	    /* special handling to fix up fog. Will get us into trouble with vbos...*/
- 	    assert(attrib == VERT_ATTRIB_FOG);
--	    if (!rmesa->tcl.vertex_data[i].buf) {
-+	    if (!rmesa->tcl.aos[i].bo) {
- 	       if (ctx->VertexProgram._Enabled)
--		  emit_vector( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 1,
--			 VB->AttribPtr[attrib]->stride,
--			 count);
-+		  rcommon_emit_vector( ctx,
-+				       &(rmesa->tcl.aos[nr]),
-+				       (char *)VB->AttribPtr[attrib]->data,
-+				       1,
-+				       VB->AttribPtr[attrib]->stride,
-+				       count);
- 	       else
--		  emit_vecfog( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 VB->AttribPtr[attrib]->stride,
--			 count);
-+		 r200_emit_vecfog( ctx,
-+				   &(rmesa->tcl.aos[nr]),
-+				   (char *)VB->AttribPtr[attrib]->data,
-+				   VB->AttribPtr[attrib]->stride,
-+				   count);
- 	    }
- 	    vfmt0 |= R200_VTX_DISCRETE_FOG;
- 	    goto after_emit;
-@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
- 	 default:
- 	    assert(0);
- 	 }
--	 if (!rmesa->tcl.vertex_data[i].buf) {
--	    emit_vector( ctx,
--			 &(rmesa->tcl.vertex_data[i]),
--			 (char *)VB->AttribPtr[attrib]->data,
--			 emitsize,
--			 VB->AttribPtr[attrib]->stride,
--			 count );
-+	 if (!rmesa->tcl.aos[nr].bo) {
-+	   rcommon_emit_vector( ctx,
-+				&(rmesa->tcl.aos[nr]),
-+				(char *)VB->AttribPtr[attrib]->data,
-+				emitsize,
-+				VB->AttribPtr[attrib]->stride,
-+				count );
- 	 }
- after_emit:
- 	 assert(nr < 12);
--	 component[nr++] = &rmesa->tcl.vertex_data[i];
-+	 nr++;
-       }
-    }
- 
-@@ -501,12 +227,11 @@ after_emit:
- void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
- {
-    r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
--   /* only do it for changed inputs ? */
-    int i;
--   for (i = 0; i < 15; i++) {
--      if (newinputs & (1 << i))
--	 r200ReleaseDmaRegion( rmesa,
--	    &rmesa->tcl.vertex_data[i], __FUNCTION__ );
-+   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
-+     if (rmesa->tcl.aos[i].bo) {
-+       radeon_bo_unref(rmesa->tcl.aos[i].bo);
-+       rmesa->tcl.aos[i].bo = NULL;
-+     }
-    }
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
-index be68821..a6c6558 100644
---- a/src/mesa/drivers/dri/r200/r200_pixel.c
-+++ b/src/mesa/drivers/dri/r200/r200_pixel.c
-@@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
- 	     const void *pixels, GLint sz, GLint pitch )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
-@@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx,
-    if (*height <= 0)
-       return GL_FALSE;
- 
--   *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
--	    (*x + *width - 1) * rmesa->r200Screen->cpp);
-+   *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
-+	    (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
- 
-    return GL_TRUE;
- }
-@@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLint pitch = pack->RowLength ? pack->RowLength : width;
-    GLint blit_format;
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
-    GLint size = width * height * cpp;
- 
-+   return GL_FALSE;
-+#if 0
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
-    /* Only accelerate reading to GART buffers.
-     */
-    if ( !r200IsGartMemory(rmesa, pixels, 
--			 pitch * height * rmesa->r200Screen->cpp ) ) {
-+			 pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
-       if (R200_DEBUG & DEBUG_PIXEL)
- 	 fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
--      return GL_FALSE;
-    }
- 
-    /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
-@@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx,
-    if (!check_color(ctx, type, format, pack, pixels, size, pitch))
-       return GL_FALSE;
- 
--   switch ( rmesa->r200Screen->cpp ) {
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 4:
-       blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
-       break;
-@@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx,
-     * a full command buffer expects to be called unlocked.  As a
-     * workaround, immediately flush the buffer on aquiring the lock.
-     */
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    if (rmesa->store.cmd_used)
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
- 		       &size)) {
--      UNLOCK_HARDWARE( rmesa );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-       if (R200_DEBUG & DEBUG_PIXEL)
- 	 fprintf(stderr, "%s totally clipped -- nothing to do\n",
- 		 __FUNCTION__);
-@@ -212,14 +213,14 @@ r200TryReadPixels( GLcontext *ctx,
-    }
- 
-    {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+      __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-       driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
-       int nbox = dPriv->numClipRects;
-       int src_offset = drb->offset
--		     + rmesa->r200Screen->fbLocation;
-+		     + rmesa->radeon.radeonScreen->fbLocation;
-       int src_pitch = drb->pitch * drb->cpp;
-       int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
--      int dst_pitch = pitch * rmesa->r200Screen->cpp;
-+      int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
-       drm_clip_rect_t *box = dPriv->pClipRects;
-       int i;
- 
-@@ -257,12 +258,12 @@ r200TryReadPixels( GLcontext *ctx,
- 		       bw, bh );
-       }
- 
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-    }
--   UNLOCK_HARDWARE( rmesa );
--
--   r200Finish( ctx ); /* required by GL */
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- 
-+   radeonFinish( ctx ); /* required by GL */
-+#endif
-    return GL_TRUE;
- }
- 
-@@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx,
- 			 GLuint planemask)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    drm_clip_rect_t *box = dPriv->pClipRects;
-    struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
-    driRenderbuffer *drb = (driRenderbuffer *) rb;
-@@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx,
-    int blit_format;
-    int size;
-    int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
--   int src_pitch = pitch * rmesa->r200Screen->cpp;
-+   int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
--
--   switch ( rmesa->r200Screen->cpp ) {
-+#if 0
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 2:
-       blit_format = R200_CP_COLOR_FORMAT_RGB565;
-       break;
-@@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx,
-    }
- 
- 
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    if (rmesa->store.cmd_used)
--      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    y -= height;			/* cope with pixel zoom */
-    
-    if (!clip_pixelrect(ctx, ctx->DrawBuffer,
- 		       &x, &y, &width, &height,
- 		       &size)) {
--      UNLOCK_HARDWARE( rmesa );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-       return;
-    }
- 
-@@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx,
- 		    blit_format,
- 		    src_pitch, src_offset,
- 		    drb->pitch * drb->cpp,
--		    drb->offset + rmesa->r200Screen->fbLocation,
-+		    drb->offset + rmesa->radeon.radeonScreen->fbLocation,
- 		    bx - x, by - y,
- 		    bx, by,
- 		    bw, bh );
-    }
- 
--   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
--   r200WaitForIdleLocked( rmesa ); /* required by GL */
--   UNLOCK_HARDWARE( rmesa );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
-+   radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
-+   UNLOCK_HARDWARE( &rmesa->radeon );
-+#endif
- }
- 
- 
-@@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLint pitch = unpack->RowLength ? unpack->RowLength : width;
-    GLuint planemask;
--   GLuint cpp = rmesa->r200Screen->cpp;
-+   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
-    GLint size = height * pitch * cpp;
- 
-    if (R200_DEBUG & DEBUG_PIXEL)
-@@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx,
-    case GL_RGB:
-    case GL_RGBA:
-    case GL_BGRA:
--      planemask = r200PackColor(cpp,
-+      planemask = radeonPackColor(cpp,
- 				ctx->Color.ColorMask[RCOMP],
- 				ctx->Color.ColorMask[GCOMP],
- 				ctx->Color.ColorMask[BCOMP],
-@@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx,
-       return GL_FALSE;
-    }
- 
--   if ( r200IsGartMemory(rmesa, pixels, size) )
-+   if (0)// r200IsGartMemory(rmesa, pixels, size) )
-    {
-       do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
-       return GL_TRUE;
-@@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   if (rmesa->Fallback)
-+   if (rmesa->radeon.Fallback)
-       _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
-    else
-       r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
-diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
-index 5ce287f..526a624 100644
---- a/src/mesa/drivers/dri/r200/r200_reg.h
-+++ b/src/mesa/drivers/dri/r200/r200_reg.h
-@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define     R200_VSC_UPDATE_USER_COLOR_1_ENABLE    0x00020000
- /* gap */
- #define R200_SE_TCL_VECTOR_INDX_REG                0x2200
-+#       define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
-+#       define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
- #define R200_SE_TCL_VECTOR_DATA_REG                0x2204
- #define R200_SE_TCL_SCALAR_INDX_REG                0x2208
-+#       define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT  16
- #define R200_SE_TCL_SCALAR_DATA_REG                0x220c
- /* gap */
- #define R200_SE_TCL_MATRIX_SEL_0                   0x2230
-@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define     R200_LOD_BIAS_MASK                        (0xfff80000)
- #define     R200_LOD_BIAS_SHIFT                       19
- #define R200_PP_TXSIZE_0                  0x2c0c /* NPOT only */
-+#define R200_PP_TX_WIDTHMASK_SHIFT 0
-+#define R200_PP_TX_HEIGHTMASK_SHIFT 16
-+
- #define R200_PP_TXPITCH_0                 0x2c10 /* NPOT only */
- #define R200_PP_BORDER_COLOR_0            0x2c14
- #define R200_PP_CUBIC_FACES_0             0x2c18
-diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
-deleted file mode 100644
-index 9783678..0000000
---- a/src/mesa/drivers/dri/r200/r200_span.c
-+++ /dev/null
-@@ -1,307 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/colormac.h"
--#include "swrast/swrast.h"
--
--#include "r200_context.h"
--#include "r200_ioctl.h"
--#include "r200_state.h"
--#include "r200_span.h"
--#include "r200_tex.h"
--
--#define DBG 0
--
--/*
-- * Note that all information needed to access pixels in a renderbuffer
-- * should be obtained through the gl_renderbuffer parameter, not per-context
-- * information.
-- */
--#define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
--   const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
--
--#define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
--
--#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
--
--#define Y_FLIP(Y) (bottom - (Y))
--
--#define HW_LOCK() 
--
--#define HW_UNLOCK()							
--
--
--
--/* ================================================================
-- * Color buffer
-- */
--
--/* 16 bit, RGB565 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_RGB
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
--
--#define TAG(x)    r200##x##_RGB565
--#define TAG2(x,y) r200##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
--#include "spantmp2.h"
--
--/* 32 bit, ARGB8888 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_BGRA
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
--
--#define TAG(x)    r200##x##_ARGB8888
--#define TAG2(x,y) r200##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
--#include "spantmp2.h"
--
--
--/* ================================================================
-- * Depth buffer
-- */
--
--/* The Radeon family has depth tiling on all the time, so we have to convert
-- * the x,y coordinates into the memory bus address (mba) in the same
-- * manner as the engine.  In each case, the linear block address (ba)
-- * is calculated, and then wired with x and y to produce the final
-- * memory address.
-- * The chip will do address translation on its own if the surface registers
-- * are set up correctly. It is not quite enough to get it working with hyperz too...
-- */
--
--/* extract bit 'b' of x, result is zero or one */
--#define BIT(x,b) ((x & (1<<b))>>b)
--
--static GLuint
--r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
--{
--   GLuint pitch = drb->pitch;
--   if (drb->depthHasSurface) {
--      return 4 * (x + y * pitch);
--   }
--   else {
--      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
--      GLuint a = 
--         (BIT(x,0) << 2) |
--         (BIT(y,0) << 3) |
--         (BIT(x,1) << 4) |
--         (BIT(y,1) << 5) |
--         (BIT(x,3) << 6) |
--         (BIT(x,4) << 7) |
--         (BIT(x,2) << 8) |
--         (BIT(y,2) << 9) |
--         (BIT(y,3) << 10) |
--         (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
--         ((b >> 1) << 12);
--      return a;
--   }
--}
--
--static GLuint
--r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
--{
--   GLuint pitch = drb->pitch;
--   if (drb->depthHasSurface) {
--      return 2 * (x + y * pitch);
--   }
--   else {
--      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
--      GLuint a = 
--         (BIT(x,0) << 1) |
--         (BIT(y,0) << 2) |
--         (BIT(x,1) << 3) |
--         (BIT(y,1) << 4) |
--         (BIT(x,2) << 5) |
--         (BIT(x,4) << 6) |
--         (BIT(x,5) << 7) |
--         (BIT(x,3) << 8) |
--         (BIT(y,2) << 9) |
--         (BIT(y,3) << 10) |
--         (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
--         ((b >> 1) << 12);
--      return a;
--   }
--}
--
--
--/* 16-bit depth buffer functions
-- */
--#define VALUE_TYPE GLushort
--
--#define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
--
--#define TAG(x) r200##x##_z16
--#include "depthtmp.h"
--
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#define VALUE_TYPE GLuint
--
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
--
--#define TAG(x) r200##x##_z24_s8
--#include "depthtmp.h"
--
--
--/* ================================================================
-- * Stencil buffer
-- */
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x00ffffff;							\
--   tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   d = tmp >> 24;							\
--} while (0)
--
--#define TAG(x) r200##x##_z24_s8
--#include "stenciltmp.h"
--
--
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
--
--static void r200SpanRenderStart( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--   r200WaitForIdleLocked( rmesa );
--
--   /* Read & rewrite the first pixel in the frame buffer.  This should
--    * be a noop, right?  In fact without this conform fails as reading
--    * from the framebuffer sometimes produces old results -- the
--    * on-card read cache gets mixed up and doesn't notice that the
--    * framebuffer has been updated.
--    *
--    * In the worst case this is buggy too as p might get the wrong
--    * value first time, so really need a hidden pixel somewhere for this.
--    */
--   {
--      int p;
--      driRenderbuffer *drb =
--	 (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
--      volatile int *buf =
--	 (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
--      p = *buf;
--      *buf = p;
--   }
--}
--
--static void r200SpanRenderFinish( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--   _swrast_flush( ctx );
--   UNLOCK_HARDWARE( rmesa );
--}
--
--void r200InitSpanFuncs( GLcontext *ctx )
--{
--   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
--   swdd->SpanRenderStart          = r200SpanRenderStart;
--   swdd->SpanRenderFinish         = r200SpanRenderFinish; 
--}
--
--
--
--/**
-- * Plug in the Get/Put routines for the given driRenderbuffer.
-- */
--void
--radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
--{
--   if (drb->Base.InternalFormat == GL_RGBA) {
--      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
--         r200InitPointers_RGB565(&drb->Base);
--      }
--      else {
--         r200InitPointers_ARGB8888(&drb->Base);
--      }
--   }
--   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--      r200InitDepthPointers_z16(&drb->Base);
--   }
--   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--      r200InitDepthPointers_z24_s8(&drb->Base);
--   }
--   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--      r200InitStencilPointers_z24_s8(&drb->Base);
--   }
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h
-deleted file mode 100644
-index bae5644..0000000
---- a/src/mesa/drivers/dri/r200/r200_span.h
-+++ /dev/null
-@@ -1,45 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __R200_SPAN_H__
--#define __R200_SPAN_H__
--
--#include "drirenderbuffer.h"
--
--extern void r200InitSpanFuncs( GLcontext *ctx );
--
--extern void
--radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
--
--#endif
-diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
-index 0eaaaf6..126f78b 100644
---- a/src/mesa/drivers/dri/r200/r200_state.c
-+++ b/src/mesa/drivers/dri/r200/r200_state.c
-@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_pipeline.h"
- #include "swrast_setup/swrast_setup.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
-@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
-    CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
-    CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
-    CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
--   if (rmesa->r200Screen->drmSupportsBlendColor)
--      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
- }
- 
- /**
-@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx )
- 
-    R200_STATECHANGE( rmesa, ctx );
- 
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       if (ctx->Color.ColorLogicOpEnabled) {
-          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
-          rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
-@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx )
-       return;
-    }
- 
--   if (!rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
-       return;
-    }
-@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
- 
-    switch ( format ) {
-    case R200_DEPTH_FORMAT_16BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x0000ffff;
-+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
-       break;
-    case R200_DEPTH_FORMAT_24BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x00ffffff;
-+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
-       break;
-    }
- }
-@@ -480,7 +482,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    case GL_FOG_COLOR: 
-       R200_STATECHANGE( rmesa, ctx );
-       UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
--      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
-+      i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
-       break;
-@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    }
- }
- 
--
--/* =============================================================
-- * Scissoring
-- */
--
--
--static GLboolean intersect_rect( drm_clip_rect_t *out,
--				 drm_clip_rect_t *a,
--				 drm_clip_rect_t *b )
--{
--   *out = *a;
--   if ( b->x1 > out->x1 ) out->x1 = b->x1;
--   if ( b->y1 > out->y1 ) out->y1 = b->y1;
--   if ( b->x2 < out->x2 ) out->x2 = b->x2;
--   if ( b->y2 < out->y2 ) out->y2 = b->y2;
--   if ( out->x1 >= out->x2 ) return GL_FALSE;
--   if ( out->y1 >= out->y2 ) return GL_FALSE;
--   return GL_TRUE;
--}
--
--
--void r200RecalcScissorRects( r200ContextPtr rmesa )
--{
--   drm_clip_rect_t *out;
--   int i;
--
--   /* Grow cliprect store?
--    */
--   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
--	 rmesa->state.scissor.numAllocedClipRects *= 2;
--      }
--
--      if (rmesa->state.scissor.pClipRects)
--	 FREE(rmesa->state.scissor.pClipRects);
--
--      rmesa->state.scissor.pClipRects = 
--	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
--		 sizeof(drm_clip_rect_t) );
--
--      if ( rmesa->state.scissor.pClipRects == NULL ) {
--	 rmesa->state.scissor.numAllocedClipRects = 0;
--	 return;
--      }
--   }
--   
--   out = rmesa->state.scissor.pClipRects;
--   rmesa->state.scissor.numClipRects = 0;
--
--   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
--      if ( intersect_rect( out, 
--			   &rmesa->pClipRects[i], 
--			   &rmesa->state.scissor.rect ) ) {
--	 rmesa->state.scissor.numClipRects++;
--	 out++;
--      }
--   }
--}
--
--
--static void r200UpdateScissor( GLcontext *ctx )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( rmesa->dri.drawable ) {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--
--      int x = ctx->Scissor.X;
--      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
--      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
--      int h = dPriv->h - ctx->Scissor.Y - 1;
--
--      rmesa->state.scissor.rect.x1 = x + dPriv->x;
--      rmesa->state.scissor.rect.y1 = y + dPriv->y;
--      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
--      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
--
--      r200RecalcScissorRects( rmesa );
--   }
--}
--
--
--static void r200Scissor( GLcontext *ctx,
--			   GLint x, GLint y, GLsizei w, GLsizei h )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( ctx->Scissor.Enabled ) {
--      R200_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
--      r200UpdateScissor( ctx );
--   }
--
--}
--
--
- /* =============================================================
-  * Culling
-  */
-@@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx,
- 			   GLboolean b, GLboolean a )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
-+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 				ctx->Color.ColorMask[RCOMP],
- 				ctx->Color.ColorMask[GCOMP],
- 				ctx->Color.ColorMask[BCOMP],
-@@ -834,7 +740,7 @@ static void r200PolygonOffset( GLcontext *ctx,
- 			       GLfloat factor, GLfloat units )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   float_ui32_type constant =  { units * rmesa->state.depth.scale };
-+   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
-    float_ui32_type factoru = { factor };
- 
- /*    factor *= 2; */
-@@ -861,15 +767,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
- 
-    /* TODO: push this into cmd mechanism
-     */
--   R200_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* FIXME: Use window x,y offsets into stipple RAM.
-     */
-    stipple.mask = rmesa->state.stipple.mask;
--   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
-                     &stipple, sizeof(stipple) );
--   UNLOCK_HARDWARE( rmesa );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
- 
- static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-@@ -881,7 +787,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-     * cases work. 
-     */
-    TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
--   if (rmesa->TclFallback) {
-+   if (rmesa->radeon.TclFallback) {
-       r200ChooseRenderState( ctx );
-       r200ChooseVertexState( ctx );
-    }
-@@ -958,7 +864,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
- 
-    /* Update vertex/render formats
-     */
--   if (rmesa->TclFallback) { 
-+   if (rmesa->radeon.TclFallback) { 
-       r200ChooseRenderState( ctx );
-       r200ChooseVertexState( ctx );
-    }
-@@ -1430,7 +1336,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
- 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
- 	 else
- 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
--	 if (rmesa->TclFallback) {
-+	 if (rmesa->radeon.TclFallback) {
- 	    r200ChooseRenderState( ctx );
- 	    r200ChooseVertexState( ctx );
- 	 }
-@@ -1675,7 +1581,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   rmesa->state.stencil.clear = 
-+   rmesa->radeon.state.stencil.clear = 
-       ((GLuint) (ctx->Stencil.Clear & 0xff) |
-        (0xff << R200_STENCIL_MASK_SHIFT) |
-        ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
-@@ -1700,19 +1606,19 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
- void r200UpdateWindow( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLfloat xoffset = (GLfloat)dPriv->x;
--   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
-+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
- 
-    float_ui32_type sx = { v[MAT_SX] };
-    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
-    float_ui32_type sy = { - v[MAT_SY] };
-    float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
--   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
--   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
-+   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
-+   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
- 
--   R200_FIREVERTICES( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-    R200_STATECHANGE( rmesa, vpt );
- 
-    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
-@@ -1744,7 +1650,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
- void r200UpdateViewportOffset( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    GLfloat xoffset = (GLfloat)dPriv->x;
-    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-@@ -1774,8 +1680,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
-                 R200_STIPPLE_Y_OFFSET_MASK);
- 
-          /* add magic offsets, then invert */
--         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
--         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
-+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
-+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
-                      & R200_STIPPLE_COORD_MASK);
- 
-          m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
-@@ -1788,7 +1694,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
-       }
-    }
- 
--   r200UpdateScissor( ctx );
-+   radeonUpdateScissor( ctx );
- }
- 
- 
-@@ -1805,7 +1711,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
-    CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
-    CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
-    CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
--   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
-+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
-                                              color[0], color[1],
-                                              color[2], color[3] );
- }
-@@ -1849,56 +1755,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
- }
- 
- 
--/*
-- * Set up the cliprects for either front or back-buffer drawing.
-- */
--void r200SetCliprects( r200ContextPtr rmesa )
--{
--   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
--   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
--   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
--
--   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
--      /* Can't ignore 2d windows if we are page flipping.
--       */
--      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
--         rmesa->numClipRects = drawable->numClipRects;
--         rmesa->pClipRects = drawable->pClipRects;
--      }
--      else {
--         rmesa->numClipRects = drawable->numBackClipRects;
--         rmesa->pClipRects = drawable->pBackClipRects;
--      }
--   }
--   else {
--     /* front buffer (or none, or multiple buffers) */
--     rmesa->numClipRects = drawable->numClipRects;
--     rmesa->pClipRects = drawable->pClipRects;
--  }
--
--   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
--      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
--			       drawable->w, drawable->h);
--      draw_fb->Initialized = GL_TRUE;
--   }
--
--   if (drawable != readable) {
--      if ((read_fb->Width != readable->w) ||
--	  (read_fb->Height != readable->h)) {
--	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
--				  readable->w, readable->h);
--	 read_fb->Initialized = GL_TRUE;
--      }
--   }
--
--   if (rmesa->state.scissor.enabled)
--      r200RecalcScissorRects( rmesa );
--
--   rmesa->lastStamp = drawable->lastStamp;
--}
--
--
- static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-@@ -1907,7 +1763,7 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
-       fprintf(stderr, "%s %s\n", __FUNCTION__,
- 	      _mesa_lookup_enum_by_nr( mode ));
- 
--   R200_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
-+   radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
- 
-    if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
-       /* 0 (GL_NONE) buffers or multiple color drawing buffers */
-@@ -1925,7 +1781,8 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
-       return;
-    }
- 
--   r200SetCliprects( rmesa );
-+   radeonSetCliprects( &rmesa->radeon );
-+   radeonUpdatePageFlipping(&rmesa->radeon);
- 
-    /* We'll set the drawing engine's offset/pitch parameters later
-     * when we update other state.
-@@ -2013,10 +1870,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-       R200_STATECHANGE(rmesa, ctx );
-       if ( state ) {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
-       } else {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
-       }
-       break;
- 
-@@ -2031,7 +1888,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
- 	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
-       }
-       r200UpdateSpecular( ctx ); /* for PK_SPEC */
--      if (rmesa->TclFallback) 
-+      if (rmesa->radeon.TclFallback) 
- 	 r200ChooseVertexState( ctx );
-       _mesa_allow_light_in_model( ctx, !state );
-       break;
-@@ -2068,7 +1925,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-    case GL_LIGHTING:
-       r200UpdateSpecular(ctx);
-       /* for reflection map fixup - might set recheck_texgen for all units too */
--      rmesa->NewGLState |= _NEW_TEXTURE;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE;
-       break;
- 
-    case GL_LINE_SMOOTH:
-@@ -2181,13 +2038,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
-    }
- 
-    case GL_SCISSOR_TEST:
--      R200_FIREVERTICES( rmesa );
--      rmesa->state.scissor.enabled = state;
--      r200UpdateScissor( ctx );
-+      radeon_firevertices(&rmesa->radeon);
-+      rmesa->radeon.state.scissor.enabled = state;
-+      radeonUpdateScissor( ctx );
-       break;
- 
-    case GL_STENCIL_TEST:
--      if ( rmesa->state.stencil.hwBuffer ) {
-+      if ( rmesa->radeon.state.stencil.hwBuffer ) {
- 	 R200_STATECHANGE( rmesa, ctx );
- 	 if ( state ) {
- 	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
-@@ -2443,42 +2300,99 @@ r200UpdateDrawBuffer(GLcontext *ctx)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    struct gl_framebuffer *fb = ctx->DrawBuffer;
--   driRenderbuffer *drb;
-+   struct radeon_renderbuffer *rrb;
- 
-    if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
--      /* draw to front */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
--   }
--   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* draw to back */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
--   }
--   else {
--      /* drawing to multiple buffers, or none */
--      return;
-+     /* draw to front */
-+     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
-+     /* draw to back */
-+     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   } else {
-+     /* drawing to multiple buffers, or none */
-+     return;
-    }
- 
--   assert(drb);
--   assert(drb->flippedPitch);
-+   assert(rrb);
-+   assert(rrb->pitch);
- 
-    R200_STATECHANGE( rmesa, ctx );
- 
-+#if 0
-    /* Note: we used the (possibly) page-flipped values */
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--     = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
-+     = ((rrb->flippedOffset + rmesa->radeon.radeonScreen->fbLocation)
- 	& R200_COLOROFFSET_MASK);
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--   if (rmesa->sarea->tiling_enabled) {
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-    }
-+#endif
- }
- 
-+static GLboolean r200ValidateBuffers(GLcontext *ctx)
-+{
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   struct radeon_cs_space_check bos[8];
-+   struct radeon_renderbuffer *rrb;
-+   int num_bo = 0;
-+   int i;
-+   int flushed = 0, ret;
-+again:
-+   num_bo = 0;
-+   
-+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+      bos[num_bo].bo = rrb->bo;
-+      bos[num_bo].read_domains = 0;
-+      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
-+      bos[num_bo].new_accounted = 0;
-+      num_bo++;
-+   }
-+
-+   /* depth buffer */
-+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
-+   /* color buffer */
-+   if (rrb && rrb->bo) {
-+      bos[num_bo].bo = rrb->bo;
-+      bos[num_bo].read_domains = 0;
-+      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
-+      bos[num_bo].new_accounted = 0;
-+      num_bo++;
-+   }
-+
-+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+      radeonTexObj *t;
-+      
-+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+	 continue;
-+      
-+      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
-+      bos[num_bo].bo = t->mt->bo;
-+      bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
-+      bos[num_bo].write_domain = 0;
-+      bos[num_bo].new_accounted = 0;
-+      num_bo++;
-+   }
-+   
-+   ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
-+   if (ret == RADEON_CS_SPACE_OP_TO_BIG)
-+      return GL_FALSE;
-+   if (ret == RADEON_CS_SPACE_FLUSH) {
-+      radeonFlush(ctx);
-+      if (flushed)
-+	 return GL_FALSE;
-+      flushed = 1;
-+      goto again;
-+   }
-+   return GL_TRUE;
-+}
- 
--
--void r200ValidateState( GLcontext *ctx )
-+GLboolean r200ValidateState( GLcontext *ctx )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint new_state = rmesa->NewGLState;
-+   GLuint new_state = rmesa->radeon.NewGLState;
- 
-    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
-      r200UpdateDrawBuffer(ctx);
-@@ -2486,10 +2400,14 @@ void r200ValidateState( GLcontext *ctx )
- 
-    if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
-       r200UpdateTextureState( ctx );
--      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
-+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
-       r200UpdateLocalViewer( ctx );
-    }
- 
-+   /* we need to do a space check here */
-+   if (!r200ValidateBuffers(ctx))
-+     return GL_FALSE;
-+
- /* FIXME: don't really need most of these when vertex progs are enabled */
- 
-    /* Need an event driven matrix update?
-@@ -2533,7 +2451,8 @@ void r200ValidateState( GLcontext *ctx )
-       else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
-    }
- 
--   rmesa->NewGLState = 0;
-+   rmesa->radeon.NewGLState = 0;
-+   return GL_TRUE;
- }
- 
- 
-@@ -2544,7 +2463,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
-    _vbo_InvalidateState( ctx, new_state );
-    _tnl_InvalidateState( ctx, new_state );
-    _ae_invalidate_state( ctx, new_state );
--   R200_CONTEXT(ctx)->NewGLState |= new_state;
-+   R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
- }
- 
- /* A hack.  The r200 can actually cope just fine with materials
-@@ -2573,12 +2492,13 @@ static void r200WrapRunPipeline( GLcontext *ctx )
-    GLboolean has_material;
- 
-    if (0)
--      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
-+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
--      r200ValidateState( ctx );
-+   if (rmesa->radeon.NewGLState)
-+      if (!r200ValidateState( ctx ))
-+	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
- 
-    has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
- 
-@@ -2636,7 +2556,7 @@ void r200InitStateFuncs( struct dd_function_table *functions )
-    functions->PointParameterfv		= r200PointParameter;
-    functions->PointSize			= r200PointSize;
-    functions->RenderMode		= r200RenderMode;
--   functions->Scissor			= r200Scissor;
-+   functions->Scissor			= radeonScissor;
-    functions->ShadeModel		= r200ShadeModel;
-    functions->StencilFuncSeparate	= r200StencilFuncSeparate;
-    functions->StencilMaskSeparate	= r200StencilMaskSeparate;
-diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
-index a917163..1dddbfd 100644
---- a/src/mesa/drivers/dri/r200/r200_state.h
-+++ b/src/mesa/drivers/dri/r200/r200_state.h
-@@ -43,13 +43,11 @@ extern void r200InitTnlFuncs( GLcontext *ctx );
- 
- extern void r200UpdateMaterial( GLcontext *ctx );
- 
--extern void r200SetCliprects( r200ContextPtr rmesa );
--extern void r200RecalcScissorRects( r200ContextPtr rmesa );
- extern void r200UpdateViewportOffset( GLcontext *ctx );
- extern void r200UpdateWindow( GLcontext *ctx );
- extern void r200UpdateDrawBuffer(GLcontext *ctx);
- 
--extern void r200ValidateState( GLcontext *ctx );
-+extern GLboolean r200ValidateState( GLcontext *ctx );
- 
- extern void r200PrintDirty( r200ContextPtr rmesa,
- 			      const char *msg );
-@@ -59,7 +57,7 @@ extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
- #define FALLBACK( rmesa, bit, mode ) do {				\
-    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
- 		     __FUNCTION__, bit, mode );				\
--   r200Fallback( rmesa->glCtx, bit, mode );				\
-+   r200Fallback( rmesa->radeon.glCtx, bit, mode );				\
- } while (0)
- 
- extern void r200LightingSpaceChange( GLcontext *ctx );
-diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
-index 9e4677e..013064d 100644
---- a/src/mesa/drivers/dri/r200/r200_state_init.c
-+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
-@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_pipeline.h"
- #include "swrast_setup/swrast_setup.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_ioctl.h"
- #include "r200_state.h"
-@@ -52,31 +54,145 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "xmlpool.h"
- 
-+/* Packet table for the 1.3 cmdbuffer state mechanism, indexed by packet
-+ * id: start register, length and name of each state packet.  cmdpkt()
-+ * builds a CP_PACKET0 header from start/len when the screen is kernel_mm.
-+ */
-+static struct {
-+	int start;		/* register the packet starts at */
-+	int len;		/* cmdpkt() passes len - 1 to CP_PACKET0 */
-+	const char *name;	/* printable packet name */
-+} packet[RADEON_MAX_STATE_PACKETS] = {
-+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
-+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
-+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
-+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
-+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
-+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
-+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
-+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
-+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
-+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
-+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
-+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
-+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
-+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
-+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
-+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
-+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
-+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
-+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
-+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
-+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
-+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
-+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
-+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
-+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
-+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
-+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
-+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
-+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
-+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
-+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
-+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
-+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
-+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
-+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
-+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
-+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
-+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
-+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
-+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
-+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
-+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
-+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
-+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
-+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
-+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
-+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
-+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
-+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
-+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
-+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
-+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
-+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
-+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
-+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
-+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
-+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
-+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
-+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
-+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
-+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
-+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
-+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
-+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
-+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
-+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
-+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
-+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
-+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
-+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
-+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
-+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
-+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
-+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
-+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
-+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
-+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
-+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
-+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
-+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
-+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
-+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
-+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
-+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
-+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
-+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
-+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
-+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
-+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
-+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
-+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
-+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
-+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
-+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
-+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
-+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
-+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
-+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
-+};
-+
- /* =============================================================
-  * State initialization
-  */
- 
- void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
- {
--   struct r200_state_atom *l;
-+   struct radeon_state_atom *l;
- 
-    fprintf(stderr, msg);
-    fprintf(stderr, ": ");
- 
--   foreach(l, &rmesa->hw.atomlist) {
--      if (l->dirty || rmesa->hw.all_dirty)
-+   foreach(l, &rmesa->radeon.hw.atomlist) {
-+      if (l->dirty || rmesa->radeon.hw.all_dirty)
- 	 fprintf(stderr, "%s, ", l->name);
-    }
- 
-    fprintf(stderr, "\n");
- }
- 
--static int cmdpkt( int id ) 
-+static int cmdpkt( r200ContextPtr rmesa, int id ) 
- {
-    drm_radeon_cmd_header_t h;
--   h.i = 0;
--   h.packet.cmd_type = RADEON_CMD_PACKET;
--   h.packet.packet_id = id;
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     return CP_PACKET0(packet[id].start, packet[id].len - 1);
-+   } else {
-+     h.i = 0;
-+     h.packet.cmd_type = RADEON_CMD_PACKET;
-+     h.packet.packet_id = id;
-+   }
-    return h.i;
- }
- 
-@@ -127,71 +243,353 @@ static int cmdscl2( int offset, int stride, int count )
- }
- 
- #define CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
- {							\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
-    (void) rmesa;					\
--   return FLAG;						\
-+   return (FLAG) ? atom->cmd_size : 0;			\
- }
- 
- #define TCL_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
--{							\
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG);	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
-+{									\
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
-+   return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
- }
- 
- #define TCL_OR_VP_CHECK( NM, FLAG )			\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
- {							\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && (FLAG);		\
-+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
- }
- 
- #define VP_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx, int idx )	\
--{							\
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   (void) idx;						\
--   return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG);		\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
-+{									\
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
-+   (void) atom;								\
-+   return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
- }
- 
--
- CHECK( always, GL_TRUE )
- CHECK( never, GL_FALSE )
- CHECK( tex_any, ctx->Texture._EnabledUnits )
- CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
--CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
--CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
-+CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) )
-+CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded )
- CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
--CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
-+   CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) )
- CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
- CHECK( afs, ctx->ATIFragmentShader._Enabled )
--CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
-+CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT )
- TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
- TCL_CHECK( tcl, GL_TRUE )
--TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded )
-+TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded )
- TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
--TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
--TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
-+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled )
-+TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) )
- TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
- VP_CHECK( tcl_vp, GL_TRUE )
- VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
- VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
- 
-+#define OUT_VEC(hdr, data) do {	/* hdr: header word, read back through h.vectors */	\
-+    drm_radeon_cmd_header_t h;					\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);	/* value written to the state-flush register */	\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.vectors.count);	/* count dwords taken from data */	\
-+  } while(0)
-+
-+#define OUT_VECLINEAR(hdr, data) do {			\
-+    /* Load hdr into h first: _start and _sz are decoded from it. */	\
-+    drm_radeon_cmd_header_t h = { .i = (hdr) };			\
-+    uint32_t _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);	\
-+    uint32_t _sz = h.veclinear.count * 4;				\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);							\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));	\
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1));	\
-+    OUT_BATCH_TABLE((data), _sz);					\
-+  } while(0)
-+
-+#define OUT_SCL(hdr, data) do {	/* hdr: header word, read back through h.scalars */	\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);	/* count dwords taken from data */	\
-+  } while(0)
-+
-+#define OUT_SCL2(hdr, data) do {	/* like OUT_SCL, but the index is offset by 0x100 */	\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);	/* count dwords taken from data */	\
-+  } while(0)
-+
-+static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* One vector upload then one scalar upload. NOTE(review): the macros add their own register headers; confirm cmd_size covers them. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));	/* data starts right after cmd[0] */
-+   OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));	/* NOTE(review): 18 is presumably MTL_CMD_1 + 1 -- confirm */
-+   END_BATCH();
-+}
-+
-+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* Two vector uploads. NOTE(review): OUT_VEC adds its own register headers; confirm cmd_size covers them. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);	/* data starts right after cmd[0] */
-+   OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);	/* data follows the second header */
-+   END_BATCH();
-+}
-+
-+static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* Two vector uploads. NOTE(review): OUT_VEC adds its own register headers; confirm cmd_size covers them. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);	/* data starts right after cmd[0] */
-+   OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);	/* data follows the second header */
-+   END_BATCH();
-+}
-+
-+static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* One linear vector upload: cmd[0] is the header word, the data follows it. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* One scalar upload: cmd[0] is the header word, the data follows it. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_SCL(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+
-+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   /* One vector upload: cmd[0] is the header word, the data follows it. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   struct radeon_renderbuffer *rrb;
-+   uint32_t cbpitch;
-+   uint32_t zbpitch;
-+   uint32_t dwords = atom->cmd_size;
-+   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
-+   /* Context-atom emitter that r200InitState installs when the screen is not kernel_mm. */
-+   /* output the first 5 dwords of the context atom unchanged */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
-+   OUT_BATCH_TABLE(atom->cmd, 5);
-+
-+   rrb = r200->radeon.state.depth.rrb;	/* rrb is the depth buffer here */
-+   if (!rrb) {
-+     OUT_BATCH(0);
-+     OUT_BATCH(0);
-+   } else {
-+     zbpitch = (rrb->pitch / rrb->cpp);
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+     OUT_BATCH(zbpitch);
-+   }
-+   /* The remaining words are copied from the atom except colour offset/pitch. */
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+   rrb = r200->radeon.state.color.rrb;	/* from here on rrb is the colour buffer */
-+   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
-+      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   }
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
-+   } else {
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+   }
-+
-+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
-+
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
-+   } else {
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (rrb->cpp == 4)	/* both branches are empty: no effect */
-+       ;
-+     else
-+       ;
-+     if (r200->radeon.sarea->tiling_enabled)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
-+     OUT_BATCH_TABLE((atom->cmd + 14), 4);	/* last four atom words, only in the NEWDRM layout */
-+
-+   END_BATCH();
-+}
-+
-+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   struct radeon_renderbuffer *rrb, *drb;
-+   uint32_t cbpitch = 0;
-+   uint32_t zbpitch = 0;
-+   uint32_t dwords = atom->cmd_size;
-+   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
-+   /* Context-atom emitter that r200InitState installs when the screen is kernel_mm. */
-+   rrb = r200->radeon.state.color.rrb;	/* rrb: colour buffer */
-+   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
-+      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   }
-+   if (rrb) {
-+     assert(rrb->bo != NULL);
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (r200->radeon.sarea->tiling_enabled)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+   }
-+
-+   drb = r200->radeon.state.depth.rrb;	/* drb: depth buffer */
-+   if (drb)
-+     zbpitch = (drb->pitch / drb->cpp);
-+
-+   /* Each register group below gets its own CP_PACKET0 header. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   /* In the CS case we need to split this up */
-+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
-+
-+   if (drb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
-+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);	/* depth bo; rrb may be NULL here */
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
-+     OUT_BATCH(zbpitch);
-+   }
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+
-+   if (rrb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+   }
-+
-+   if (rrb) {
-+     if (rrb->cpp == 4)	/* both branches are empty: no effect */
-+       ;
-+     else
-+       ;
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
-+     OUT_BATCH_TABLE((atom->cmd + 14), 4);
-+   }
-+
-+   END_BATCH();
-+}
-+
-+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
-+   /* The relocated offset costs two extra dwords and needs a miptree. */
-+   if (t && t->mt && !t->image_override)
-+     dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_BATCH_TABLE(atom->cmd, 10);
-+   if (t && t->mt && !t->image_override) {	/* same guard as the size bump above */
-+     OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+		     RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+   } else if (!t) {
-+     /* workaround for old CS mechanism */
-+     OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
-+   } else	/* override, or no miptree: always emit exactly one offset dword */
-+     OUT_BATCH(t->override_offset);
-+
-+   END_BATCH();
-+}
-+
-+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r200ContextPtr r200 = R200_CONTEXT(ctx);
-+   BATCH_LOCALS(&r200->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
-+   GLuint size;
-+   /* NOTE(review): 2 * 5 extra dwords are reserved even when no relocation is emitted below -- confirm. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5));
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+   /* NOTE(review): t->mt is dereferenced without a NULL check -- confirm callers guarantee it. */
-+   if (t && !t->image_override) {
-+     size = t->mt->totalsize / 6;	/* one sixth of the miptree's total size */
-+     OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0);	/* presumably faces 1..5 at size * k -- confirm */
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+   }
-+   END_BATCH();
-+}
- 
- /* Initialize the context's hardware state.
-  */
- void r200InitState( r200ContextPtr rmesa )
- {
--   GLcontext *ctx = rmesa->glCtx;
-+   GLcontext *ctx = rmesa->radeon.glCtx;
-    GLuint color_fmt, depth_fmt, i;
-    GLint drawPitch, drawOffset;
- 
--   switch ( rmesa->r200Screen->cpp ) {
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 2:
-       color_fmt = R200_COLOR_FORMAT_RGB565;
-       break;
-@@ -203,20 +601,20 @@ void r200InitState( r200ContextPtr rmesa )
-       exit( -1 );
-    }
- 
--   rmesa->state.color.clear = 0x00000000;
-+   rmesa->radeon.state.color.clear = 0x00000000;
- 
-    switch ( ctx->Visual.depthBits ) {
-    case 16:
--      rmesa->state.depth.clear = 0x0000ffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
-+      rmesa->radeon.state.depth.clear = 0x0000ffff;
-+      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
-       depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
--      rmesa->state.stencil.clear = 0x00000000;
-+      rmesa->radeon.state.stencil.clear = 0x00000000;
-       break;
-    case 24:
--      rmesa->state.depth.clear = 0x00ffffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
-+      rmesa->radeon.state.depth.clear = 0x00ffffff;
-+      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
-       depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
--      rmesa->state.stencil.clear = 0xffff0000;
-+      rmesa->radeon.state.stencil.clear = 0xffff0000;
-       break;
-    default:
-       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-@@ -225,52 +623,57 @@ void r200InitState( r200ContextPtr rmesa )
-    }
- 
-    /* Only have hw stencil when depth buffer is 24 bits deep */
--   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-+   rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
- 				     ctx->Visual.depthBits == 24 );
- 
--   rmesa->Fallback = 0;
-+   rmesa->radeon.Fallback = 0;
- 
--   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      drawOffset = rmesa->r200Screen->backOffset;
--      drawPitch  = rmesa->r200Screen->backPitch;
-+   if ( ctx->Visual.doubleBufferMode && rmesa->radeon.sarea->pfCurrentPage == 0 ) {
-+      drawOffset = rmesa->radeon.radeonScreen->backOffset;
-+      drawPitch  = rmesa->radeon.radeonScreen->backPitch;
-    } else {
--      drawOffset = rmesa->r200Screen->frontOffset;
--      drawPitch  = rmesa->r200Screen->frontPitch;
-+      drawOffset = rmesa->radeon.radeonScreen->frontOffset;
-+      drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
-    }
- #if 000
-    if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
--      rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
-+      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->backOffset;
-+      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->backPitch;
-    } else {
--      rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
--      rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
-+      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->frontOffset;
-+      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
-    }
- 
--   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
--   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
-+   rmesa->state.pixel.readOffset = rmesa->radeon.state.color.drawOffset;
-+   rmesa->state.pixel.readPitch  = rmesa->radeon.state.color.drawPitch;
- #endif
- 
--   rmesa->hw.max_state_size = 0;
-+   rmesa->radeon.hw.max_state_size = 0;
- 
- #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
-    do {								\
-       rmesa->hw.ATOM.cmd_size = SZ;				\
--      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-       rmesa->hw.ATOM.name = NM;					\
-       rmesa->hw.ATOM.idx = IDX;					\
-       rmesa->hw.ATOM.check = check_##CHK;			\
-       rmesa->hw.ATOM.dirty = GL_FALSE;				\
--      rmesa->hw.max_state_size += SZ * sizeof(int);		\
-+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
-    } while (0)
- 
- 
-    /* Allocate state buffers:
-     */
--   if (rmesa->r200Screen->drmSupportsBlendColor)
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
-    else
-       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     rmesa->hw.ctx.emit = ctx_emit_cs;
-+   else
-+     rmesa->hw.ctx.emit = ctx_emit;
-    ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
-    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
-    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
-@@ -282,8 +685,8 @@ void r200InitState( r200ContextPtr rmesa )
-    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
-    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
-    ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
-       /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
- 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
-@@ -303,7 +706,7 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
-    }
-    else {
--      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- 	 ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
-@@ -321,13 +724,18 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
-       ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
-    }
--   if (rmesa->r200Screen->drmSupportsCubeMapsR200) {
-+
-+   for (i = 0; i < 5; i++)
-+     rmesa->hw.tex[i].emit = tex_emit;
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
-       ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
-       ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
-       ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
-       ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
-       ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
-       ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
-+      for (i = 0; i < 5; i++)
-+	rmesa->hw.cube[i].emit = cube_emit;
-    }
-    else {
-       ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
-@@ -337,7 +745,8 @@ void r200InitState( r200ContextPtr rmesa )
-       ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
-       ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
-    }
--   if (rmesa->r200Screen->drmSupportsVertexProgram) {
-+
-+   if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
-       ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
-       ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
-       ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
-@@ -390,13 +799,13 @@ void r200InitState( r200ContextPtr rmesa )
-    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
-    ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
-    ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
--   if (rmesa->r200Screen->drmSupportsTriPerf) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
-       ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
-    }
-    else {
-       ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
-    }
--   if (rmesa->r200Screen->drmSupportsPointSprites) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
-       ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
-       ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
-    }
-@@ -409,87 +818,115 @@ void r200InitState( r200ContextPtr rmesa )
- 
-    /* Fill in the packet headers:
-     */
--   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
--   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
--   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
--   if (rmesa->r200Screen->drmSupportsBlendColor)
--      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR);
--   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
--   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
--   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
--   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
--   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
--   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
--   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
--   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
--   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
--   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
--   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
--   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
--   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
--   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
--   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
--      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
--      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
--      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
--      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
--      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
--      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
--      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
--      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
--      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
--      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
--      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
--      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
-+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
-+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
-+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
-+      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
-+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
-+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
-+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
-+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
-+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
-+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
-+   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
-+   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
-+   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
-+   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
-+   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
-+   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
-+   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
-+   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
-+   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
-+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
-+      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
-    } else {
--      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
--      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
--      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
--      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
--      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
--      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
--      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
--      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
--      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
--      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
--      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
--      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
--   }
--   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
--   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
--   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2);
--   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3);
--   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3);
--   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4);
--   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4);
--   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5);
--   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5);
--   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
--   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
--   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2);
--   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3);
--   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4);
--   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5);
--   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
--   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
--   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
--   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
--   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
--   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
--   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
--   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
--   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
--   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
--   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL);
--   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
-+      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
-+      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
-+      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
-+      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
-+      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
-+      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
-+   }
-+   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
-+   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
-+   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
-+   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
-+   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
-+   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
-+   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
-+   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
-+   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
-+   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
-+   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
-+   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
-+   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
-+   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
-+   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
-+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
-+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
-+   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
-+   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
-+   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
-+   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
-+   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
-+   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
-+   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
-+   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
-+   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
-+   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+	rmesa->hw.mtl[0].emit = mtl_emit;
-+	rmesa->hw.mtl[1].emit = mtl_emit;
-+
-+	rmesa->hw.vpi[0].emit = veclinear_emit;
-+	rmesa->hw.vpi[1].emit = veclinear_emit;
-+	rmesa->hw.vpp[0].emit = veclinear_emit;
-+	rmesa->hw.vpp[1].emit = veclinear_emit;
-+
-+	rmesa->hw.grd.emit = scl_emit;
-+	rmesa->hw.fog.emit = vec_emit;
-+	rmesa->hw.glt.emit = vec_emit;
-+	rmesa->hw.eye.emit = vec_emit;
-+
-+	for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
-+	  rmesa->hw.mat[i].emit = vec_emit;
-+
-+	for (i = 0; i < 8; i++)
-+	  rmesa->hw.lit[i].emit = lit_emit;
-+
-+	for (i = 0; i < 6; i++)
-+	  rmesa->hw.ucp[i].emit = vec_emit;
-+
-+	rmesa->hw.ptp.emit = ptp_emit;
-+   }
-+
-+
-+   
-    rmesa->hw.mtl[0].cmd[MTL_CMD_0] = 
-       cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
-    rmesa->hw.mtl[0].cmd[MTL_CMD_1] = 
-@@ -567,7 +1004,7 @@ void r200InitState( r200ContextPtr rmesa )
- 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
- 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
- 
--   if (rmesa->r200Screen->drmSupportsBlendColor) {
-+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
-       rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
- 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
-@@ -578,10 +1015,10 @@ void r200InitState( r200ContextPtr rmesa )
-    }
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
--      rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation;
-+      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
--      ((rmesa->r200Screen->depthPitch &
-+      ((rmesa->radeon.radeonScreen->depthPitch &
- 	R200_DEPTHPITCH_MASK) |
-        R200_DEPTH_ENDIAN_NO_SWAP);
-    
-@@ -599,7 +1036,7 @@ void r200InitState( r200ContextPtr rmesa )
-    if (rmesa->using_hyperz) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
- 						  R200_Z_DECOMPRESSION_ENABLE;
--/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
-+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
-    }
- 
-@@ -607,7 +1044,7 @@ void r200InitState( r200ContextPtr rmesa )
-  				     | R200_TEX_BLEND_0_ENABLE);
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
--   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
-+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
-    case DRI_CONF_DITHER_XERRORDIFFRESET:
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
-       break;
-@@ -615,28 +1052,28 @@ void r200InitState( r200ContextPtr rmesa )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
-       break;
-    }
--   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
-+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
- 	DRI_CONF_ROUND_ROUND )
--      rmesa->state.color.roundEnable = R200_ROUND_ENABLE;
-+      rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
-    else
--      rmesa->state.color.roundEnable = 0;
--   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
-+      rmesa->radeon.state.color.roundEnable = 0;
-+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
- 	DRI_CONF_COLOR_REDUCTION_DITHER )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
-    else
--      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
-+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
- 
- #if 000
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
--					       rmesa->r200Screen->fbLocation)
-+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->radeon.state.color.drawOffset +
-+					       rmesa->radeon.radeonScreen->fbLocation)
- 					      & R200_COLOROFFSET_MASK);
- 
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
-+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->radeon.state.color.drawPitch &
- 					      R200_COLORPITCH_MASK) |
- 					     R200_COLOR_ENDIAN_NO_SWAP);
- #else
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
--					       rmesa->r200Screen->fbLocation)
-+					       rmesa->radeon.radeonScreen->fbLocation)
- 					      & R200_COLOROFFSET_MASK);
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
-@@ -644,12 +1081,12 @@ void r200InitState( r200ContextPtr rmesa )
- 					     R200_COLOR_ENDIAN_NO_SWAP);
- #endif
-    /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
--   if (rmesa->sarea->tiling_enabled) {
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
-    }
- 
-    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * 
--			driQueryOptionf (&rmesa->optionCache,"texture_blend_quality");
-+			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
-    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
- 
-    rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
-@@ -704,7 +1141,7 @@ void r200InitState( r200ContextPtr rmesa )
- 						R200_VC_NO_SWAP;
- #endif
- 
--   if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-       /* Bypass TCL */
-       rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
-    }
-@@ -743,28 +1180,28 @@ void r200InitState( r200ContextPtr rmesa )
-       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
-          (/* R200_TEXCOORD_PROJ | */
-           0x100000);	/* Small default bias */
--      if (rmesa->r200Screen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
- 	 rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
--	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 	 rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
- 	 rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
-       }
-       else {
- 	  rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
--	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-      }
- 
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
--         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 
-       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
-          (R200_TXC_ARG_A_ZERO |
-@@ -967,5 +1404,7 @@ void r200InitState( r200ContextPtr rmesa )
- 
-    r200LightingSpaceChange( ctx );
- 
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
-+
-+   rcommonInitCmdBuf(&rmesa->radeon);
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
-index b25f028..b006409 100644
---- a/src/mesa/drivers/dri/r200/r200_swtcl.c
-+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
-@@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r200_tcl.h"
- 
- 
--static void flush_last_swtcl_prim( r200ContextPtr rmesa  );
--
--
- /***********************************************************************
-  *                         Initialization 
-  ***********************************************************************/
- 
- #define EMIT_ATTR( ATTR, STYLE, F0 )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
-    fmt_0 |= F0;								\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- static void r200SetVertexFormat( GLcontext *ctx )
-@@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
-    }
- 
-    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--   rmesa->swtcl.vertex_attr_count = 0;
-+   rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
-    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-     * build up a hardware vertex.
-@@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
-       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
-    }
- 
--   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
-+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
- 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
- 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
-       R200_NEWPRIM(rmesa);
-@@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx )
-       rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
-       rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
- 
--      rmesa->swtcl.vertex_size =
-+      rmesa->radeon.swtcl.vertex_size =
- 	  _tnl_install_attrs( ctx,
--			      rmesa->swtcl.vertex_attrs, 
--			      rmesa->swtcl.vertex_attr_count,
-+			      rmesa->radeon.swtcl.vertex_attrs, 
-+			      rmesa->radeon.swtcl.vertex_attr_count,
- 			      NULL, 0 );
--      rmesa->swtcl.vertex_size /= 4;
--      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-+      rmesa->radeon.swtcl.vertex_size /= 4;
-+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
-    }
- }
- 
- 
- static void r200RenderStart( GLcontext *ctx )
- {
--   r200ContextPtr rmesa = R200_CONTEXT( ctx );
--
-    r200SetVertexFormat( ctx );
--
--   if (rmesa->dma.flush != 0 && 
--       rmesa->dma.flush != flush_last_swtcl_prim)
--      rmesa->dma.flush( rmesa );
- }
- 
- 
-@@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx )
-     * rasterization fallback.  As this function will be called again when we
-     * leave a rasterization fallback, we can just skip it for now.
-     */
--   if (rmesa->Fallback != 0)
-+   if (rmesa->radeon.Fallback != 0)
-       return;
- 
-    vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
-@@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx )
-    }
- }
- 
--
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( r200ContextPtr rmesa  )
--{
--   if (R200_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->dma.flush = NULL;
--
--   if (rmesa->dma.current.buf) {
--      struct r200_dma_region *current = &rmesa->dma.current;
--      GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset +
--			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
--			       current->start);
--
--      assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
--
--      assert (current->start + 
--	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	      current->ptr);
--
--      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--	 r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
--			        rmesa->hw.max_state_size + VBUF_BUFSZ );
--	 r200EmitVertexAOS( rmesa,
--			      rmesa->swtcl.vertex_size,
--			      current_offset);
--
--	 r200EmitVbufPrim( rmesa,
--			   rmesa->swtcl.hw_primitive,
--			   rmesa->swtcl.numverts);
--      }
--
--      rmesa->swtcl.numverts = 0;
--      current->start = current->ptr;
--   }
--}
--
--
--/* Alloc space in the current dma region.
-- */
--static INLINE void *
--r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize )
-+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
- {
--   GLuint bytes = vsize * nverts;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      r200RefillCurrentDmaRegion( rmesa );
--
--   if (!rmesa->dma.flush) {
--      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--      rmesa->dma.flush = flush_last_swtcl_prim;
--   }
-+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-+   rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			    __FUNCTION__);
- 
--   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
--   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--   ASSERT( rmesa->dma.current.start + 
--	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	   rmesa->dma.current.ptr );
- 
-+   radeonEmitState(&rmesa->radeon);
-+   r200EmitVertexAOS( rmesa,
-+		      rmesa->radeon.swtcl.vertex_size,
-+		      rmesa->radeon.dma.current,
-+		      current_offset);
- 
--   {
--      GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--      rmesa->dma.current.ptr += bytes;
--      rmesa->swtcl.numverts += nverts;
--      return head;
--   }
-+		      
-+   r200EmitVbufPrim( rmesa,
-+		     rmesa->radeon.swtcl.hw_primitive,
-+		     rmesa->radeon.swtcl.numverts);
- 
- }
- 
--
- /**************************************************************************/
- 
- 
-@@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx );
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
- #define CTX_ARG r200ContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
- #define LOCAL_VARS						\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   const char *r200verts = (char *)rmesa->swtcl.verts;
--#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int)))
--#define VERTEX r200Vertex 
-+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
-+#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
-+#define VERTEX radeonVertex 
- #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
- 
- #undef TAG
-@@ -456,11 +396,11 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
- 
- #define VERT_SET_RGBA( v, c )  					\
- do {								\
--   r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]);	\
-+   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
-    UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
-@@ -472,7 +412,7 @@ do {								\
- #define VERT_SET_SPEC( v, c )					\
- do {								\
-    if (specoffset) {						\
--      r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]);	\
-+      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
-       UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
-@@ -481,8 +421,8 @@ do {								\
- #define VERT_COPY_SPEC( v0, v1 )			\
- do {							\
-    if (specoffset) {					\
--      r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]);	\
--      r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]);	\
-+      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
-+      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
-       spec0->red   = spec1->red;	\
-       spec0->green = spec1->green;	\
-       spec0->blue  = spec1->blue; 	\
-@@ -513,7 +453,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -569,8 +509,8 @@ static void init_rast_tab( void )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *r200verts = (char *)rmesa->swtcl.verts;		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx )
-    GLuint index = 0;
-    GLuint flags = ctx->_TriangleCaps;
- 
--   if (!rmesa->TclFallback || rmesa->Fallback) 
-+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
-       return;
- 
-    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
-    if (flags & DD_TRI_UNFILLED)      index |= R200_UNFILLED_BIT;
- 
--   if (index != rmesa->swtcl.RenderIndex) {
-+   if (index != rmesa->radeon.swtcl.RenderIndex) {
-       tnl->Driver.Render.Points = rast_tab[index].points;
-       tnl->Driver.Render.Line = rast_tab[index].line;
-       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx )
- 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-       }
- 
--      rmesa->swtcl.RenderIndex = index;
-+      rmesa->radeon.swtcl.RenderIndex = index;
-    }
- }
- 
-@@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
- 
--   if (rmesa->swtcl.hw_primitive != hwprim) {
-+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-       /* need to disable perspective-correct texturing for point sprites */
-       if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
- 	 if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
-@@ -649,14 +589,14 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- 	 rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
-       }
-       R200_NEWPRIM( rmesa );
--      rmesa->swtcl.hw_primitive = hwprim;
-+      rmesa->radeon.swtcl.hw_primitive = hwprim;
-    }
- }
- 
- static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   rmesa->swtcl.render_primitive = prim;
-+   rmesa->radeon.swtcl.render_primitive = prim;
-    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-       r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
- }
-@@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->Fallback;
-+   GLuint oldfallback = rmesa->radeon.Fallback;
- 
-    if (mode) {
--      rmesa->Fallback |= bit;
-+      rmesa->radeon.Fallback |= bit;
-       if (oldfallback == 0) {
--	 R200_FIREVERTICES( rmesa );
-+	 radeon_firevertices(&rmesa->radeon);
- 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
- 	 _swsetup_Wakeup( ctx );
--	 rmesa->swtcl.RenderIndex = ~0;
-+	 rmesa->radeon.swtcl.RenderIndex = ~0;
-          if (R200_DEBUG & DEBUG_FALLBACKS) {
-             fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
-                     bit, getFallbackString(bit));
-@@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->Fallback &= ~bit;
-+      rmesa->radeon.Fallback &= ~bit;
-       if (oldfallback == bit) {
- 
- 	 _swrast_flush( ctx );
-@@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- 
- 	 tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
- 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
--	 if (rmesa->TclFallback) {
--	    /* These are already done if rmesa->TclFallback goes to
-+	 if (rmesa->radeon.TclFallback) {
-+	    /* These are already done if rmesa->radeon.TclFallback goes to
- 	     * zero above. But not if it doesn't (R200_NO_TCL for
- 	     * example?)
- 	     */
- 	    _tnl_invalidate_vertex_state( ctx, ~0 );
- 	    _tnl_invalidate_vertices( ctx, ~0 );
--	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
-+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
- 	    r200ChooseVertexState( ctx );
- 	    r200ChooseRenderState( ctx );
- 	 }
-@@ -772,7 +712,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    const GLfloat *rc = ctx->Current.RasterColor; 
-    GLint row, col;
--   r200Vertex vert;
-+   radeonVertex vert;
-    GLuint orig_vte;
-    GLuint h;
- 
-@@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
-       vte |= R200_VTX_W0_FMT;
-       vap &= ~R200_VAP_FORCE_W_TO_ONE;
- 
--      rmesa->swtcl.vertex_size = 5;
-+      rmesa->radeon.swtcl.vertex_size = 5;
- 
-       if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
- 	   || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
-@@ -871,10 +811,10 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 
-    /* Update window height
-     */
--   LOCK_HARDWARE( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
--   px += rmesa->dri.drawable->x;
-+   LOCK_HARDWARE( &rmesa->radeon );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
-+   h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y;
-+   px += rmesa->radeon.dri.drawable->x;
- 
-    /* Clipping handled by existing mechansims in r200_ioctl.c?
-     */
-@@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 
-    /* Need to restore vertexformat?
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       r200ChooseVertexState( ctx );
- }
- 
-@@ -962,17 +902,13 @@ void r200InitSwtcl( GLcontext *ctx )
-    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
- 		       36 * sizeof(GLfloat) );
-    
--   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->swtcl.render_primitive = GL_TRIANGLES;
--   rmesa->swtcl.hw_primitive = 0;
-+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+   rmesa->radeon.swtcl.hw_primitive = 0;
- }
- 
- 
- void r200DestroySwtcl( GLcontext *ctx )
- {
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (rmesa->swtcl.indexed_verts.buf) 
--      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
-index 8c29fd0..a4051a4 100644
---- a/src/mesa/drivers/dri/r200/r200_swtcl.h
-+++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
-@@ -52,15 +52,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
- extern void r200PrintSetupFlags(char *msg, GLuint flags );
- 
- 
--extern void r200_emit_indexed_verts( GLcontext *ctx,
--				       GLuint start,
--				       GLuint count );
--
- extern void r200_translate_vertex( GLcontext *ctx, 
--				     const r200Vertex *src, 
-+				     const radeonVertex *src, 
- 				     SWvertex *dst );
- 
--extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
-+extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
- 
- extern void r200_import_float_colors( GLcontext *ctx );
- extern void r200_import_float_spec_colors( GLcontext *ctx );
-@@ -70,5 +66,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
- 			      const struct gl_pixelstore_attrib *unpack,
- 			      const GLubyte *bitmap );
- 
--
-+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
-index 99aecfe..8e0fb14 100644
---- a/src/mesa/drivers/dri/r200/r200_tcl.c
-+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
-@@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = {
- 
- #define RESET_STIPPLE() do {			\
-    R200_STATECHANGE( rmesa, lin );		\
--   r200EmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- #define AUTO_STIPPLE( mode )  do {		\
-@@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = {
-    else						\
-       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
- 	 ~R200_LINE_PATTERN_AUTO_RESET;	\
--   r200EmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- 
-@@ -142,25 +142,23 @@ static GLboolean discrete_prim[0x10] = {
- 
- static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) 
- {
--   if (rmesa->dma.flush == r200FlushElts &&
--       rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {
-+   if (rmesa->radeon.dma.flush == r200FlushElts &&
-+       rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
- 
--      GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
--				    rmesa->store.cmd_used);
-+      GLushort *dest = (GLushort *)(rmesa->tcl.elt_dma_bo->ptr +
-+				    rmesa->tcl.elt_used);
- 
--      rmesa->store.cmd_used += nr*2;
-+      rmesa->tcl.elt_used += nr*2;
- 
-       return dest;
-    }
-    else {
--      if (rmesa->dma.flush)
--	 rmesa->dma.flush( rmesa );
-+      if (rmesa->radeon.dma.flush)
-+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- 
--      r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			     rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );
-+      rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
- 
-       r200EmitAOS( rmesa,
--		   rmesa->tcl.aos_components,
- 		   rmesa->tcl.nr_aos_components, 0 );
- 
-       return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
-@@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx,
-    r200ContextPtr rmesa = R200_CONTEXT( ctx );
-    r200TclPrimitive( ctx, prim, hwprim );
-    
--   r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			  rmesa->hw.max_state_size + VBUF_BUFSZ );
-+   //   fprintf(stderr,"Emit prim %d\n", rmesa->tcl.nr_aos_components);
-+   rcommonEnsureCmdBufSpace( &rmesa->radeon,
-+			     AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-+			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
- 
-    r200EmitAOS( rmesa,
--		  rmesa->tcl.aos_components,
--		  rmesa->tcl.nr_aos_components,
--		  start );
-+		rmesa->tcl.nr_aos_components,
-+		start );
-    
-    /* Why couldn't this packet have taken an offset param?
-     */
-@@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
- 
-    /* TODO: separate this from the swtnl pipeline 
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       return GL_TRUE;	/* fallback to software t&l */
- 
-    if (R200_DEBUG & DEBUG_PRIMS)
-@@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
--      r200ValidateState( ctx );
-+   if (rmesa->radeon.NewGLState)
-+      if (!r200ValidateState( ctx ))
-+         return GL_TRUE; /* fallback to sw t&l */
- 
-    if (!ctx->VertexProgram._Enabled) {
-    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
-@@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx )
- 
-    tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
- 
--   if ( rmesa->dma.flush )			
--      rmesa->dma.flush( rmesa );	
-+   if ( rmesa->radeon.dma.flush )			
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
- 
--   rmesa->dma.flush = NULL;
-+   rmesa->radeon.dma.flush = NULL;
-    
--   if (rmesa->swtcl.indexed_verts.buf) 
--      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
--
-    R200_STATECHANGE( rmesa, vap );
-    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
-    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
-@@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit)
- void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->TclFallback;
-+   GLuint oldfallback = rmesa->radeon.TclFallback;
- 
-    if (mode) {
--      rmesa->TclFallback |= bit;
-+      rmesa->radeon.TclFallback |= bit;
-       if (oldfallback == 0) {
- 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "R200 begin tcl fallback %s\n",
-@@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->TclFallback &= ~bit;
-+      rmesa->radeon.TclFallback &= ~bit;
-       if (oldfallback == bit) {
- 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "R200 end tcl fallback %s\n",
-diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
-index 5a4db33..19a6cad 100644
---- a/src/mesa/drivers/dri/r200/r200_tex.c
-+++ b/src/mesa/drivers/dri/r200/r200_tex.c
-@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/teximage.h"
- #include "main/texobj.h"
- 
--#include "texmem.h"
--
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * \param twrap Wrap mode for the \a t texture coordinate
-  */
- 
--static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
-+static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
- {
-    GLboolean  is_clamp = GL_FALSE;
-    GLboolean  is_clamp_to_border = GL_FALSE;
-+   struct gl_texture_object *tObj = &t->base;
- 
-    t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
- 
-@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
-       _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
-    }
- 
--   if (t->base.tObj->Target != GL_TEXTURE_1D) {
-+   if (tObj->Target != GL_TEXTURE_1D) {
-       switch ( twrap ) {
-       case GL_REPEAT:
-          t->pp_txfilter |= R200_CLAMP_T_WRAP;
-@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
-    t->border_fallback = (is_clamp && is_clamp_to_border);
- }
- 
--static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
-+static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
- {
-    t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
- 
-@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
-  * \param magf Texture magnification mode
-  */
- 
--static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
-+static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
- {
-    GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
- 
-+   /* Force revalidation to account for switches from/to mipmapping. */
-+   t->validated = GL_FALSE;
-+
-    t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
-    t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
- 
-@@ -267,693 +270,12 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
-    }
- }
- 
--static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] )
--{
--   t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] );
--}
--
--
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
--{
--   r200TexObjPtr t;
--
--   t = CALLOC_STRUCT( r200_tex_obj );
--   texObj->DriverData = t;
--   if ( t != NULL ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE ) {
--	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, 
--		  (void *)t );
--      }
--
--      /* Initialize non-image-dependent parts of the state:
--       */
--      t->base.tObj = texObj;
--      t->border_fallback = GL_FALSE;
--
--      make_empty_list( & t->base );
--
--      r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
--      r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
--      r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
--      r200SetTexBorderColor( t, texObj->_BorderChan );
--   }
--
--   return t;
--}
--
--/* try to find a format which will only need a memcopy */
--static const struct gl_texture_format *
--r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType )
--{
--   const GLuint ui = 1;
--   const GLubyte littleEndian = *((const GLubyte *) &ui);
--
--   if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
--      return &_mesa_texformat_rgba8888;
--   }
--   else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
--      return &_mesa_texformat_rgba8888_rev;
--   }
--   else return _dri_texformat_argb8888;
--}
--
--static const struct gl_texture_format *
--r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
--                           GLenum format, GLenum type )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   const GLboolean do32bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
--   const GLboolean force16bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
--   (void) format;
--
--   switch ( internalFormat ) {
--   case 4:
--   case GL_RGBA:
--   case GL_COMPRESSED_RGBA:
--      switch ( type ) {
--      case GL_UNSIGNED_INT_10_10_10_2:
--      case GL_UNSIGNED_INT_2_10_10_10_REV:
--	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      default:
--         return do32bpt ?
--	    r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
--      }
--
--   case 3:
--   case GL_RGB:
--   case GL_COMPRESSED_RGB:
--      switch ( type ) {
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_5_6_5:
--      case GL_UNSIGNED_SHORT_5_6_5_REV:
--	 return _dri_texformat_rgb565;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--      }
--
--   case GL_RGBA8:
--   case GL_RGB10_A2:
--   case GL_RGBA12:
--   case GL_RGBA16:
--      return !force16bpt ?
--	  r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
--
--   case GL_RGBA4:
--   case GL_RGBA2:
--      return _dri_texformat_argb4444;
--
--   case GL_RGB5_A1:
--      return _dri_texformat_argb1555;
--
--   case GL_RGB8:
--   case GL_RGB10:
--   case GL_RGB12:
--   case GL_RGB16:
--      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--
--   case GL_RGB5:
--   case GL_RGB4:
--   case GL_R3_G3_B2:
--      return _dri_texformat_rgb565;
--
--   case GL_ALPHA:
--   case GL_ALPHA4:
--   case GL_ALPHA8:
--   case GL_ALPHA12:
--   case GL_ALPHA16:
--   case GL_COMPRESSED_ALPHA:
--   /* can't use a8 format since interpreting hw I8 as a8 would result
--      in wrong rgb values (same as alpha value instead of 0). */
--      return _dri_texformat_al88;
--
--   case 1:
--   case GL_LUMINANCE:
--   case GL_LUMINANCE4:
--   case GL_LUMINANCE8:
--   case GL_LUMINANCE12:
--   case GL_LUMINANCE16:
--   case GL_COMPRESSED_LUMINANCE:
--      return _dri_texformat_l8;
--
--   case 2:
--   case GL_LUMINANCE_ALPHA:
--   case GL_LUMINANCE4_ALPHA4:
--   case GL_LUMINANCE6_ALPHA2:
--   case GL_LUMINANCE8_ALPHA8:
--   case GL_LUMINANCE12_ALPHA4:
--   case GL_LUMINANCE12_ALPHA12:
--   case GL_LUMINANCE16_ALPHA16:
--   case GL_COMPRESSED_LUMINANCE_ALPHA:
--      return _dri_texformat_al88;
--
--   case GL_INTENSITY:
--   case GL_INTENSITY4:
--   case GL_INTENSITY8:
--   case GL_INTENSITY12:
--   case GL_INTENSITY16:
--   case GL_COMPRESSED_INTENSITY:
--       return _dri_texformat_i8;
--
--   case GL_YCBCR_MESA:
--      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--          type == GL_UNSIGNED_BYTE)
--         return &_mesa_texformat_ycbcr;
--      else
--         return &_mesa_texformat_ycbcr_rev;
--
--   case GL_RGB_S3TC:
--   case GL_RGB4_S3TC:
--   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgb_dxt1;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgba_dxt1;
--
--   case GL_RGBA_S3TC:
--   case GL_RGBA4_S3TC:
--   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--      return &_mesa_texformat_rgba_dxt3;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--      return &_mesa_texformat_rgba_dxt5;
--
--   default:
--      _mesa_problem(ctx,
--         "unexpected internalFormat 0x%x in r200ChooseTextureFormat",
--         (int) internalFormat);
--      return NULL;
--   }
--
--   return NULL; /* never get here */
--}
--
--
--static GLboolean
--r200ValidateClientStorage( GLcontext *ctx, GLenum target,
--			   GLint internalFormat,
--			   GLint srcWidth, GLint srcHeight, 
--                           GLenum format, GLenum type,  const void *pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if ( R200_DEBUG & DEBUG_TEXTURE )
--      fprintf(stderr, "intformat %s format %s type %s\n",
--	      _mesa_lookup_enum_by_nr( internalFormat ),
--	      _mesa_lookup_enum_by_nr( format ),
--	      _mesa_lookup_enum_by_nr( type ));
--
--   if (!ctx->Unpack.ClientStorage)
--      return 0;
--
--   if (ctx->_ImageTransferState ||
--       texImage->IsCompressed ||
--       texObj->GenerateMipmap)
--      return 0;
--
--
--   /* This list is incomplete, may be different on ppc???
--    */
--   switch ( internalFormat ) {
--   case GL_RGBA:
--      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
--	 texImage->TexFormat = _dri_texformat_argb8888;
--      }
--      else
--	 return 0;
--      break;
--
--   case GL_RGB:
--      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
--	 texImage->TexFormat = _dri_texformat_rgb565;
--      }
--      else
--	 return 0;
--      break;
--
--   case GL_YCBCR_MESA:
--      if ( format == GL_YCBCR_MESA && 
--	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
--	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--      }
--      else if ( format == GL_YCBCR_MESA && 
--		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
--		 type == GL_UNSIGNED_BYTE)) {
--	 texImage->TexFormat = &_mesa_texformat_ycbcr;
--      }
--      else
--	 return 0;
--      break;
--
--   default:
--      return 0;
--   }
--
--   /* Could deal with these packing issues, but currently don't:
--    */
--   if (packing->SkipPixels || 
--       packing->SkipRows || 
--       packing->SwapBytes ||
--       packing->LsbFirst) {
--      return 0;
--   }
--
--   {      
--      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						  format, type);
--
--      
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
--		 __FUNCTION__, srcRowStride, srcRowStride);
--
--      /* Could check this later in upload, pitch restrictions could be
--       * relaxed, but would need to store the image pitch somewhere,
--       * as packing details might change before image is uploaded:
--       */
--      if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
--	  (srcRowStride & 63))
--	 return 0;
--
--
--      /* Have validated that _mesa_transfer_teximage would be a straight
--       * memcpy at this point.  NOTE: future calls to TexSubImage will
--       * overwrite the client data.  This is explicitly mentioned in the
--       * extension spec.
--       */
--      texImage->Data = (void *)pixels;
--      texImage->IsClientData = GL_TRUE;
--      texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
--
--      return 1;
--   }
--}
--
--
--static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage1d(ctx, target, level, internalFormat,
--                          width, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset,
--                                 GLsizei width,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--			     format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat, 
--				  width, height, 
--				  format, type, pixels, 
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
--   }
--   else {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_teximage2d(ctx, target, level, internalFormat,
--			     width, height, border, format, type, pixels,
--			     &ctx->Unpack, texObj, texImage);
--      
--      t->dirty_images[face] |= (1 << level);
--   }
--}
--
--
--static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--			     height, format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLsizei imageSize, const GLvoid *data,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
-+static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
- {
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--/* can't call this, different parameters. Would never evaluate to true anyway currently
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat,
--				  width, height,
--				  format, type, pixels,
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
--   }
--   else */{
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
--                                 height, border, imageSize, data, texObj, texImage);
--
--      t->dirty_images[face] |= (1 << level);
--   }
--}
--
--
--static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format,
--                                 GLsizei imageSize, const GLvoid *data,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--                            height, format, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--#if ENABLE_HW_3D_TEXTURE
--static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
--                            GLint internalFormat,
--                            GLint width, GLint height, GLint depth,
--                            GLint border,
--                            GLenum format, GLenum type, const GLvoid *pixels,
--                            const struct gl_pixelstore_attrib *packing,
--                            struct gl_texture_object *texObj,
--                            struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--         return;
--      }
--   }
--
--   texImage->IsClientData = GL_FALSE;
--
--#if 0
--   if (r200ValidateClientStorage( ctx, target, 
--				  internalFormat, 
--				  width, height, 
--				  format, type, pixels, 
--				  packing, texObj, texImage)) {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
--   }
--   else
--#endif
--   {
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
--
--      /* Normal path: copy (to cached memory) and eventually upload
--       * via another copy to GART memory and then a blit...  Could
--       * eliminate one copy by going straight to (permanent) GART.
--       *
--       * Note, this will call r200ChooseTextureFormat.
--       */
--      _mesa_store_teximage3d(ctx, target, level, internalFormat,
--			     width, height, depth, border,
--                             format, type, pixels,
--			     &ctx->Unpack, texObj, texImage);
--      
--      t->dirty_images[0] |= (1 << level);
--   }
-+   t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
- }
--#endif
--
- 
--#if ENABLE_HW_3D_TEXTURE
--static void
--r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
--                   GLint xoffset, GLint yoffset, GLint zoffset,
--                   GLsizei width, GLsizei height, GLsizei depth,
--                   GLenum format, GLenum type,
--                   const GLvoid *pixels,
--                   const struct gl_pixelstore_attrib *packing,
--                   struct gl_texture_object *texObj,
--                   struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) r200AllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--         return;
--      }
--      texObj->DriverData = t;
--   }
- 
--   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
--                             width, height, depth,
--                             format, type, pixels, packing, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--#endif
- 
- 
- 
-@@ -978,7 +300,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
-       GLubyte c[4];
-       GLuint envColor;
-       UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
--      envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
-+      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
-       if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
- 	 R200_STATECHANGE( rmesa, tf );
- 	 rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
-@@ -997,7 +319,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
-        * NOTE: Add a small bias to the bias for conform mipsel.c test.
-        */
-       bias = *param + .01;
--      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
-+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
- 	  0.0 : -16.0;
-       bias = CLAMP( bias, min, 16.0 );
-       b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
-@@ -1034,7 +356,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
- 				struct gl_texture_object *texObj,
- 				GLenum pname, const GLfloat *params )
- {
--   r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
-+   radeonTexObj* t = radeon_tex_obj(texObj);
- 
-    if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
-@@ -1068,59 +390,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
-        * we just have to rely on loading the right subset of mipmap levels
-        * to simulate a clamped LOD.
-        */
--      driSwapOutTextureObject( (driTextureObject *) t );
-+      if (t->mt) {
-+         radeon_miptree_unreference(t->mt);
-+	 t->mt = 0;
-+	 t->validated = GL_FALSE;
-+      }
-       break;
- 
-    default:
-       return;
-    }
--
--   /* Mark this texobj as dirty (one bit per tex unit)
--    */
--   t->dirty_state = TEX_ALL;
- }
- 
- 
--
--static void r200BindTexture( GLcontext *ctx, GLenum target,
--			       struct gl_texture_object *texObj )
--{
--   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
--	       ctx->Texture.CurrentUnit );
--   }
--
--   if ( (target == GL_TEXTURE_1D)
--	|| (target == GL_TEXTURE_2D) 
--#if ENABLE_HW_3D_TEXTURE
--	|| (target == GL_TEXTURE_3D)
--#endif
--	|| (target == GL_TEXTURE_CUBE_MAP)
--	|| (target == GL_TEXTURE_RECTANGLE_NV) ) {
--      assert( texObj->DriverData != NULL );
--   }
--}
--
--
--static void r200DeleteTexture( GLcontext *ctx,
--				 struct gl_texture_object *texObj )
-+static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
--	       _mesa_lookup_enum_by_nr( texObj->Target ) );
-+   radeonTexObj* t = radeon_tex_obj(texObj);
-+
-+   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+      fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+	      (void *)texObj,
-+	      _mesa_lookup_enum_by_nr(texObj->Target));
-+   }
-+   
-+   if (rmesa) {
-+      int i;
-+      radeon_firevertices(&rmesa->radeon);
-+      for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
-+	 if ( t == rmesa->state.texture.unit[i].texobj ) {
-+	    rmesa->state.texture.unit[i].texobj = NULL;
-+	    rmesa->hw.tex[i].dirty = GL_FALSE;
-+	    rmesa->hw.cube[i].dirty = GL_FALSE;
-+	 }
-+      }      
-    }
--
--   if ( t != NULL ) {
--      if ( rmesa ) {
--         R200_FIREVERTICES( rmesa );
--      }
--
--      driDestroyTextureObject( t );
-+   
-+   if (t->mt) {
-+      radeon_miptree_unreference(t->mt);
-+      t->mt = 0;
-    }
--   /* Free mipmap images and the texture object itself */
-    _mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1150,46 +459,59 @@ static void r200TexGen( GLcontext *ctx,
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
--static struct gl_texture_object *
--r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
-+static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
-+						      GLuint name,
-+						      GLenum target)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_object *obj;
--   obj = _mesa_new_texture_object(ctx, name, target);
--   if (!obj)
--      return NULL;
--   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
--   r200AllocTexObj( obj );
--   return obj;
-+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
-+
-+   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+     fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+	     t, _mesa_lookup_enum_by_nr(target));
-+   }
-+
-+   _mesa_initialize_texture_object(&t->base, name, target);
-+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+   /* Initialize hardware state */
-+   r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
-+   r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
-+   r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
-+   r200SetTexBorderColor(t, t->base._BorderChan);
-+
-+   return &t->base;
- }
- 
- 
-+
- void r200InitTextureFuncs( struct dd_function_table *functions )
- {
-    /* Note: we only plug in the functions we implement in the driver
-     * since _mesa_init_driver_functions() was already called.
-     */
--   functions->ChooseTextureFormat	= r200ChooseTextureFormat;
--   functions->TexImage1D		= r200TexImage1D;
--   functions->TexImage2D		= r200TexImage2D;
-+   functions->ChooseTextureFormat	= radeonChooseTextureFormat;
-+   functions->TexImage1D		= radeonTexImage1D;
-+   functions->TexImage2D		= radeonTexImage2D;
- #if ENABLE_HW_3D_TEXTURE
--   functions->TexImage3D		= r200TexImage3D;
-+   functions->TexImage3D		= radeonTexImage3D;
- #else
-    functions->TexImage3D		= _mesa_store_teximage3d;
- #endif
--   functions->TexSubImage1D		= r200TexSubImage1D;
--   functions->TexSubImage2D		= r200TexSubImage2D;
-+   functions->TexSubImage1D		= radeonTexSubImage1D;
-+   functions->TexSubImage2D		= radeonTexSubImage2D;
- #if ENABLE_HW_3D_TEXTURE
--   functions->TexSubImage3D		= r200TexSubImage3D;
-+   functions->TexSubImage3D		= radeonTexSubImage3D;
- #else
-    functions->TexSubImage3D		= _mesa_store_texsubimage3d;
- #endif
-+   functions->GetTexImage               = radeonGetTexImage;
-+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
-    functions->NewTextureObject		= r200NewTextureObject;
--   functions->BindTexture		= r200BindTexture;
-+   //   functions->BindTexture		= r200BindTexture;
-    functions->DeleteTexture		= r200DeleteTexture;
-    functions->IsTextureResident		= driIsTextureResident;
- 
-@@ -1197,22 +519,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
-    functions->TexParameter		= r200TexParameter;
-    functions->TexGen			= r200TexGen;
- 
--   functions->CompressedTexImage2D	= r200CompressedTexImage2D;
--   functions->CompressedTexSubImage2D	= r200CompressedTexSubImage2D;
-+   functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
-+   functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
-+
-+   functions->GenerateMipmap = radeonGenerateMipmap;
-+
-+   functions->NewTextureImage = radeonNewTextureImage;
-+   functions->FreeTexImageData = radeonFreeTexImageData;
-+   functions->MapTexture = radeonMapTexture;
-+   functions->UnmapTexture = radeonUnmapTexture;
- 
-    driInitTextureFormats();
- 
--#if 000
--   /* moved or obsolete code */
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   driInitTextureObjects( ctx, & rmesa->swapped,
--			  DRI_TEXMGR_DO_TEXTURE_1D
--			  | DRI_TEXMGR_DO_TEXTURE_2D );
--
--   /* Hack: r200NewTextureObject is not yet installed when the
--    * default textures are created. Therefore set MaxAnisotropy of the
--    * default 2D texture now. */
--   ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache,
--							    "def_max_anisotropy");
--#endif
- }
-diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
-index 10ff8e8..55592ed 100644
---- a/src/mesa/drivers/dri/r200/r200_tex.h
-+++ b/src/mesa/drivers/dri/r200/r200_tex.h
-@@ -41,9 +41,9 @@ extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 
- extern void r200UpdateTextureState( GLcontext *ctx );
- 
--extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
-+extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
- 
--extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
-+extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
- 
- extern void r200InitTextureFuncs( struct dd_function_table *functions );
- 
-diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
-deleted file mode 100644
-index 3b81ac0..0000000
---- a/src/mesa/drivers/dri/r200/r200_texmem.c
-+++ /dev/null
-@@ -1,530 +0,0 @@
--/**************************************************************************
--
--Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.  
--The Weather Channel, Inc. funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86
--license. This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *
-- */
-- 
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/colormac.h"
--#include "main/macros.h"
--#include "r200_context.h"
--#include "r200_ioctl.h"
--#include "r200_tex.h"
--#include "radeon_reg.h"
--
--#include <unistd.h>  /* for usleep() */
--
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void
--r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
--{
--   if ( R200_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, 
--	       (void *)t, (void *)t->base.tObj );
--   }
--
--   if ( rmesa != NULL ) {
--      unsigned   i;
--
--
--      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
--	 if ( t == rmesa->state.texture.unit[i].texobj ) {
--	    rmesa->state.texture.unit[i].texobj = NULL;
--	    rmesa->hw.tex[i].dirty = GL_FALSE;
--	    rmesa->hw.cube[i].dirty = GL_FALSE;
--	 }
--      }
--   }
--}
--
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--
--static void r200UploadGARTClientSubImage( r200ContextPtr rmesa,
--					  r200TexObjPtr t, 
--					  struct gl_texture_image *texImage,
--					  GLint hwlevel,
--					  GLint x, GLint y, 
--					  GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   GLuint srcPitch, dstPitch;
--   int blit_format;
--   int srcOffset;
--
--   /*
--    * XXX it appears that we always upload the full image, not a subimage.
--    * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--    * changed, the src pitch will have to change.
--    */
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = R200_CP_COLOR_FORMAT_CI8;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   case 2:
--      blit_format = R200_CP_COLOR_FORMAT_RGB565;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   case 4:
--      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
--      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--      break;
--   default:
--      return;
--   }
--
--   t->image[0][hwlevel].data = texImage->Data;
--   srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
--
--   assert( srcOffset != ~0 );
--
--   /* Don't currently need to cope with small pitches?
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--
--   r200EmitWait( rmesa, RADEON_WAIT_3D );
--
--   r200EmitBlit( rmesa, blit_format, 
--		 srcPitch,  
--		 srcOffset,   
--		 dstPitch,
--		 t->bufAddr,
--		 x, 
--		 y, 
--		 t->image[0][hwlevel].x + x,
--		 t->image[0][hwlevel].y + y, 
--		 width,
--		 height );
--
--   r200EmitWait( rmesa, RADEON_WAIT_2D );
--}
--
--static void r200UploadRectSubImage( r200ContextPtr rmesa,
--				    r200TexObjPtr t, 
--				    struct gl_texture_image *texImage,
--				    GLint x, GLint y, 
--				    GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   int blit_format, dstPitch, done;
--
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = R200_CP_COLOR_FORMAT_CI8;
--      break;
--   case 2:
--      blit_format = R200_CP_COLOR_FORMAT_RGB565;
--      break;
--   case 4:
--      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
--      break;
--   default:
--      return;
--   }
--
--   t->image[0][0].data = texImage->Data;
--
--   /* Currently don't need to cope with small pitches.
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--   dstPitch = t->pp_txpitch + 32;
--
--   if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--      /* In this case, could also use GART texturing.  This is
--       * currently disabled, but has been tested & works.
--       */
--      if ( !t->image_override )
--         t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
--      t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--      if (R200_DEBUG & DEBUG_TEXTURE)
--	 fprintf(stderr, 
--		 "Using GART texturing for rectangular client texture\n");
--
--      /* Release FB memory allocated for this image:
--       */
--      /* FIXME This may not be correct as driSwapOutTextureObject sets
--       * FIXME dirty_images.  It may be fine, though.
--       */
--      if ( t->base.memBlock ) {
--	 driSwapOutTextureObject( (driTextureObject *) t );
--      }
--   }
--   else if (texImage->IsClientData) {
--      /* Data already in GART memory, with usable pitch.
--       */
--      GLuint srcPitch;
--      srcPitch = texImage->RowStride * texFormat->TexelBytes;
--      r200EmitBlit( rmesa, 
--		    blit_format, 
--		    srcPitch,
--		    r200GartOffsetFromVirtual( rmesa, texImage->Data ),   
--		    dstPitch, t->bufAddr,
--		    0, 0, 
--		    0, 0, 
--		    width, height );
--   }
--   else {
--      /* Data not in GART memory, or bad pitch.
--       */
--      for (done = 0; done < height ; ) {
--	 struct r200_dma_region region;
--	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
--	 int src_pitch;
--	 char *tex;
--
--         src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--	 tex = (char *)texImage->Data + done * src_pitch;
--
--	 memset(&region, 0, sizeof(region));
--	 r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
--
--	 /* Copy texdata to dma:
--	  */
--	 if (0)
--	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
--		    __FUNCTION__, src_pitch, dstPitch);
--
--	 if (src_pitch == dstPitch) {
--	    memcpy( region.address + region.start, tex, lines * src_pitch );
--	 } 
--	 else {
--	    char *buf = region.address + region.start;
--	    int i;
--	    for (i = 0 ; i < lines ; i++) {
--	       memcpy( buf, tex, src_pitch );
--	       buf += dstPitch;
--	       tex += src_pitch;
--	    }
--	 }
--
--	 r200EmitWait( rmesa, RADEON_WAIT_3D );
--
--	 /* Blit to framebuffer
--	  */
--	 r200EmitBlit( rmesa,
--		       blit_format,
--		       dstPitch, GET_START( &region ),
--		       dstPitch | (t->tile_bits >> 16),
--		       t->bufAddr,
--		       0, 0,
--		       0, done,
--		       width, lines );
--	 
--	 r200EmitWait( rmesa, RADEON_WAIT_2D );
--
--	 r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
--	 done += lines;
--      }
--   }
--}
--
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, 
--			    GLint hwlevel,
--			    GLint x, GLint y, GLint width, GLint height,
--			    GLuint face )
--{
--   struct gl_texture_image *texImage = NULL;
--   GLuint offset;
--   GLint imageWidth, imageHeight;
--   GLint ret;
--   drm_radeon_texture_t tex;
--   drm_radeon_tex_image_t tmp;
--   const int level = hwlevel + t->base.firstLevel;
--
--   if ( R200_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
--	       __FUNCTION__, (void *)t, (void *)t->base.tObj,
--	       level, width, height, face );
--   }
--
--   ASSERT(face < 6);
--
--   /* Ensure we have a valid texture to upload */
--   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
--      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--      return;
--   }
--
--   texImage = t->base.tObj->Image[face][level];
--
--   if ( !texImage ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
--      return;
--   }
--   if ( !texImage->Data ) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
--      return;
--   }
--
--
--   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--      assert(level == 0);
--      assert(hwlevel == 0);
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
--      r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
--      return;
--   }
--   else if (texImage->IsClientData) {
--      if ( R200_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is in GART client storage\n",
--		  __FUNCTION__);
--      r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
--				   x, y, width, height );
--      return;
--   }
--   else if ( R200_DEBUG & DEBUG_TEXTURE )
--      fprintf( stderr, "%s: image data is in normal memory\n",
--	       __FUNCTION__);
--      
--
--   imageWidth = texImage->Width;
--   imageHeight = texImage->Height;
--
--   offset = t->bufAddr + t->base.totalSize / 6 * face;
--
--   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      GLint imageX = 0;
--      GLint imageY = 0;
--      GLint blitX = t->image[face][hwlevel].x;
--      GLint blitY = t->image[face][hwlevel].y;
--      GLint blitWidth = t->image[face][hwlevel].width;
--      GLint blitHeight = t->image[face][hwlevel].height;
--      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
--	       imageWidth, imageHeight, imageX, imageY );
--      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
--	       blitWidth, blitHeight, blitX, blitY );
--      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--	       (GLuint)offset, hwlevel, level );
--   }
--
--   t->image[face][hwlevel].data = texImage->Data;
--
--   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--    * We used to use 1, 2 and 4-byte texels and used to use the texture
--    * width to dictate the blit width - but that won't work for compressed
--    * textures. (Brian)
--    * NOTE: can't do that with texture tiling. (sroland)
--    */
--   tex.offset = offset;
--   tex.image = &tmp;
--   /* copy (x,y,width,height,data) */
--   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
--   
--   if (texImage->TexFormat->TexelBytes) {
--      /* use multi-byte upload scheme */
--      tex.height = imageHeight;
--      tex.width = imageWidth;
--      tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
--      if (tex.format == R200_TXFORMAT_ABGR8888) {
--	 /* drm will refuse abgr8888 textures. */
--	 tex.format = R200_TXFORMAT_ARGB8888;
--      }
--      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
--      tex.offset += tmp.x & ~1023;
--      tmp.x = tmp.x % 1024;
--      if (t->tile_bits & R200_TXO_MICRO_TILE) {
--	 /* need something like "tiled coordinates" ? */
--	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
--	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
--	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--      }
--      else {
--	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--      }
--      if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
--	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
--	 ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
--	    (texImage->Height >= 16))) {
--	 /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--	    OR if height is smaller than 8 automatically, but if micro tiling is active
--	    the limit is height 16 instead ? */
--	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--      }
--   }
--   else {
--      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
--         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
--         so the kernel module reads the right amount of data. */
--      tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
--      tex.pitch = (BLIT_WIDTH_BYTES / 64);
--      tex.height = (imageHeight + 3) / 4;
--      tex.width = (imageWidth + 3) / 4;
--      switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
--      case R200_TXFORMAT_DXT1:
--           tex.width *= 8;
--           break;
--      case R200_TXFORMAT_DXT23:
--      case R200_TXFORMAT_DXT45:
--           tex.width *= 16;
--           break;
--      default:
--          fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--   do {
--      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
--                                 &tex, sizeof(drm_radeon_texture_t) );
--      if (ret) {
--	 if (R200_DEBUG & DEBUG_IOCTL)
--	    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
--	 usleep(1);
--      }
--   } while ( ret == -EAGAIN );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
--      fprintf( stderr, "   offset=0x%08x\n",
--	       offset );
--      fprintf( stderr, "   image width=%d height=%d\n",
--	       imageWidth, imageHeight );
--      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
--	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
--	       t->image[face][hwlevel].data );
--      exit( 1 );
--   }
--}
--
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- * 
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
--{
--   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
--	       t->base.firstLevel, t->base.lastLevel );
--   }
--
--   if ( !t || t->base.totalSize == 0 || t->image_override )
--      return 0;
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      r200Finish( rmesa->glCtx );
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   if ( t->base.memBlock == NULL ) {
--      int heap;
--
--      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
--				 (driTextureObject *) t );
--      if ( heap == -1 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 return -1;
--      }
--
--      /* Set the base offset of the texture image */
--      t->bufAddr = rmesa->r200Screen->texOffset[heap] 
--	   + t->base.memBlock->ofs;
--      t->pp_txoffset = t->bufAddr;
--       
--      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--	 /* hope it's safe to add that here... */
--	 t->pp_txoffset |= t->tile_bits;
--      }
--
--      /* Mark this texobj as dirty on all units:
--       */
--      t->dirty_state = TEX_ALL;
--   }
--
--   /* Let the world know we've used this memory recently.
--    */
--   driUpdateTextureLRU( (driTextureObject *) t );
--   UNLOCK_HARDWARE( rmesa );
--
--   /* Upload any images that are new */
--   if (t->base.dirty_images[face]) {
--      int i;
--      for ( i = 0 ; i < numLevels ; i++ ) {
--         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
--            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
--			    t->image[face][i].height, face );
--         }
--      }
--      t->base.dirty_images[face] = 0;
--   }
--
--
--   if (R200_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      r200Finish( rmesa->glCtx );
--   }
--
--   return 0;
--}
-diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
-index 3f9a2f4..6432068 100644
---- a/src/mesa/drivers/dri/r200/r200_texstate.c
-+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
-@@ -40,6 +40,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/texobj.h"
- #include "main/enums.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_mipmap_tree.h"
- #include "r200_context.h"
- #include "r200_state.h"
- #include "r200_ioctl.h"
-@@ -139,257 +141,6 @@ static const struct tx_table tx_table_le[] =
- #undef _ALPHA
- #undef _INVALID
- 
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
-- * too.
-- * 
-- * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-- */
--static void r200SetTexImages( r200ContextPtr rmesa,
--			      struct gl_texture_object *tObj )
--{
--   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
--   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
--   GLint curOffset, blitWidth;
--   GLint i, texelBytes;
--   GLint numLevels;
--   GLint log2Width, log2Height, log2Depth;
--
--   /* Set the hardware texture format
--    */
--   if ( !t->image_override ) {
--      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
--	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
--								tx_table_be;
--
--         t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
--                             R200_TXFORMAT_ALPHA_IN_MAP);
--         t->pp_txfilter &= ~R200_YUV_TO_RGB;
--
--	 t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
--	 t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
--      }
--      else {
--         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
--         return;
--      }
--   }
--
--   texelBytes = baseImage->TexFormat->TexelBytes;
--
--   /* Compute which mipmap levels we really want to send to the hardware.
--    */
--
--   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
--   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--   /* Calculate mipmap offsets and dimensions for blitting (uploading)
--    * The idea is that we lay out the mipmap levels within a block of
--    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--    */
--   curOffset = 0;
--   blitWidth = BLIT_WIDTH_BYTES;
--   t->tile_bits = 0;
--
--   /* figure out if this texture is suitable for tiling. */
--   if (texelBytes) {
--      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--      /* texrect might be able to use micro tiling too in theory? */
--	 (baseImage->Height > 1)) {
--	 /* allow 32 (bytes) x 1 mip (which will use two times the space
--	 the non-tiled version would use) max if base texture is large enough */
--	 if ((numLevels == 1) ||
--	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
--	       (baseImage->Width * texelBytes > 64)) ||
--	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
--	    t->tile_bits |= R200_TXO_MICRO_TILE;
--	 }
--      }
--      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--	 /* we can set macro tiling even for small textures, they will be untiled anyway */
--	 t->tile_bits |= R200_TXO_MACRO_TILE;
--      }
--   }
--
--   for (i = 0; i < numLevels; i++) {
--      const struct gl_texture_image *texImage;
--      GLuint size;
--
--      texImage = tObj->Image[0][i + t->base.firstLevel];
--      if ( !texImage )
--	 break;
--
--      /* find image size in bytes */
--      if (texImage->IsCompressed) {
--      /* need to calculate the size AFTER padding even though the texture is
--         submitted without padding.
--         Only handle pot textures currently - don't know if npot is even possible,
--         size calculation would certainly need (trivial) adjustments.
--         Align (and later pad) to 32byte, not sure what that 64byte blit width is
--         good for? */
--         if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
--            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
--            if ((texImage->Width + 3) < 8) /* width one block */
--               size = texImage->CompressedSize * 4;
--            else if ((texImage->Width + 3) < 16)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--         }
--         else /* DXT3/5, 16 bytes per block */
--            if ((texImage->Width + 3) < 8)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--      }
--      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
--      }
--      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
--	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--	    though the actual offset may be different (if texture is less than
--	    32 bytes width) to the untiled case */
--	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      else {
--	 int w = (texImage->Width * texelBytes + 31) & ~31;
--	 size = w * texImage->Height * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      assert(size > 0);
--
--      /* Align to 32-byte offset.  It is faster to do this unconditionally
--       * (no branch penalty).
--       */
--
--      curOffset = (curOffset + 0x1f) & ~0x1f;
--
--      if (texelBytes) {
--	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
--	 t->image[0][i].y = 0;
--	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
--	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
--      }
--      else {
--         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
--         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
--         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
--         t->image[0][i].height = size / t->image[0][i].width;     
--      }
--
--#if 0
--      /* for debugging only and only  applicable to non-rectangle targets */
--      assert(size % t->image[0][i].width == 0);
--      assert(t->image[0][i].x == 0
--             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
--#endif
--
--      if (0)
--         fprintf(stderr,
--                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--                 i, texImage->Width, texImage->Height,
--                 t->image[0][i].x, t->image[0][i].y,
--                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
--
--      curOffset += size;
--
--   }
--
--   /* Align the total size of texture memory block.
--    */
--   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--   /* Setup remaining cube face blits, if needed */
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      const GLuint faceSize = t->base.totalSize;
--      GLuint face;
--      /* reuse face 0 x/y/width/height - just update the offset when uploading */
--      for (face = 1; face < 6; face++) {
--         for (i = 0; i < numLevels; i++) {
--            t->image[face][i].x =  t->image[0][i].x;
--            t->image[face][i].y =  t->image[0][i].y;
--            t->image[face][i].width  = t->image[0][i].width;
--            t->image[face][i].height = t->image[0][i].height;
--         }
--      }
--      t->base.totalSize = 6 * faceSize; /* total texmem needed */
--   }
--
--
--   /* Hardware state:
--    */
--   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
--   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
--
--   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
--		       R200_TXFORMAT_HEIGHT_MASK |
--                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
--                       R200_TXFORMAT_F5_WIDTH_MASK |
--                       R200_TXFORMAT_F5_HEIGHT_MASK);
--   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
--		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
--
--   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
--   if (tObj->Target == GL_TEXTURE_3D) {
--      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
--      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
--   }
--   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      ASSERT(log2Width == log2Height);
--      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
--                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
--/* don't think we need this bit, if it exists at all - fglrx does not set it */
--                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
--      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
--      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
--                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
--                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
--   }
--   else {
--      /* If we don't in fact send enough texture coordinates, q will be 1,
--       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
--       */
--      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
--   }
--
--   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
--                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
--
--   /* Only need to round to nearest 32 for textures, but the blitter
--    * requires 64-byte aligned pitches, and we may/may not need the
--    * blitter.   NPOT only!
--    */
--   if ( !t->image_override ) {
--      if (baseImage->IsCompressed)
--         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--      else
--         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
--      t->pp_txpitch -= 32;
--   }
--
--   t->dirty_state = TEX_ALL;
--
--   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
--}
--
--
--
- /* ================================================================
-  * Texture combine functions
-  */
-@@ -981,20 +732,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- {
- 	r200ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
--	    _mesa_lookup_texture(rmesa->glCtx, texname);
--	r200TexObjPtr t;
-+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r200TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
- 
--	t->pp_txoffset = offset;
-+	t->bo = NULL;
-+	t->override_offset = offset;
- 	t->pp_txpitch = pitch - 32;
- 
- 	switch (depth) {
-@@ -1207,12 +957,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
-                                 R200_VOLUME_FILTER_MASK)
- 
- 
-+static void disable_tex_obj_state( r200ContextPtr rmesa, 
-+				   int unit )
-+{
-+   
-+   R200_STATECHANGE( rmesa, vtx );
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-+
-+   if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
-+      TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-+   }
-+
-+   /* Actually want to keep all units less than max active texture
-+    * enabled, right?  Fix this for >2 texunits.
-+    */
-+
-+   {
-+      GLuint tmp = rmesa->TexGenEnabled;
-+
-+      rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenNeedNormals[unit] = GL_FALSE;
-+      rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
-+
-+      if (tmp != rmesa->TexGenEnabled) {
-+	 rmesa->recheck_texgen[unit] = GL_TRUE;
-+	 rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-+      }
-+   }
-+}
- static void import_tex_obj_state( r200ContextPtr rmesa,
- 				  int unit,
--				  r200TexObjPtr texobj )
-+				  radeonTexObjPtr texobj )
- {
- /* do not use RADEON_DB_STATE to avoid stale texture caches */
--   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-+   GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
- 
-    R200_STATECHANGE( rmesa, tex[unit] );
- 
-@@ -1225,36 +1004,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
-    cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
-    cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
-    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
--   if (rmesa->r200Screen->drmSupportsFragShader) {
--      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
--   }
--   else {
--      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
--   }
- 
--   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
--      GLuint bytesPerFace = texobj->base.totalSize / 6;
--      ASSERT(texobj->base.totalSize % 6 == 0);
-+   if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- 
-       R200_STATECHANGE( rmesa, cube[unit] );
-       cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
--      if (rmesa->r200Screen->drmSupportsFragShader) {
-+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
- 	 /* that value is submitted twice. could change cube atom
- 	    to not include that command when new drm is used */
- 	 cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
-       }
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
--      cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
-    }
- 
--   texobj->dirty_state &= ~(1<<unit);
- }
- 
--
- static void set_texgen_matrix( r200ContextPtr rmesa, 
- 			       GLuint unit,
- 			       const GLfloat *s_plane,
-@@ -1377,7 +1141,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
-    } else {
-       tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
-    }
--
-    if (texUnit->TexGenEnabled & R_BIT) {
-       if (texUnit->GenModeR != mode)
- 	 mixed_fallback = GL_TRUE;
-@@ -1513,52 +1276,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
-    return GL_TRUE;
- }
- 
--
--static void disable_tex( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--
--   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
--      /* Texture unit disabled */
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
--	 rmesa->state.texture.unit[unit].texobj = NULL;
--      }
--
--      R200_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
--	 
--      R200_STATECHANGE( rmesa, vtx );
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
--	 
--      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
--	 TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
--      }
--
--      /* Actually want to keep all units less than max active texture
--       * enabled, right?  Fix this for >2 texunits.
--       */
--
--      {
--	 GLuint tmp = rmesa->TexGenEnabled;
--
--	 rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenNeedNormals[unit] = GL_FALSE;
--	 rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
--
--	 if (tmp != rmesa->TexGenEnabled) {
--	    rmesa->recheck_texgen[unit] = GL_TRUE;
--	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--	 }
--      }
--   }
--}
--
- void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-@@ -1575,237 +1292,165 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
-    }
- }
- 
--static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
--   }
--
--   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock && !t->image_override ) 
--	 return GL_FALSE;
--   }
--
--   set_re_cntl_d3d( ctx, unit, GL_FALSE );
--
--   return GL_TRUE;
--}
--
--#if ENABLE_HW_3D_TEXTURE
--static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
-+/**
-+ * Compute the cached hardware register values for the given texture object.
-+ *
-+ * \param rmesa Context pointer
-+ * \param t the radeon texture object (radeonTexObj)
-+ */
-+static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
- {
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Need to load the 3d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-+   const struct gl_texture_image *firstImage =
-+      t->base.Image[0][t->mt->firstLevel];
-+   GLint log2Width, log2Height, log2Depth, texelBytes;
-+   
-+   log2Width  = firstImage->WidthLog2;
-+   log2Height = firstImage->HeightLog2;
-+   log2Depth  = firstImage->DepthLog2;
-+   texelBytes = firstImage->TexFormat->TexelBytes;
-+
-+
-+   if (!t->image_override) {
-+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
-+	    tx_table_be;
-+	 
-+	 t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
-+			     R200_TXFORMAT_ALPHA_IN_MAP);
-+	 t->pp_txfilter &= ~R200_YUV_TO_RGB;
-+	 
-+	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
-+	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
-+      } else {
-+	 _mesa_problem(NULL, "unexpected texture format in %s",
-+		       __FUNCTION__);
-+	 return;
-+      }
-    }
-+   
-+   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
-+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
-+	
-+   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
-+		       R200_TXFORMAT_HEIGHT_MASK |
-+		       R200_TXFORMAT_CUBIC_MAP_ENABLE |
-+		       R200_TXFORMAT_F5_WIDTH_MASK |
-+		       R200_TXFORMAT_F5_HEIGHT_MASK);
-+   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
-+		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
-+   
-+   t->tile_bits = 0;
-+   
-+   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
-+   if (t->base.Target == GL_TEXTURE_3D) {
-+      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
-+      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
- 
--   ASSERT(tObj->Target == GL_TEXTURE_3D);
--
--   /* R100 & R200 do not support mipmaps for 3D textures.
--    */
--   if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
--      return GL_FALSE;
-    }
--
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock ) 
--	 return GL_FALSE;
-+   else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      ASSERT(log2Width == log2Height);
-+      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
-+			 (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
-+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
-+			 (R200_TXFORMAT_CUBIC_MAP_ENABLE));
-+      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
-+      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
-+                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
-+                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
-    }
--
--   set_re_cntl_d3d( ctx, unit, GL_TRUE );
--
--   return GL_TRUE;
--}
--#endif
--
--static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--   GLuint face;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
--      for (face = 0; face < 6; face++)
--         t->base.dirty_images[face] = ~0;
-+   else {
-+      /* If we don't in fact send enough texture coordinates, q will be 1,
-+       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
-+       */
-+      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
--
--   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
--        t->base.dirty_images[2] || t->base.dirty_images[3] ||
--        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
--      /* flush */
--      R200_FIREVERTICES( rmesa );
--      /* layout memory space, once for all faces */
--      r200SetTexImages( rmesa, tObj );
--   }
-+   t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
-+		   | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
- 
--   /* upload (per face) */
--   for (face = 0; face < 6; face++) {
--      if (t->base.dirty_images[face]) {
--         r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
--      }
--   }
--      
--   if ( !t->base.memBlock ) {
--      /* texmem alloc failed, use s/w fallback */
--      return GL_FALSE;
-+   if ( !t->image_override ) {
-+      if (firstImage->IsCompressed)
-+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
-+      else
-+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
-+      t->pp_txpitch -= 32;
-    }
- 
--   set_re_cntl_d3d( ctx, unit, GL_TRUE );
--
--   return GL_TRUE;
--}
--
--static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
--{
--   r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
-+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-       t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--   if ( t->base.dirty_images[0] ) {
--      R200_FIREVERTICES( rmesa );
--      r200SetTexImages( rmesa, tObj );
--      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock &&
--           !t->image_override &&
--           !rmesa->prefer_gart_client_texturing ) 
--	 return GL_FALSE;
--   }
--
--   set_re_cntl_d3d( ctx, unit, GL_FALSE );
--
--   return GL_TRUE;
- }
- 
--
--static GLboolean update_tex_common( GLcontext *ctx, int unit )
-+static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
--
--   /* Fallback if there's a texture border */
--   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
--       return GL_FALSE;
--
--   /* Update state if this is a different texture object to last
--    * time.
--    */
--   if ( rmesa->state.texture.unit[unit].texobj != t ) {
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
--	     ~(1UL << unit);
--      }
-+   radeonTexObj *t = radeon_tex_obj(texObj);
- 
--      rmesa->state.texture.unit[unit].texobj = t;
--      t->base.bound |= (1UL << unit);
--      t->dirty_state |= 1<<unit;
--      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
--   }
--
--
--   /* Newly enabled?
--    */
--   if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
--      R200_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
-+   if (!radeon_validate_texture_miptree(ctx, texObj))
-+      return GL_FALSE;
- 
--      R200_STATECHANGE( rmesa, vtx );
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
--      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
-+   r200_validate_texgen(ctx, unit);
-+   /* Configure the hardware registers (more precisely, the cached version
-+    * of the hardware registers). */
-+   setup_hardware_state(rmesa, t);
-+
-+   if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
-+       texObj->Target == GL_TEXTURE_2D ||
-+       texObj->Target == GL_TEXTURE_1D)
-+      set_re_cntl_d3d( ctx, unit, GL_FALSE );
-+   else
-+      set_re_cntl_d3d( ctx, unit, GL_TRUE );
-+   R200_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
-+   
-+   R200_STATECHANGE( rmesa, vtx );
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
-+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
- 
--      rmesa->recheck_texgen[unit] = GL_TRUE;
--   }
--
--   if (t->dirty_state & (1<<unit)) {
--      import_tex_obj_state( rmesa, unit, t );
--   }
-+   rmesa->recheck_texgen[unit] = GL_TRUE;
-+   import_tex_obj_state( rmesa, unit, t );
- 
-    if (rmesa->recheck_texgen[unit]) {
-       GLboolean fallback = !r200_validate_texgen( ctx, unit );
-       TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
-       rmesa->recheck_texgen[unit] = 0;
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
--   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
--   return !t->border_fallback;
--}
-+   t->validated = GL_TRUE;
- 
-+   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
- 
-+   return !t->border_fallback;
-+}
- 
--static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
-+static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
- {
-    r200ContextPtr rmesa = R200_CONTEXT(ctx);
-    GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
- 
--   if ( unitneeded & (TEXTURE_RECT_BIT) ) {
--      return (enable_tex_rect( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
--      return (enable_tex_2d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--#if ENABLE_HW_3D_TEXTURE
--   else if ( unitneeded & (TEXTURE_3D_BIT) ) {
--      return (enable_tex_3d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--#endif
--   else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
--      return (enable_tex_cube( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( unitneeded ) {
--      return GL_FALSE;
--   }
--   else {
--      disable_tex( ctx, unit );
--      return GL_TRUE;
-+   if (!unitneeded) {
-+      /* disable the unit */
-+     disable_tex_obj_state(rmesa, unit);
-+     return GL_TRUE;
-    }
-+
-+   if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
-+    _mesa_warning(ctx,
-+		  "failed to validate texture for unit %d.\n",
-+		  unit);
-+    rmesa->state.texture.unit[unit].texobj = NULL;
-+    return GL_FALSE;
-+  }
-+
-+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
-+  return GL_TRUE;
- }
- 
- 
-@@ -1846,11 +1491,11 @@ void r200UpdateTextureState( GLcontext *ctx )
- 
-    FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
- 
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       r200ChooseVertexState( ctx );
- 
- 
--   if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
-+   if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- 
-       /*
-        * T0 hang workaround -------------
-@@ -1863,7 +1508,7 @@ void r200UpdateTextureState( GLcontext *ctx )
- 	 R200_STATECHANGE(rmesa, tex[1]);
- 	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
- 	 if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
--	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-+	   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
- 	 rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
-       }
-       else if (!ctx->ATIFragmentShader._Enabled) {
-diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
-index 562992f..888f91d 100644
---- a/src/mesa/drivers/dri/r200/r200_vertprog.c
-+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
-@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) {
-    }
-    /* could optimize setting up vertex progs away for non-tcl hw */
-    fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
--      rmesa->r200Screen->drmSupportsVertexProgram);
-+      rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
-    TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
--   if (rmesa->TclFallback) return;
-+   if (rmesa->radeon.TclFallback) return;
- 
-    R200_STATECHANGE( rmesa, vap );
-    /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
-diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
-index 6ca9342..497b1ec 100644
---- a/src/mesa/drivers/dri/r300/Makefile
-+++ b/src/mesa/drivers/dri/r300/Makefile
-@@ -3,6 +3,8 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = r300_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c
-@@ -20,20 +22,24 @@ COMMON_SOURCES = \
- 	../common/xmlconfig.c \
- 	../common/dri_util.c
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c
-+
- DRIVER_SOURCES = \
- 		 radeon_screen.c \
--		 radeon_context.c \
--		 radeon_ioctl.c \
--		 radeon_lock.c \
--		 radeon_span.c \
--		 radeon_state.c \
--		 r300_mem.c \
- 		 r300_context.c \
- 		 r300_ioctl.c \
- 		 r300_cmdbuf.c \
- 		 r300_state.c \
- 		 r300_render.c \
--		 r300_texmem.c \
- 		 r300_tex.c \
- 		 r300_texstate.c \
- 		 radeon_program.c \
-@@ -49,12 +55,15 @@ DRIVER_SOURCES = \
- 		 r300_shader.c \
- 		 r300_emit.c \
- 		 r300_swtcl.c \
-+		 $(RADEON_COMMON_SOURCES) \
- 		 $(EGL_SOURCES)
- 
- C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
- 
- DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
--	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
-+	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
-+	-Wall
-+#	-DRADEON_BO_TRACK (optional; kept last so this comment cannot swallow -Wall)
- 
- SYMLINKS = \
- 	server/radeon_dri.c \
-@@ -68,7 +77,28 @@ COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
- 	radeon_screen.h \
--	radeon_span.h
-+	radeon_span.h \
-+	radeon_span.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_bo_legacy.h \
-+	radeon_cs_legacy.h \
-+	radeon_bocs_wrapper.h \
-+	radeon_lock.c \
-+	radeon_lock.h \
-+	radeon_common.c \
-+	radeon_common.h \
-+	radeon_common_context.c \
-+	radeon_common_context.h \
-+	radeon_cmdbuf.h \
-+	radeon_dma.c \
-+	radeon_dma.h \
-+	radeon_mipmap_tree.c \
-+	radeon_mipmap_tree.h \
-+	radeon_texture.c \
-+	radeon_texture.h
-+
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-index c9e1dfe..ac1c128 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-@@ -44,235 +44,252 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "radeon_drm.h"
- 
--#include "radeon_ioctl.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "radeon_reg.h"
- #include "r300_reg.h"
- #include "r300_cmdbuf.h"
- #include "r300_emit.h"
-+#include "radeon_bocs_wrapper.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_state.h"
-+#include "radeon_reg.h"
- 
--// Set this to 1 for extremely verbose debugging of command buffers
--#define DEBUG_CMDBUF		0
-+#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
-+#   define RADEON_ONE_REG_WR        (1 << 15)
- 
--/**
-- * Send the current command buffer via ioctl to the hardware.
-+/** # of dwords reserved for additional instructions that may need to be written
-+ * during flushing.
-  */
--int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
-+#define SPACE_FOR_FLUSHING	4
-+
-+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
- {
--	int ret;
--	int i;
--	drm_radeon_cmd_buffer_t cmd;
--	int start;
--
--	if (r300->radeon.lost_context) {
--		start = 0;
--		r300->radeon.lost_context = GL_FALSE;
--	} else
--		start = r300->cmdbuf.count_reemit;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "%s from %s - %i cliprects\n",
--			__FUNCTION__, caller, r300->radeon.numClipRects);
--
--		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
--			for (i = start; i < r300->cmdbuf.count_used; ++i)
--				fprintf(stderr, "%d: %08x\n", i,
--					r300->cmdbuf.cmd_buf[i]);
--	}
-+    if (r300->radeon.radeonScreen->kernel_mm) {
-+        return ((((*pkt) >> 16) & 0x3FFF) + 1);
-+    } else {
-+        drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
-+        return t->packet0.count;
-+    }
-+    return 0;
-+}
- 
--	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
--	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
-+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
-+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
- 
--	if (r300->radeon.state.scissor.enabled) {
--		cmd.nbox = r300->radeon.state.scissor.numClipRects;
--		cmd.boxes =
--		    (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
--	} else {
--		cmd.nbox = r300->radeon.numClipRects;
--		cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
-+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	drm_r300_cmd_header_t cmd;
-+	uint32_t addr, ndw, i;
-+	
-+	if (!r300->radeon.radeonScreen->kernel_mm) {
-+		uint32_t dwords;
-+		dwords = (*atom->check) (ctx, atom);
-+		BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+		OUT_BATCH_TABLE(atom->cmd, dwords);
-+		END_BATCH();
-+		return;
- 	}
--
--	ret = drmCommandWrite(r300->radeon.dri.fd,
--			      DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "Syncing in %s (from %s)\n\n",
--			__FUNCTION__, caller);
--		radeonWaitForIdleLocked(&r300->radeon);
-+	
-+	cmd.u = atom->cmd[0];
-+	addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
-+	ndw = cmd.vpu.count * 4;
-+	if (ndw) {
-+		BEGIN_BATCH_NO_AUTOSTATE(13 + ndw);
-+
-+		/* flush processing vertices */
-+		OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
-+		OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+		OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
-+		OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
-+		OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 1);
-+		OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
-+		OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+		for (i = 0; i < ndw; i++) {
-+			OUT_BATCH(atom->cmd[i+1]);
-+		}
-+		OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+		END_BATCH();
- 	}
--
--	r300->dma.nr_released_bufs = 0;
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
--
--	return ret;
- }
- 
--int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
-+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	int ret;
--
--	LOCK_HARDWARE(&r300->radeon);
--
--	ret = r300FlushCmdBufLocked(r300, caller);
--
--	UNLOCK_HARDWARE(&r300->radeon);
--
--	if (ret) {
--		fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
--		_mesa_exit(ret);
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	drm_r300_cmd_header_t cmd;
-+	uint32_t addr, ndw, i, sz;
-+	int type, clamp, stride;
-+
-+	if (!r300->radeon.radeonScreen->kernel_mm) {
-+		uint32_t dwords;
-+		dwords = (*atom->check) (ctx, atom);
-+		BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+		OUT_BATCH_TABLE(atom->cmd, dwords);
-+		END_BATCH();
-+		return;
- 	}
- 
--	return ret;
--}
-+	cmd.u = atom->cmd[0];
-+	sz = cmd.r500fp.count;
-+	addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
-+	type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
-+	clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
- 
--static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
--{
--	int i;
--	int dwords = (*state->check) (r300, state);
-+	addr |= (type << 16);
-+	addr |= (clamp << 17);
- 
--	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords,
--		state->cmd_size);
-+	stride = type ? 4 : 6;
- 
--	if (RADEON_DEBUG & DEBUG_VERBOSE) {
--		for (i = 0; i < dwords; i++) {
--			fprintf(stderr, "      %s[%d]: %08x\n",
--				state->name, i, state->cmd[i]);
-+	ndw = sz * stride;
-+	if (ndw) {
-+
-+		BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
-+		OUT_BATCH(addr);
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+		for (i = 0; i < ndw; i++) {
-+			OUT_BATCH(atom->cmd[i+1]);
- 		}
-+		END_BATCH();
- 	}
- }
- 
--/**
-- * Emit all atoms with a dirty field equal to dirty.
-- *
-- * The caller must have ensured that there is enough space in the command
-- * buffer.
-- */
--static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
-+static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	struct r300_state_atom *atom;
--	uint32_t *dest;
--	int dwords;
--
--	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
--
--	/* Emit WAIT */
--	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit cache flush */
--	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	*dest = R300_TX_FLUSH;
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit END3D */
--	*dest = cmdpacify();
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit actual atoms */
--
--	foreach(atom, &r300->hw.atomlist) {
--		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
--			dwords = (*atom->check) (r300, atom);
--			if (dwords) {
--				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
--					r300PrintStateAtom(r300, atom);
--				}
--				memcpy(dest, atom->cmd, dwords * 4);
--				dest += dwords;
--				r300->cmdbuf.count_used += dwords;
--				atom->dirty = GL_FALSE;
--			} else {
--				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
--					fprintf(stderr, "  skip state %s\n",
--						atom->name);
--				}
--			}
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
-+
-+	if (numtmus) {
-+		int i;
-+
-+		for(i = 0; i < numtmus; ++i) {
-+		    radeonTexObj *t = r300->hw.textures[i];
-+		    if (t && !t->image_override) {
-+		            BEGIN_BATCH_NO_AUTOSTATE(4);
-+		            OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+			    OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+					    RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+		            END_BATCH();
-+		    } else if (!t) {
-+			    //assert(0);
-+		            BEGIN_BATCH_NO_AUTOSTATE(4);
-+		            OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+			    OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
-+			    END_BATCH();
-+		    } else {
-+			    if (t->bo) {
-+		            	    BEGIN_BATCH_NO_AUTOSTATE(4);
-+		                    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+				    OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-+						    RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+		                    END_BATCH();
-+			    } else if (!r300->radeon.radeonScreen->kernel_mm) {
-+		            	    BEGIN_BATCH_NO_AUTOSTATE(2);
-+		                    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+				    OUT_BATCH(t->override_offset);
-+				    END_BATCH();
-+			    }
-+		    }
- 		}
- 	}
- }
- 
--/**
-- * Copy dirty hardware state atoms into the command buffer.
-- *
-- * We also copy out clean state if we're at the start of a buffer. That makes
-- * it easy to recover from lost contexts.
-- */
--void r300EmitState(r300ContextPtr r300)
-+static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
- {
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
--	    && !r300->hw.all_dirty)
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t cbpitch;
-+
-+	rrb = radeon_get_colorbuffer(&r300->radeon);
-+	if (!rrb || !rrb->bo) {
-+		fprintf(stderr, "no rrb\n");
- 		return;
--
--	/* To avoid going across the entire set of states multiple times, just check
--	 * for enough space for the case of emitting all state, and inline the
--	 * r300AllocCmdBuf code here without all the checks.
--	 */
--	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
--
--	if (!r300->cmdbuf.count_used) {
--		if (RADEON_DEBUG & DEBUG_STATE)
--			fprintf(stderr, "Begin reemit state\n");
--
--		r300EmitAtoms(r300, GL_FALSE);
--		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
- 	}
- 
--	if (RADEON_DEBUG & DEBUG_STATE)
--		fprintf(stderr, "Begin dirty state\n");
-+	cbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->cpp == 4)
-+		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+	else
-+		cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+		cbpitch |= R300_COLOR_TILE_ENABLE;
-+
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
-+	OUT_BATCH(cbpitch);
-+	END_BATCH();
-+}
- 
--	r300EmitAtoms(r300, GL_TRUE);
-+static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
-+{
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
-+	BATCH_LOCALS(&r300->radeon);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t zbpitch;
- 
--	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
-+	rrb = radeon_get_depthbuffer(&r300->radeon);
-+	if (!rrb)
-+		return;
- 
--	r300->hw.is_dirty = GL_FALSE;
--	r300->hw.all_dirty = GL_FALSE;
-+	zbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+		zbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+	}
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+		zbpitch |= R300_DEPTHMICROTILE_TILED;
-+	}
-+	
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
-+	END_BATCH();
- }
- 
--#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
--#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
--#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
--
--static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
-+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	return atom->cmd_size;
- }
- 
--static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
-+static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
- {
-+	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	int cnt;
--	cnt = packet0_count(atom->cmd);
-+	if (atom->cmd[0] == CP_PACKET2) {
-+		return 0;
-+	}
-+	cnt = packet0_count(r300, atom->cmd);
- 	return cnt ? cnt + 1 : 0;
- }
- 
--static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = vpu_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
- 
--static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 6) + 1 : 0;
- }
- 
--static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
-@@ -285,8 +302,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-       r300->hw.ATOM.idx = (IDX);					\
-       r300->hw.ATOM.check = check_##CHK;				\
-       r300->hw.ATOM.dirty = GL_FALSE;					\
--      r300->hw.max_state_size += (SZ);					\
--      insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM);		\
-+      r300->radeon.hw.max_state_size += (SZ);					\
-+      insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM);		\
-    } while (0)
- /**
-  * Allocate memory for the command buffer and initialize the state atom
-@@ -294,7 +311,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-  */
- void r300InitCmdBuf(r300ContextPtr r300)
- {
--	int size, mtu;
-+	int mtu;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	int i;
-@@ -305,7 +322,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
- 		is_r500 = 1;
- 
--	r300->hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
-+	r300->radeon.hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
- 
- 	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
- 	if (RADEON_DEBUG & DEBUG_TEXTURE) {
-@@ -313,97 +330,97 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	}
- 
- 	/* Setup the atom linked list */
--	make_empty_list(&r300->hw.atomlist);
--	r300->hw.atomlist.name = "atom-list";
-+	make_empty_list(&r300->radeon.hw.atomlist);
-+	r300->radeon.hw.atomlist.name = "atom-list";
- 
- 	/* Initialize state atoms */
- 	ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
--	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
-+	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
- 	ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
- 	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
- 	if (is_r500) {
- 	    ALLOC_STATE(vap_index_offset, always, 2, 0);
--	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
-+	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
- 	    r300->hw.vap_index_offset.cmd[1] = 0;
- 	}
- 	ALLOC_STATE(vte, always, 3, 0);
--	r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
-+	r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
- 	ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
--	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
-+	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
- 	ALLOC_STATE(vap_cntl_status, always, 2, 0);
--	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
-+	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
- 	ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
- 	r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
- 	r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
- 	ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
--	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
-+	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
- 	ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
--	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
-+	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(vap_clip_cntl, always, 2, 0);
--		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
-+		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
- 		ALLOC_STATE(vap_clip, always, 5, 0);
--		r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
-+		r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
- 		ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
--		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
-+		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
- 	}
- 
- 	ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
- 	r300->hw.vof.cmd[R300_VOF_CMD_0] =
--	    cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
- 		r300->hw.pvs.cmd[R300_PVS_CMD_0] =
--		    cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
- 	}
- 
- 	ALLOC_STATE(gb_enable, always, 2, 0);
--	r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
-+	r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
- 	ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
--	r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
-+	r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
- 	ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
--	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
-+	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
- 	ALLOC_STATE(ga_point_s0, always, 5, 0);
--	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
-+	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
- 	ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
--	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
-+	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
- 	ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
--	r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
-+	r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
- 	ALLOC_STATE(ga_point_minmax, always, 4, 0);
--	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
-+	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
- 	ALLOC_STATE(lcntl, always, 2, 0);
--	r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
-+	r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
- 	ALLOC_STATE(ga_line_stipple, always, 4, 0);
--	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
-+	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
- 	ALLOC_STATE(shade, always, 5, 0);
--	r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
-+	r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
- 	ALLOC_STATE(polygon_mode, always, 4, 0);
--	r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
-+	r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
- 	ALLOC_STATE(fogp, always, 3, 0);
--	r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
-+	r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
- 	ALLOC_STATE(zbias_cntl, always, 2, 0);
--	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
-+	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
- 	ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
- 	r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
--	    cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
- 	ALLOC_STATE(occlusion_cntl, always, 2, 0);
--	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
-+	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
- 	ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
--	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
-+	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
- 	ALLOC_STATE(su_depth_scale, always, 3, 0);
--	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
-+	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
- 	ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
--	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
-+	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
- 	if (is_r500) {
- 		ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
- 		for (i = 0; i < 8; i++) {
- 			r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
- 			  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-@@ -412,133 +429,146 @@ void r300InitCmdBuf(r300ContextPtr r300)
-                           (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
- 		}
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
- 	} else {
- 		ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
- 	}
- 	ALLOC_STATE(sc_hyperz, always, 3, 0);
--	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
-+	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
- 	ALLOC_STATE(sc_screendoor, always, 2, 0);
--	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
-+	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
- 	ALLOC_STATE(us_out_fmt, always, 6, 0);
--	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
-+	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
- 
- 	if (is_r500) {
- 		ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
-+		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
- 		r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
--		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
--		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
-+		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
-+		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
- 		r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
- 
- 		ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
--		r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
-+		r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
-+		r300->hw.r500fp.emit = emit_r500fp;
- 		ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
--		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
-+		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
-+		r300->hw.r500fp_const.emit = emit_r500fp;
- 	} else {
- 		ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
--		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
-+		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
-+		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
-+
- 		ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
--		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
-+		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
- 
- 		ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
--		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
-+		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
- 		ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
--		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
-+		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
- 		ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
--		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
-+		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
- 		ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
--		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
-+		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
- 		ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
--		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
-+		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
- 	}
- 	ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
--	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
-+	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
- 	ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
--	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
-+	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
- 	ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
--	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
-+	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
- 	ALLOC_STATE(fg_depth_src, always, 2, 0);
--	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
-+	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
- 	ALLOC_STATE(rb3d_cctl, always, 2, 0);
--	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
-+	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
- 	ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
--	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
-+	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
- 	ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
--	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
-+	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
- 	if (is_r500) {
- 		ALLOC_STATE(blend_color, always, 3, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
- 	} else {
- 		ALLOC_STATE(blend_color, always, 2, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
- 	}
- 	ALLOC_STATE(rop, always, 2, 0);
--	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
-+	r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
- 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
--	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
--	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
-+	r300->hw.cb.emit = &emit_cb_offset;
- 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
--	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
-+	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
- 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
--	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
-+	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- 	ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
--	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
-+	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
- 	ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
- 	r300->hw.zs.cmd[R300_ZS_CMD_0] =
--	    cmdpacket0(R300_ZB_CNTL, 3);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
- 	ALLOC_STATE(zstencil_format, always, 5, 0);
- 	r300->hw.zstencil_format.cmd[0] =
--	    cmdpacket0(R300_ZB_FORMAT, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
- 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
--	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
-+	r300->hw.zb.emit = emit_zb_offset;
- 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
--	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
-+	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
- 	ALLOC_STATE(unk4F30, always, 3, 0);
--	r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
-+	r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
- 	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
--	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
-+	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
- 	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
--	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
-+	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
- 
- 	/* VPU only on TCL */
- 	if (has_tcl) {
-    	        int i;
- 		ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
--		r300->hw.vpi.cmd[R300_VPI_CMD_0] =
--		    cmdvpu(R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.cmd[0] =
-+		    cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.emit = emit_vpu;
- 
- 		if (is_r500) {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
--				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R500_PVS_UCP_START + i, 1);
-+			  ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
-+			  r300->hw.vpucp[i].cmd[0] =
-+				  cmdvpu(r300->radeon.radeonScreen,
-+                           R500_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		} else {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
- 				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].cmd[0] =
-+					cmdvpu(r300->radeon.radeonScreen,
-+					       R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		}
- 	}
-@@ -546,61 +576,39 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	/* Textures */
- 	ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
- 
- 	ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
- 
- 	ALLOC_STATE(tex.size, variable, mtu + 1, 0);
--	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
-+	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
- 
- 	ALLOC_STATE(tex.format, variable, mtu + 1, 0);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
- 
- 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
--	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
-+	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
- 
--	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
-+	ALLOC_STATE(tex.offset, variable, 1, 0);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
-+	r300->hw.tex.offset.emit = &emit_tex_offsets;
- 
- 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
- 
- 	ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
- 
--	r300->hw.is_dirty = GL_TRUE;
--	r300->hw.all_dirty = GL_TRUE;
-+	r300->radeon.hw.is_dirty = GL_TRUE;
-+	r300->radeon.hw.all_dirty = GL_TRUE;
- 
--	/* Initialize command buffer */
--	size =
--	    256 * driQueryOptioni(&r300->radeon.optionCache,
--				  "command_buffer_size");
--	if (size < 2 * r300->hw.max_state_size) {
--		size = 2 * r300->hw.max_state_size + 65535;
--	}
--	if (size > 64 * 256)
--		size = 64 * 256;
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
--		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
--			sizeof(drm_r300_cmd_header_t));
--		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
--			sizeof(drm_radeon_cmd_buffer_t));
--		fprintf(stderr,
--			"Allocating %d bytes command buffer (max state is %d bytes)\n",
--			size * 4, r300->hw.max_state_size * 4);
--	}
--
--	r300->cmdbuf.size = size;
--	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
-+	rcommonInitCmdBuf(&r300->radeon);
- }
- 
- /**
-@@ -608,68 +616,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
-  */
- void r300DestroyCmdBuf(r300ContextPtr r300)
- {
--	struct r300_state_atom *atom;
--
--	FREE(r300->cmdbuf.cmd_buf);
-+	struct radeon_state_atom *atom;
- 
--	foreach(atom, &r300->hw.atomlist) {
-+	foreach(atom, &r300->radeon.hw.atomlist) {
- 		FREE(atom->cmd);
- 	}
--}
--
--void r300EmitBlit(r300ContextPtr rmesa,
--		  GLuint color_fmt,
--		  GLuint src_pitch,
--		  GLuint src_offset,
--		  GLuint dst_pitch,
--		  GLuint dst_offset,
--		  GLint srcx, GLint srcy,
--		  GLint dstx, GLint dsty, GLuint w, GLuint h)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr,
--			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
--			dst_pitch, dst_offset, dstx, dsty, w, h);
--
--	assert((src_pitch & 63) == 0);
--	assert((dst_pitch & 63) == 0);
--	assert((src_offset & 1023) == 0);
--	assert((dst_offset & 1023) == 0);
--	assert(w < (1 << 16));
--	assert(h < (1 << 16));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
--
--	cmd[0].header.cmd_type = R300_CMD_PACKET3;
--	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
--	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
--	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_BRUSH_NONE |
--		    (color_fmt << 8) |
--		    RADEON_GMC_SRC_DATATYPE_COLOR |
--		    RADEON_ROP3_S |
--		    RADEON_DP_SRC_SOURCE_MEMORY |
--		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
--
--	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
--	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
--	cmd[5].u = (srcx << 16) | srcy;
--	cmd[6].u = (dstx << 16) | dsty;	/* dst */
--	cmd[7].u = (w << 16) | h;
--}
--
--void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
- 
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].u = 0;
--	cmd[0].wait.cmd_type = R300_CMD_WAIT;
--	cmd[0].wait.flags = flags;
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-index a8eaa58..b7798eb 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-@@ -38,79 +38,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "r300_context.h"
- 
--extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
--extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
--
--extern void r300EmitState(r300ContextPtr r300);
--
- extern void r300InitCmdBuf(r300ContextPtr r300);
- extern void r300DestroyCmdBuf(r300ContextPtr r300);
- 
--/**
-- * Make sure that enough space is available in the command buffer
-- * by flushing if necessary.
-- *
-- * \param dwords The number of dwords we need to be free on the command buffer
-- */
--static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
--					     int dwords, const char *caller)
--{
--	assert(dwords < r300->cmdbuf.size);
--
--	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
--		r300FlushCmdBuf(r300, caller);
--}
--
--/**
-- * Allocate the given number of dwords in the command buffer and return
-- * a pointer to the allocated area.
-- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
-- * causes state reemission after a flush. This is necessary to ensure
-- * correct hardware state after an unlock.
-- */
--static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
--					       int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
--
--static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
--					    int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	if (!r300->cmdbuf.count_used) {
--		if (RADEON_DEBUG & DEBUG_IOCTL)
--			fprintf(stderr,
--				"Reemit state after flush (from %s)\n", caller);
--		r300EmitState(r300);
--	}
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
- 
--extern void r300EmitBlit(r300ContextPtr rmesa,
--			 GLuint color_fmt,
--			 GLuint src_pitch,
--			 GLuint src_offset,
--			 GLuint dst_pitch,
--			 GLuint dst_offset,
--			 GLint srcx, GLint srcy,
--			 GLint dstx, GLint dsty, GLuint w, GLuint h);
-+void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
-+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
- 
--extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
--extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
--extern void r300EmitVertexShader(r300ContextPtr rmesa);
--extern void r300EmitPixelShader(r300ContextPtr rmesa);
-+void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
-+int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
-+int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
- 
- #endif				/* __R300_CMDBUF_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
-index 4c14c7e..dd63add 100644
---- a/src/mesa/drivers/dri/r300/r300_context.c
-+++ b/src/mesa/drivers/dri/r300/r300_context.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/extensions.h"
- #include "main/state.h"
- #include "main/bufferobj.h"
-+#include "main/texobj.h"
- 
- #include "swrast/swrast.h"
- #include "swrast_setup/swrast_setup.h"
-@@ -55,19 +56,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drivers/common/driverfuncs.h"
- 
--#include "radeon_ioctl.h"
--#include "radeon_span.h"
- #include "r300_context.h"
-+#include "radeon_context.h"
-+#include "radeon_span.h"
- #include "r300_cmdbuf.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_tex.h"
- #include "r300_emit.h"
- #include "r300_swtcl.h"
-+#include "radeon_bocs_wrapper.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #include "vblank.h"
- #include "utils.h"
-@@ -183,6 +182,78 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
- 	0,
- };
- 
-+static void r300RunPipeline(GLcontext * ctx)
-+{
-+    _mesa_lock_context_textures(ctx);
-+
-+    if (ctx->NewState)
-+        _mesa_update_state_locked(ctx);
-+    
-+    _tnl_run_pipeline(ctx);
-+    _mesa_unlock_context_textures(ctx);
-+}
-+
-+static void r300_get_lock(radeonContextPtr rmesa)
-+{
-+	drm_radeon_sarea_t *sarea = rmesa->sarea;
-+
-+	if (sarea->ctx_owner != rmesa->dri.hwContext) {
-+		sarea->ctx_owner = rmesa->dri.hwContext;
-+		if (!rmesa->radeonScreen->kernel_mm)
-+			radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
-+	}
-+}		  
-+
-+static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+    /* please flush pipe do all pending work */
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    radeon_cs_write_dword(cs, 0x00FFFFFF);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_SC_HYPERZ, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_US_CONFIG, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_ZB_CNTL, 1));
-+    radeon_cs_write_dword(cs, 0x0);
-+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_RB3D_DSTCACHE_CTLSTAT, 1));
-+    radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
-+                                  R300_ZB_ZCACHE_CTLSTAT, 1));
-+    radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
-+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
-+                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
-+}
-+
-+static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
-+{
-+   BATCH_LOCALS(radeon);
-+   cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+   BEGIN_BATCH_NO_AUTOSTATE(2);
-+   OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
-+   END_BATCH();
-+   end_3d(radeon);
-+}
-+
-+static void r300_init_vtbl(radeonContextPtr radeon)
-+{
-+   radeon->vtbl.get_lock = r300_get_lock;
-+   radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
-+   radeon->vtbl.update_draw_buffer = r300UpdateDrawBuffer;
-+   radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
-+   radeon->vtbl.swtcl_flush = r300_swtcl_flush;
-+   radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
-+}
-+
-+
- /* Create the device specific rendering context.
-  */
- GLboolean r300CreateContext(const __GLcontextModes * glVisual,
-@@ -194,7 +265,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	struct dd_function_table functions;
- 	r300ContextPtr r300;
- 	GLcontext *ctx;
--	int tcl_mode, i;
-+	int tcl_mode;
- 
- 	assert(glVisual);
- 	assert(driContextPriv);
-@@ -208,13 +279,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- 		hw_tcl_on = future_hw_tcl_on = 0;
- 
-+	r300_init_vtbl(&r300->radeon);
- 	/* Parse configuration files.
- 	 * Do this here so that initialMaxAnisotropy is set before we create
- 	 * the default textures.
- 	 */
- 	driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
- 			    screen->driScreen->myNum, "r300");
--	r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
-+	r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
- 						     "def_max_anisotropy");
- 
- 	/* Init default driver functions then plug in our R300-specific functions
-@@ -226,10 +298,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	r300InitTextureFuncs(&functions);
- 	r300InitShaderFuncs(&functions);
- 
--#ifdef USER_BUFFERS
--	r300_mem_init(r300);
--#endif
--
- 	if (!radeonInitContext(&r300->radeon, &functions,
- 			       glVisual, driContextPriv,
- 			       sharedContextPrivate)) {
-@@ -238,37 +306,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	}
- 
- 	/* Init r300 context data */
--	r300->dma.buf0_address =
--	    r300->radeon.radeonScreen->buffers->list[0].address;
--
--	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
--	make_empty_list(&r300->swapped);
--
--	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
--	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
--	for (i = 0; i < r300->nr_heaps; i++) {
--		/* *INDENT-OFF* */
--		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
--							       screen->
--							       texSize[i], 12,
--							       RADEON_NR_TEX_REGIONS,
--							       (drmTextureRegionPtr)
--							       r300->radeon.sarea->
--							       tex_list[i],
--							       &r300->radeon.sarea->
--							       tex_age[i],
--							       &r300->swapped,
--							       sizeof
--							       (r300TexObj),
--							       (destroy_texture_object_t
--								*)
--							       r300DestroyTexObj);
--		/* *INDENT-ON* */
--	}
--	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
-+	r300->radeon.texture_depth = driQueryOptioni(&r300->radeon.optionCache,
- 					      "texture_depth");
--	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--		r300->texture_depth = (screen->cpp == 4) ?
-+	if (r300->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-+		r300->radeon.texture_depth = (screen->cpp == 4) ?
- 		    DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- 
- 	/* Set the maximum texture size small enough that we can guarentee that
-@@ -303,13 +344,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
- 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
- 
--#ifdef USER_BUFFERS
- 	/* Needs further modifications */
- #if 0
- 	ctx->Const.MaxArrayLockSize =
- 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
- #endif
--#endif
- 
- 	ctx->Const.MaxDrawBuffers = 1;
- 
-@@ -384,13 +423,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	    driQueryOptionb(&r300->radeon.optionCache,
- 			    "disable_lowimpact_fallback");
- 
--	radeonInitSpanFuncs(ctx);
-+   	radeonInitSpanFuncs( ctx );
- 	r300InitCmdBuf(r300);
- 	r300InitState(r300);
- 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- 	        r300InitSwtcl(ctx);
- 
--	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-+	TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline;
- 
- 	tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
- 	if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
-@@ -413,72 +452,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	return GL_TRUE;
- }
- 
--static void r300FreeGartAllocations(r300ContextPtr r300)
--{
--	int i, ret, tries = 0, done_age, in_use = 0;
--	drm_radeon_mem_free_t memfree;
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--#ifdef USER_BUFFERS
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (r300->rmm->u_list[i].pending) {
--			in_use++;
--		}
--	}
--	/* Cannot flush/lock if no context exists. */
--	if (in_use)
--		r300FlushCmdBuf(r300, __FUNCTION__);
--
--	done_age = radeonGetAge((radeonContextPtr) r300);
--
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (!r300->rmm->u_list[i].pending) {
--			continue;
--		}
--
--		assert(r300->rmm->u_list[i].h_pending == 0);
--
--		tries = 0;
--		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
--			usleep(10);
--			done_age = radeonGetAge((radeonContextPtr) r300);
--		}
--		if (tries >= 1000) {
--			WARN_ONCE("Failed to idle region!");
--		}
--
--		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
--		    (char *)r300->radeon.radeonScreen->gartTextures.map;
--
--		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
--				      DRM_RADEON_FREE, &memfree,
--				      sizeof(memfree));
--		if (ret) {
--			fprintf(stderr, "Failed to free at %p\nret = %s\n",
--				r300->rmm->u_list[i].ptr, strerror(-ret));
--		} else {
--			if (i == r300->rmm->u_last)
--				r300->rmm->u_last--;
--
--			r300->rmm->u_list[i].pending = 0;
--			r300->rmm->u_list[i].ptr = NULL;
--		}
--	}
--	r300->rmm->u_head = i;
--#endif				/* USER_BUFFERS */
--}
--
- /* Destroy the device specific context.
-  */
- void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
-@@ -502,55 +475,27 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
- 	assert(r300);		/* should never be null */
- 
- 	if (r300) {
--		GLboolean release_texture_heaps;
--
--		release_texture_heaps =
--		    (r300->radeon.glCtx->Shared->RefCount == 1);
- 		_swsetup_DestroyContext(r300->radeon.glCtx);
- 		_tnl_DestroyContext(r300->radeon.glCtx);
- 		_vbo_DestroyContext(r300->radeon.glCtx);
- 		_swrast_DestroyContext(r300->radeon.glCtx);
- 
--		if (r300->dma.current.buf) {
--			r300ReleaseDmaRegion(r300, &r300->dma.current,
--					     __FUNCTION__);
--#ifndef USER_BUFFERS
--			r300FlushCmdBuf(r300, __FUNCTION__);
--#endif
--		}
--		r300FreeGartAllocations(r300);
--		r300DestroyCmdBuf(r300);
-+		rcommonFlushCmdBuf(&r300->radeon, __FUNCTION__);
- 
- 		if (radeon->state.scissor.pClipRects) {
- 			FREE(radeon->state.scissor.pClipRects);
- 			radeon->state.scissor.pClipRects = NULL;
- 		}
- 
--		if (release_texture_heaps) {
--			/* This share group is about to go away, free our private
--			 * texture object data.
--			 */
--			int i;
--
--			for (i = 0; i < r300->nr_heaps; i++) {
--				driDestroyTextureHeap(r300->texture_heaps[i]);
--				r300->texture_heaps[i] = NULL;
--			}
--
--			assert(is_empty_list(&r300->swapped));
--		}
-+		r300DestroyCmdBuf(r300);
- 
- 		radeonCleanupContext(&r300->radeon);
- 
--#ifdef USER_BUFFERS
-+
- 		/* the memory manager might be accessed when Mesa frees the shared
- 		 * state, so don't destroy it earlier
- 		 */
--		r300_mem_destroy(r300);
--#endif
- 
--		/* free the option cache */
--		driDestroyOptionCache(&r300->radeon.optionCache);
- 
- 		FREE(r300);
- 	}
-diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
-index c15e9fa..6d34727 100644
---- a/src/mesa/drivers/dri/r300/r300_context.h
-+++ b/src/mesa/drivers/dri/r300/r300_context.h
-@@ -42,21 +42,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_drm.h"
- #include "dri_util.h"
- #include "texmem.h"
-+#include "radeon_common.h"
- 
- #include "main/macros.h"
- #include "main/mtypes.h"
- #include "main/colormac.h"
- 
--#define USER_BUFFERS
--
- struct r300_context;
- typedef struct r300_context r300ContextRec;
- typedef struct r300_context *r300ContextPtr;
- 
--#include "radeon_lock.h"
-+
- #include "main/mm.h"
- 
--/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
-+/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
-    I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
-    with other compilers ... GLUE!
- */
-@@ -75,174 +74,19 @@ typedef struct r300_context *r300ContextPtr;
- #include "r300_vertprog.h"
- #include "r500_fragprog.h"
- 
--/**
-- * This function takes a float and packs it into a uint32_t
-- */
--static INLINE uint32_t r300PackFloat32(float fl)
--{
--	union {
--		float fl;
--		uint32_t u;
--	} u;
--
--	u.fl = fl;
--	return u.u;
--}
--
--/* This is probably wrong for some values, I need to test this
-- * some more.  Range checking would be a good idea also..
-- *
-- * But it works for most things.  I'll fix it later if someone
-- * else with a better clue doesn't
-- */
--static INLINE uint32_t r300PackFloat24(float f)
--{
--	float mantissa;
--	int exponent;
--	uint32_t float24 = 0;
--
--	if (f == 0.0)
--		return 0;
- 
--	mantissa = frexpf(f, &exponent);
--
--	/* Handle -ve */
--	if (mantissa < 0) {
--		float24 |= (1 << 23);
--		mantissa = mantissa * -1.0;
--	}
--	/* Handle exponent, bias of 63 */
--	exponent += 62;
--	float24 |= (exponent << 16);
--	/* Kill 7 LSB of mantissa */
--	float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
--
--	return float24;
--}
- 
- /************ DMA BUFFERS **************/
- 
--/* Need refcounting on dma buffers:
-- */
--struct r300_dma_buffer {
--	int refcount;		/**< the number of retained regions in buf */
--	drmBufPtr buf;
--	int id;
--};
--#undef GET_START
--#ifdef USER_BUFFERS
--#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
--#else
--#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--#endif
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r300_dma_region {
--	struct r300_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--
--	int aos_offset;		/* address in GART memory */
--	int aos_stride;		/* distance between elements, in dwords */
--	int aos_size;		/* number of components (1-4) */
--};
--
--struct r300_dma {
--	/* Active dma region.  Allocations for vertices and retained
--	 * regions come from here.  Also used for emitting random vertices,
--	 * these may be flushed by calling flush_current();
--	 */
--	struct r300_dma_region current;
--
--	void (*flush) (r300ContextPtr);
--
--	char *buf0_address;	/* start of buf[0], for index calcs */
--
--	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
--	 * for which a DISCARD command is currently queued in the command buffer.
--	 */
--	GLuint nr_released_bufs;
--};
--
--       /* Texture related */
--
--typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
--
--/* Texture object in locally shared texture space.
-- */
--struct r300_tex_obj {
--	driTextureObject base;
--
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
--
--	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
--
--	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
--	/* hardware register values */
--	/* Note that R200 has 8 registers per texture and R300 only 7 */
--	GLuint filter;
--	GLuint filter_1;
--	GLuint pitch_reg;
--	GLuint size;		/* npot only */
--	GLuint format;
--	GLuint offset;		/* Image location in the card's address space.
--				   All cube faces follow. */
--	GLuint unknown4;
--	GLuint unknown5;
--	/* end hardware registers */
--
--	/* registers computed by r200 code - keep them here to
--	   compare against what is actually written.
--
--	   to be removed later.. */
--	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--	GLuint format_x;
--
--	GLboolean border_fallback;
--
--	GLuint tile_bits;	/* hw texture tile bits used on this texture */
--};
--
--struct r300_texture_env_state {
--	r300TexObjPtr texobj;
--	GLenum format;
--	GLenum envMode;
--};
--
- /* The blit width for texture uploads
-  */
- #define R300_BLIT_WIDTH_BYTES 1024
- #define R300_MAX_TEXTURE_UNITS 8
- 
- struct r300_texture_state {
--	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
- 	int tc_count;		/* number of incoming texture coordinates from VAP */
- };
- 
--/**
-- * A block of hardware state.
-- *
-- * When check returns non-zero, the returned number of dwords must be
-- * copied verbatim into the command buffer in order to update a state atom
-- * when it is dirty.
-- */
--struct r300_state_atom {
--	struct r300_state_atom *next, *prev;
--	const char *name;	/* for debug */
--	int cmd_size;		/* maximum size in dwords */
--	GLuint idx;		/* index in an array (e.g. textures) */
--	uint32_t *cmd;
--	GLboolean dirty;
--
--	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
--};
- 
- #define R300_VPT_CMD_0		0
- #define R300_VPT_XSCALE		1
-@@ -459,124 +303,98 @@ struct r300_state_atom {
-  * Cache for hardware register state.
-  */
- struct r300_hw_state {
--	struct r300_state_atom atomlist;
--
--	GLboolean is_dirty;
--	GLboolean all_dirty;
--	int max_state_size;	/* in dwords */
--
--	struct r300_state_atom vpt;	/* viewport (1D98) */
--	struct r300_state_atom vap_cntl;
--        struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
--	struct r300_state_atom vof;	/* VAP output format register 0x2090 */
--	struct r300_state_atom vte;	/* (20B0) */
--	struct r300_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
--	struct r300_state_atom vap_cntl_status;
--	struct r300_state_atom vir[2];	/* vap input route (2150/21E0) */
--	struct r300_state_atom vic;	/* vap input control (2180) */
--	struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
--	struct r300_state_atom vap_clip_cntl;
--	struct r300_state_atom vap_clip;
--	struct r300_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
--	struct r300_state_atom pvs;	/* pvs_cntl (22D0) */
--	struct r300_state_atom gb_enable;	/* (4008) */
--	struct r300_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
--	struct r300_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
--	struct r300_state_atom ga_triangle_stipple;	/* (4214) */
--	struct r300_state_atom ps;	/* pointsize (421C) */
--	struct r300_state_atom ga_point_minmax;	/* (4230) */
--	struct r300_state_atom lcntl;	/* line control */
--	struct r300_state_atom ga_line_stipple;	/* (4260) */
--	struct r300_state_atom shade;
--	struct r300_state_atom polygon_mode;
--	struct r300_state_atom fogp;	/* fog parameters (4294) */
--	struct r300_state_atom ga_soft_reset;	/* (429C) */
--	struct r300_state_atom zbias_cntl;
--	struct r300_state_atom zbs;	/* zbias (42A4) */
--	struct r300_state_atom occlusion_cntl;
--	struct r300_state_atom cul;	/* cull cntl (42B8) */
--	struct r300_state_atom su_depth_scale;	/* (42C0) */
--	struct r300_state_atom rc;	/* rs control (4300) */
--	struct r300_state_atom ri;	/* rs interpolators (4310) */
--	struct r300_state_atom rr;	/* rs route (4330) */
--	struct r300_state_atom sc_hyperz;	/* (43A4) */
--	struct r300_state_atom sc_screendoor;	/* (43E8) */
--	struct r300_state_atom fp;	/* fragment program cntl + nodes (4600) */
--	struct r300_state_atom fpt;	/* texi - (4620) */
--	struct r300_state_atom us_out_fmt;	/* (46A4) */
--	struct r300_state_atom r500fp;	/* r500 fp instructions */
--	struct r300_state_atom r500fp_const;	/* r500 fp constants */
--	struct r300_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
--	struct r300_state_atom fogs;	/* fog state (4BC0) */
--	struct r300_state_atom fogc;	/* fog color (4BC8) */
--	struct r300_state_atom at;	/* alpha test (4BD4) */
--	struct r300_state_atom fg_depth_src;	/* (4BD8) */
--	struct r300_state_atom fpp;	/* 0x4C00 and following */
--	struct r300_state_atom rb3d_cctl;	/* (4E00) */
--	struct r300_state_atom bld;	/* blending (4E04) */
--	struct r300_state_atom cmk;	/* colormask (4E0C) */
--	struct r300_state_atom blend_color;	/* constant blend color */
--	struct r300_state_atom rop;	/* ropcntl */
--	struct r300_state_atom cb;	/* colorbuffer (4E28) */
--	struct r300_state_atom rb3d_dither_ctl;	/* (4E50) */
--	struct r300_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
--	struct r300_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
--	struct r300_state_atom zs;	/* zstencil control (4F00) */
--	struct r300_state_atom zstencil_format;
--	struct r300_state_atom zb;	/* z buffer (4F20) */
--	struct r300_state_atom zb_depthclearvalue;	/* (4F28) */
--	struct r300_state_atom unk4F30;	/* (4F30) */
--	struct r300_state_atom zb_hiz_offset;	/* (4F44) */
--	struct r300_state_atom zb_hiz_pitch;	/* (4F54) */
--
--	struct r300_state_atom vpi;	/* vp instructions */
--	struct r300_state_atom vpp;	/* vp parameters */
--	struct r300_state_atom vps;	/* vertex point size (?) */
--	struct r300_state_atom vpucp[6];	/* vp user clip plane - 6 */
-+	struct radeon_state_atom vpt;	/* viewport (1D98) */
-+	struct radeon_state_atom vap_cntl;
-+        struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
-+	struct radeon_state_atom vof;	/* VAP output format register 0x2090 */
-+	struct radeon_state_atom vte;	/* (20B0) */
-+	struct radeon_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
-+	struct radeon_state_atom vap_cntl_status;
-+	struct radeon_state_atom vir[2];	/* vap input route (2150/21E0) */
-+	struct radeon_state_atom vic;	/* vap input control (2180) */
-+	struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
-+	struct radeon_state_atom vap_clip_cntl;
-+	struct radeon_state_atom vap_clip;
-+	struct radeon_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
-+	struct radeon_state_atom pvs;	/* pvs_cntl (22D0) */
-+	struct radeon_state_atom gb_enable;	/* (4008) */
-+	struct radeon_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
-+	struct radeon_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
-+	struct radeon_state_atom ga_triangle_stipple;	/* (4214) */
-+	struct radeon_state_atom ps;	/* pointsize (421C) */
-+	struct radeon_state_atom ga_point_minmax;	/* (4230) */
-+	struct radeon_state_atom lcntl;	/* line control */
-+	struct radeon_state_atom ga_line_stipple;	/* (4260) */
-+	struct radeon_state_atom shade;
-+	struct radeon_state_atom polygon_mode;
-+	struct radeon_state_atom fogp;	/* fog parameters (4294) */
-+	struct radeon_state_atom ga_soft_reset;	/* (429C) */
-+	struct radeon_state_atom zbias_cntl;
-+	struct radeon_state_atom zbs;	/* zbias (42A4) */
-+	struct radeon_state_atom occlusion_cntl;
-+	struct radeon_state_atom cul;	/* cull cntl (42B8) */
-+	struct radeon_state_atom su_depth_scale;	/* (42C0) */
-+	struct radeon_state_atom rc;	/* rs control (4300) */
-+	struct radeon_state_atom ri;	/* rs interpolators (4310) */
-+	struct radeon_state_atom rr;	/* rs route (4330) */
-+	struct radeon_state_atom sc_hyperz;	/* (43A4) */
-+	struct radeon_state_atom sc_screendoor;	/* (43E8) */
-+	struct radeon_state_atom fp;	/* fragment program cntl + nodes (4600) */
-+	struct radeon_state_atom fpt;	/* texi - (4620) */
-+	struct radeon_state_atom us_out_fmt;	/* (46A4) */
-+	struct radeon_state_atom r500fp;	/* r500 fp instructions */
-+	struct radeon_state_atom r500fp_const;	/* r500 fp constants */
-+	struct radeon_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
-+	struct radeon_state_atom fogs;	/* fog state (4BC0) */
-+	struct radeon_state_atom fogc;	/* fog color (4BC8) */
-+	struct radeon_state_atom at;	/* alpha test (4BD4) */
-+	struct radeon_state_atom fg_depth_src;	/* (4BD8) */
-+	struct radeon_state_atom fpp;	/* 0x4C00 and following */
-+	struct radeon_state_atom rb3d_cctl;	/* (4E00) */
-+	struct radeon_state_atom bld;	/* blending (4E04) */
-+	struct radeon_state_atom cmk;	/* colormask (4E0C) */
-+	struct radeon_state_atom blend_color;	/* constant blend color */
-+	struct radeon_state_atom rop;	/* ropcntl */
-+	struct radeon_state_atom cb;	/* colorbuffer (4E28) */
-+	struct radeon_state_atom rb3d_dither_ctl;	/* (4E50) */
-+	struct radeon_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
-+	struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
-+	struct radeon_state_atom zs;	/* zstencil control (4F00) */
-+	struct radeon_state_atom zstencil_format;
-+	struct radeon_state_atom zb;	/* z buffer (4F20) */
-+	struct radeon_state_atom zb_depthclearvalue;	/* (4F28) */
-+	struct radeon_state_atom unk4F30;	/* (4F30) */
-+	struct radeon_state_atom zb_hiz_offset;	/* (4F44) */
-+	struct radeon_state_atom zb_hiz_pitch;	/* (4F54) */
-+
-+	struct radeon_state_atom vpi;	/* vp instructions */
-+	struct radeon_state_atom vpp;	/* vp parameters */
-+	struct radeon_state_atom vps;	/* vertex point size (?) */
-+	struct radeon_state_atom vpucp[6];	/* vp user clip plane - 6 */
- 	/* 8 texture units */
- 	/* the state is grouped by function and not by
- 	   texture unit. This makes single unit updates
- 	   really awkward - we are much better off
- 	   updating the whole thing at once */
- 	struct {
--		struct r300_state_atom filter;
--		struct r300_state_atom filter_1;
--		struct r300_state_atom size;
--		struct r300_state_atom format;
--		struct r300_state_atom pitch;
--		struct r300_state_atom offset;
--		struct r300_state_atom chroma_key;
--		struct r300_state_atom border_color;
-+		struct radeon_state_atom filter;
-+		struct radeon_state_atom filter_1;
-+		struct radeon_state_atom size;
-+		struct radeon_state_atom format;
-+		struct radeon_state_atom pitch;
-+		struct radeon_state_atom offset;
-+		struct radeon_state_atom chroma_key;
-+		struct radeon_state_atom border_color;
- 	} tex;
--	struct r300_state_atom txe;	/* tex enable (4104) */
--};
-+	struct radeon_state_atom txe;	/* tex enable (4104) */
- 
--/**
-- * This structure holds the command buffer while it is being constructed.
-- *
-- * The first batch of commands in the buffer is always the state that needs
-- * to be re-emitted when the context is lost. This batch can be skipped
-- * otherwise.
-- */
--struct r300_cmdbuf {
--	int size;		/* DWORDs allocated for buffer */
--	uint32_t *cmd_buf;
--	int count_used;		/* DWORDs filled so far */
--	int count_reemit;	/* size of re-emission batch */
-+	radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
- };
- 
- /**
-  * State cache
-  */
- 
--struct r300_depthbuffer_state {
--	GLfloat scale;
--};
--
--struct r300_stencilbuffer_state {
--	GLboolean hw_stencil;
--};
--
- /* Vertex shader state */
- 
- /* Perhaps more if we store programs in vmem? */
-@@ -812,22 +630,18 @@ struct r500_fragment_program {
- #define REG_TEX0	2
- 
- struct r300_state {
--	struct r300_depthbuffer_state depth;
- 	struct r300_texture_state texture;
- 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
- 	struct r300_vertex_shader_state vertex_shader;
--	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
-+	struct radeon_aos aos[R300_MAX_AOS_ARRAYS];
- 	int aos_count;
- 
--	GLuint *Elts;
--	struct r300_dma_region elt_dma;
-+	struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
-+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
- 
--	struct r300_dma_region swtcl_dma;
- 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
- 							   They are the same as tnl->render_inputs for fixed pipeline */
- 
--	struct r300_stencilbuffer_state stencil;
--
- };
- 
- #define R300_FALLBACK_NONE 0
-@@ -837,41 +651,7 @@ struct r300_state {
- /* r300_swtcl.c
-  */
- struct r300_swtcl_info {
--   GLuint RenderIndex;
--
--   /**
--    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
--    * installed in the Mesa state vector.
--    */
--   GLuint vertex_size;
--
--   /**
--    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
--    * data in the hardware buffer.
--    */
--   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
--
--   /**
--    * Number of elements of \c ::vertex_attrs that are actually used.
--    */
--   GLuint vertex_attr_count;
--
--   /**
--    * Cached pointer to the buffer where Mesa will store vertex data.
--    */
--   GLubyte *verts;
--
--   /* Fallback rasterization functions
--    */
--  //   r200_point_func draw_point;
--  //   r200_line_func draw_line;
--  //   r200_tri_func draw_tri;
--
--   GLuint hw_primitive;
--   GLenum render_primitive;
--   GLuint numverts;
--
--   /**
-+  /*
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-     */
-    GLuint coloroffset;
-@@ -880,13 +660,6 @@ struct r300_swtcl_info {
-     * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
-     */
-    GLuint specoffset;
--
--   /**
--    * Should Mesa project vertex data or will the hardware do it?
--    */
--   GLboolean needproj;
--
--   struct r300_dma_region indexed_verts;
- };
- 
- 
-@@ -897,33 +670,13 @@ struct r300_context {
- 	struct radeon_context radeon;	/* parent class, must be first */
- 
- 	struct r300_hw_state hw;
--	struct r300_cmdbuf cmdbuf;
-+
- 	struct r300_state state;
- 	struct gl_vertex_program *curr_vp;
- 	struct r300_vertex_program *selected_vp;
- 
- 	/* Vertex buffers
- 	 */
--	struct r300_dma dma;
--	GLboolean save_on_next_unlock;
--	GLuint NewGLState;
--
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
--	int texture_depth;
--	float initialMaxAnisotropy;
--
--	/* Clientdata textures;
--	 */
--	GLuint prefer_gart_client_texturing;
--
--#ifdef USER_BUFFERS
--	struct r300_memory_manager *rmm;
--#endif
--
- 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- 
-@@ -956,4 +709,7 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx,
- #define RADEON_D_PLAYBACK_RAW 2
- #define RADEON_D_T 3
- 
-+#define r300PackFloat32 radeonPackFloat32
-+#define r300PackFloat24 radeonPackFloat24
-+
- #endif				/* __R300_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
-index 80bd338..1512e90 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.c
-+++ b/src/mesa/drivers/dri/r300/r300_emit.c
-@@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_context.h"
- 
- #include "r300_context.h"
--#include "radeon_ioctl.h"
- #include "r300_state.h"
- #include "r300_emit.h"
- #include "r300_ioctl.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
-     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
-@@ -66,147 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define DEBUG_ALL DEBUG_VERTS
- 
--#if defined(USE_X86_ASM)
--#define COPY_DWORDS( dst, src, nr )					\
--do {									\
--	int __tmp;							\
--	__asm__ __volatile__( "rep ; movsl"				\
--			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
--			      : "0" (nr),				\
--			        "D" ((long)dst),			\
--			        "S" ((long)src) );			\
--} while (0)
--#else
--#define COPY_DWORDS( dst, src, nr )		\
--do {						\
--   int j;					\
--   for ( j = 0 ; j < nr ; j++ )			\
--      dst[j] = ((int *)src)[j];			\
--   dst += nr;					\
--} while (0)
--#endif
--
--static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 4)
--		COPY_DWORDS(out, data, count);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out++;
--			data += stride;
--		}
--}
--
--static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 8)
--		COPY_DWORDS(out, data, count * 2);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out += 2;
--			data += stride;
--		}
--}
--
--static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 12)
--		COPY_DWORDS(out, data, count * 3);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out[2] = *(int *)(data + 8);
--			out += 3;
--			data += stride;
--		}
--}
--
--static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
--{
--	int i;
--	int *out = (int *)(rvb->address + rvb->start);
--
--	if (RADEON_DEBUG & DEBUG_VERTS)
--		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--			__FUNCTION__, count, stride, (void *)out, (void *)data);
--
--	if (stride == 16)
--		COPY_DWORDS(out, data, count * 4);
--	else
--		for (i = 0; i < count; i++) {
--			out[0] = *(int *)data;
--			out[1] = *(int *)(data + 4);
--			out[2] = *(int *)(data + 8);
--			out[3] = *(int *)(data + 12);
--			out += 4;
--			data += stride;
--		}
--}
--
--static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
--			GLvoid * data, int size, int stride, int count)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--
--	if (stride == 0) {
--		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
--		count = 1;
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = 0;
--	} else {
--		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = size;
--	}
--
--	switch (size) {
--	case 1:
--		r300EmitVec4(ctx, rvb, data, stride, count);
--		break;
--	case 2:
--		r300EmitVec8(ctx, rvb, data, stride, count);
--		break;
--	case 3:
--		r300EmitVec12(ctx, rvb, data, stride, count);
--		break;
--	case 4:
--		r300EmitVec16(ctx, rvb, data, stride, count);
--		break;
--	default:
--		assert(0);
--		break;
--	}
--}
--
- #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) |	\
- 		    (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
- 
-@@ -314,10 +169,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
- 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
- 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
- 
--#if 0
--	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
--#endif
--
- 	if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
- 		ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
- 
-@@ -371,7 +222,6 @@ int r300EmitArrays(GLcontext * ctx)
- 
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
--		//assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
- 
- 		if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
- 			InputsRead |= 1 << VERT_ATTRIB_POS;
-@@ -433,7 +283,7 @@ int r300EmitArrays(GLcontext * ctx)
- 	}
- 
- 	for (i = 0; i < nr; i++) {
--		int ci, fix, found = 0;
-+		int ci;
- 
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
-@@ -443,61 +293,35 @@ int r300EmitArrays(GLcontext * ctx)
- 		for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
- 			swizzle[i][ci] = ci;
- 		}
--
--		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
--			if (vb->AttribPtr[tab[i]]->stride % 4) {
--				return R300_FALLBACK_TCL;
--			}
--			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].start = 0;
--			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
--			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--		} else {
--			r300EmitVec(ctx, &rmesa->state.aos[i],
-+		rcommon_emit_vector(ctx, &rmesa->state.aos[i],
- 				    vb->AttribPtr[tab[i]]->data,
- 				    vb->AttribPtr[tab[i]]->size,
- 				    vb->AttribPtr[tab[i]]->stride, count);
--		}
--
--		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--
--		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
--			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
--				continue;
--			}
--			found = 1;
--			break;
--		}
--
--		if (found) {
--			if (fix > 0) {
--				WARN_ONCE("Feeling lucky?\n");
--			}
--			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
--			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
--				swizzle[i][ci] += fix;
--			}
--		} else {
--			WARN_ONCE
--			    ("Cannot handle offset %x with stride %d, comp %d\n",
--			     rmesa->state.aos[i].aos_offset,
--			     rmesa->state.aos[i].aos_stride,
--			     vb->AttribPtr[tab[i]]->size);
--			return R300_FALLBACK_TCL;
--		}
- 	}
- 
- 	/* Setup INPUT_ROUTE. */
--	R300_STATECHANGE(rmesa, vir[0]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
--	    r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
--			       vb->AttribPtr, inputs, tab, nr);
--	R300_STATECHANGE(rmesa, vir[1]);
--	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
--	    r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
--			       nr);
--
-+	if (rmesa->radeon.radeonScreen->kernel_mm) {
-+		R300_STATECHANGE(rmesa, vir[0]);
-+		rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-+		rmesa->hw.vir[0].cmd[0] |=
-+			(r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-+					    vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
-+		R300_STATECHANGE(rmesa, vir[1]);
-+		rmesa->hw.vir[1].cmd[0] |=
-+			(r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-+					    nr) & 0x3FFF) << 16;
-+	} else {
-+		R300_STATECHANGE(rmesa, vir[0]);
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
-+			r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-+					   vb->AttribPtr, inputs, tab, nr);
-+		R300_STATECHANGE(rmesa, vir[1]);
-+		((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
-+			r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-+					   nr);
-+	}
-+	
- 	/* Setup INPUT_CNTL. */
- 	R300_STATECHANGE(rmesa, vic);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
-@@ -515,45 +339,34 @@ int r300EmitArrays(GLcontext * ctx)
- 	return R300_FALLBACK_NONE;
- }
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	int i;
--
--	if (rmesa->state.elt_dma.buf)
--		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
--
--	for (i = 0; i < rmesa->state.aos_count; i++) {
--		if (rmesa->state.aos[i].buf)
--			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
--	}
--}
--#endif
--
- void r300ReleaseArrays(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	int i;
- 
--	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
-+	if (rmesa->state.elt_dma_bo) {
-+		radeon_bo_unref(rmesa->state.elt_dma_bo);
-+		rmesa->state.elt_dma_bo = NULL;
-+	}
- 	for (i = 0; i < rmesa->state.aos_count; i++) {
--		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
-+		if (rmesa->state.aos[i].bo) {
-+			radeon_bo_unref(rmesa->state.aos[i].bo);
-+			rmesa->state.aos[i].bo = NULL;
-+		}
- 	}
- }
- 
- void r300EmitCacheFlush(r300ContextPtr rmesa)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
--	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
--	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
--
--	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
--	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
--	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	END_BATCH();
-+	COMMIT_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
-index 89d7383..6bc8f8e 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.h
-+++ b/src/mesa/drivers/dri/r300/r300_emit.h
-@@ -44,28 +44,31 @@
- #include "r300_cmdbuf.h"
- #include "radeon_reg.h"
- 
--/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
-- * with DRM */
--#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
--#define CP_PACKET3( pkt, n )						\
--	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
--
--static INLINE uint32_t cmdpacket0(int reg, int count)
-+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
-+                                  int reg, int count)
- {
--	drm_r300_cmd_header_t cmd;
--
--	cmd.packet0.cmd_type = R300_CMD_PACKET0;
--	cmd.packet0.count = count;
--	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
--	cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
--
--	return cmd.u;
-+    if (!rscrn->kernel_mm) {
-+	    drm_r300_cmd_header_t cmd;
-+
-+	cmd.u = 0;
-+    	cmd.packet0.cmd_type = R300_CMD_PACKET0;
-+	    cmd.packet0.count = count;
-+    	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
-+	    cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
-+
-+    	return cmd.u;
-+    }
-+    if (count) {
-+        return CP_PACKET0(reg, count - 1);
-+    }
-+    return CP_PACKET2;
- }
- 
--static INLINE uint32_t cmdvpu(int addr, int count)
-+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.vpu.cmd_type = R300_CMD_VPU;
- 	cmd.vpu.count = count;
- 	cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
-@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
-+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
-+                                 int addr, int count, int type, int clamp)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.r500fp.cmd_type = R300_CMD_R500FP;
- 	cmd.r500fp.count = count;
- 	cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
-@@ -88,169 +93,131 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacket3(int packet)
-+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.packet3.cmd_type = R300_CMD_PACKET3;
- 	cmd.packet3.packet = packet;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdcpdelay(unsigned short count)
-+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,  
-+                                  unsigned short count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
-+
- 	cmd.delay.cmd_type = R300_CMD_CP_DELAY;
- 	cmd.delay.count = count;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdwait(unsigned char flags)
-+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
-+                               unsigned char flags)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.wait.cmd_type = R300_CMD_WAIT;
- 	cmd.wait.flags = flags;
- 
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacify(void)
-+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-+	cmd.u = 0;
- 	cmd.header.cmd_type = R300_CMD_END3D;
- 
- 	return cmd.u;
- }
- 
- /**
-- * Prepare to write a register value to register at address reg.
-- * If num_extra > 0 then the following extra values are written
-- * to registers with address +4, +8 and so on..
-- */
--#define reg_start(reg, num_extra)					\
--	do {								\
--		int _n;							\
--		_n=(num_extra);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+2),				\
--					__FUNCTION__);			\
--		cmd_reserved=_n+2;					\
--		cmd_written=1;						\
--		cmd[0].i=cmdpacket0((reg), _n+1);			\
--	} while (0);
--
--/**
-- * Emit GLuint freestyle
-+ * Write the header of a packet3 to the command buffer.
-+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
-  */
--#define e32(dword)							\
--	do {								\
--		if(cmd_written<cmd_reserved) {				\
--			cmd[cmd_written].i=(dword);			\
--			cmd_written++;					\
--		} else {						\
--			fprintf(stderr,					\
--				"e32 but no previous packet "		\
--				"declaration.\n"			\
--				"Aborting! in %s::%s at line %d, "	\
--				"cmd_written=%d cmd_reserved=%d\n",	\
--				__FILE__, __FUNCTION__, __LINE__,	\
--				cmd_written, cmd_reserved);		\
--			_mesa_exit(-1);					\
--		}							\
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		\
-+    	OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
-+                  R300_CMD_PACKET3_RAW)); \
-+    } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
-+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
- 	} while(0)
- 
--#define	efloat(f) e32(r300PackFloat32(f))
--
--#define vsf_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+2;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdvpu((dest), _n/4);			\
--	} while (0);
--
--#define r500fp_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+1;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
--	} while (0);
--
--#define start_packet3(packet, count)					\
--	{								\
--		int _n;							\
--		GLuint _p;						\
--		_n = (count);						\
--		_p = (packet);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+3),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+3;					\
--		cmd_written = 2;					\
--		if(_n > 0x3fff) {					\
--			fprintf(stderr,"Too big packet3 %08x: cannot "	\
--				"store %d dwords\n",			\
--				_p, _n);				\
--			_mesa_exit(-1);					\
--		}							\
--		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
--		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
--	}
--
- /**
-  * Must be sent to switch to 2d commands
-  */
--void static INLINE end_3d(r300ContextPtr rmesa)
-+void static INLINE end_3d(radeonContextPtr radeon)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(radeon);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].header.cmd_type = R300_CMD_END3D;
-+	if (!radeon->radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdpacify(radeon->radeonScreen));
-+		END_BATCH();
-+	}
- }
- 
- void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdcpdelay(count);
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
-+		END_BATCH();
-+	}
- }
- 
--void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
-+void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdwait(flags);
-+	BATCH_LOCALS(radeon);
-+	uint32_t wait_until;
-+
-+	if (!radeon->radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(1);
-+		OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
-+		END_BATCH();
-+	} else {
-+		switch(flags) {
-+		case R300_WAIT_2D:
-+			wait_until = (1 << 14);
-+			break;
-+		case R300_WAIT_3D:
-+			wait_until = (1 << 15);
-+			break;
-+		case R300_NEW_WAIT_2D_3D:
-+			wait_until = (1 << 14) | (1 << 15);
-+			break;
-+		case R300_NEW_WAIT_2D_2D_CLEAN:
-+			wait_until = (1 << 14) | (1 << 16) | (1 << 18);
-+			break;
-+		case R300_NEW_WAIT_3D_3D_CLEAN:
-+			wait_until = (1 << 15) | (1 << 17) | (1 << 18);
-+			break;
-+		case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
-+			wait_until  = (1 << 14) | (1 << 16) | (1 << 18);
-+			wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
-+			break;
-+		default:
-+			return;
-+		}
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
-+		OUT_BATCH(wait_until);
-+		END_BATCH();
-+	}
- }
- 
- extern int r300EmitArrays(GLcontext * ctx);
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx);
--#endif
--
- extern void r300ReleaseArrays(GLcontext * ctx);
- extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
- extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
-diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
-index 4ef7f2b..8d030c6 100644
---- a/src/mesa/drivers/dri/r300/r300_fragprog.c
-+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
-@@ -163,6 +163,19 @@ static GLboolean transform_TEX(
- 		}
- 	}
- 
-+	if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
-+		int tmpreg = radeonFindFreeTemporary(t);
-+		tgt = radeonAppendInstructions(t->Program, 1);
-+		tgt->Opcode = OPCODE_MOV;
-+		tgt->DstReg.File = PROGRAM_TEMPORARY;
-+		tgt->DstReg.Index = tmpreg;
-+		tgt->SrcReg[0] = inst.SrcReg[0];
-+
-+		reset_srcreg(&inst.SrcReg[0]);
-+		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
-+		inst.SrcReg[0].Index = tmpreg;
-+	}
-+	
- 	tgt = radeonAppendInstructions(t->Program, 1);
- 	_mesa_copy_instructions(tgt, &inst, 1);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
-index ee85e22..90b85f0 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
-@@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "swrast/swrast.h"
- 
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "r300_context.h"
--#include "radeon_ioctl.h"
- #include "r300_ioctl.h"
- #include "r300_cmdbuf.h"
- #include "r300_state.h"
-@@ -55,71 +56,83 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_reg.h"
- #include "r300_emit.h"
- #include "r300_fragprog.h"
-+#include "r300_context.h"
- 
- #include "vblank.h"
- 
-+#define R200_3D_DRAW_IMMD_2      0xC0003500
-+
- #define CLEARBUFFER_COLOR	0x1
- #define CLEARBUFFER_DEPTH	0x2
- #define CLEARBUFFER_STENCIL	0x4
- 
--static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
-+static void r300ClearBuffer(r300ContextPtr r300, int flags,
-+			    struct radeon_renderbuffer *rrb,
-+			    struct radeon_renderbuffer *rrbd)
- {
-+	BATCH_LOCALS(&r300->radeon);
- 	GLcontext *ctx = r300->radeon.glCtx;
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
--	GLuint cboffset, cbpitch;
--	drm_r300_cmd_header_t *cmd2;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	GLuint cbpitch = 0;
- 	r300ContextPtr rmesa = r300;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
--			__FUNCTION__, buffer ? "back" : "front",
--			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
--
--	if (buffer) {
--		cboffset = r300->radeon.radeonScreen->backOffset;
--		cbpitch = r300->radeon.radeonScreen->backPitch;
--	} else {
--		cboffset = r300->radeon.radeonScreen->frontOffset;
--		cbpitch = r300->radeon.radeonScreen->frontPitch;
-+		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
-+			__FUNCTION__, rrb, dPriv->x, dPriv->y,
-+			dPriv->w, dPriv->h);
-+
-+	if (rrb) {
-+		cbpitch = (rrb->pitch / rrb->cpp);
-+		if (rrb->cpp == 4)
-+			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+		else
-+			cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_COLOR_TILE_ENABLE;
-+        }
- 	}
- 
--	cboffset += r300->radeon.radeonScreen->fbLocation;
--
--	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	end_3d(rmesa);
--
--	R300_STATECHANGE(r300, cb);
--	reg_start(R300_RB3D_COLOROFFSET0, 0);
--	e32(cboffset);
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		cbpitch |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		cbpitch |= R300_COLOR_TILE_ENABLE;
--
--	reg_start(R300_RB3D_COLORPITCH0, 0);
--	e32(cbpitch);
--
--	R300_STATECHANGE(r300, cmk);
--	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
-+	/* TODO in bufmgr */
-+	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	end_3d(&rmesa->radeon);
- 
- 	if (flags & CLEARBUFFER_COLOR) {
--		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
--		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
--		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
--		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
-+		assert(rrb != 0);
-+		BEGIN_BATCH_NO_AUTOSTATE(6);
-+		OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+		OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
-+		END_BATCH();
-+	}
-+#if 1
-+	if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
-+		assert(rrbd != 0);
-+		cbpitch = (rrbd->pitch / rrbd->cpp);
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+        }
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+            cbpitch |= R300_DEPTHMICROTILE_TILED;
-+        }
-+		BEGIN_BATCH_NO_AUTOSTATE(6);
-+		OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+		OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
-+		END_BATCH();
-+	}
-+#endif
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
-+	if (flags & CLEARBUFFER_COLOR) {
-+		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
- 	} else {
--		e32(0x0);
-+		OUT_BATCH(0);
- 	}
- 
--	R300_STATECHANGE(r300, zs);
--	reg_start(R300_ZB_CNTL, 2);
- 
- 	{
- 		uint32_t t1, t2;
-@@ -146,37 +159,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
- 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
- 		}
- 
--		e32(t1);
--		e32(t2);
--		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
--		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+		OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
-+		OUT_BATCH(t1);
-+		OUT_BATCH(t2);
-+		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
-+                   R300_STENCILWRITEMASK_SHIFT) |
-+			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+		END_BATCH();
- 	}
- 
--	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
--	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
--	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
--	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
--	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
--	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
--	cmd2[4].u = r300PackFloat32(1.0);
--	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
--	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
--	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
--	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
--
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH_NO_AUTOSTATE(9);
-+		OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
-+		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+		OUT_BATCH_FLOAT32(1.0);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+		END_BATCH();
-+	} else {
-+		OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
-+		OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-+			  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-+		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+		OUT_BATCH_FLOAT32(1.0);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+	}
-+	
- 	r300EmitCacheFlush(rmesa);
--	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+
-+	R300_STATECHANGE(r300, cb);
-+	R300_STATECHANGE(r300, cmk);
-+	R300_STATECHANGE(r300, zs);
- }
- 
- static void r300EmitClearState(GLcontext * ctx)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
--	r300ContextPtr rmesa = r300;
-+	BATCH_LOCALS(&r300->radeon);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	GLuint vap_cntl;
-@@ -184,35 +215,37 @@ static void r300EmitClearState(GLcontext * ctx)
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- 		has_tcl = 0;
- 
--        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--                is_r500 = 1;
-+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+		is_r500 = 1;
- 
--
--	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
--	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
--	 * quite complex; see the functions in r300_emit.c.
-+	/* State atom dirty tracking is a little subtle here.
-+	 *
-+	 * On the one hand, we need to make sure base state is emitted
-+	 * here if we start with an empty batch buffer, otherwise clear
-+	 * works incorrectly with multiple processes. Therefore, the first
-+	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
-+	 *
-+	 * On the other hand, implicit state emission clears the state atom
-+	 * dirty bits, so we have to call R300_STATECHANGE later than the
-+	 * first BEGIN_BATCH.
- 	 *
--	 * I believe it would be a good idea to extend the functions in
--	 * r300_emit.c so that they can be used to setup the default values for
--	 * these registers, as well as the actual values used for rendering.
-+	 * The final trickiness is that, because we change state, we need
-+	 * to ensure that any stored swtcl primitives are flushed properly
-+	 * before we start changing state. See the R300_NEWPRIM in r300Clear
-+	 * for this.
- 	 */
--	R300_STATECHANGE(r300, vir[0]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
-+	BEGIN_BATCH(31);
-+	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	if (!has_tcl)
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 	else
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 
--	/* disable fog */
--	R300_STATECHANGE(r300, fogs);
--	reg_start(R300_FG_FOG_BLEND, 0);
--	e32(0x0);
--
--	R300_STATECHANGE(r300, vir[1]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
--	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-+	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
-+	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-+	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-@@ -226,238 +259,276 @@ static void r300EmitClearState(GLcontext * ctx)
- 	      << R300_SWIZZLE1_SHIFT)));
- 
- 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
--	R300_STATECHANGE(r300, vic);
--	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
--	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
--	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
-+	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
-+	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-+	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- 
--	R300_STATECHANGE(r300, vte);
- 	/* comes from fglrx startup of clear */
--	reg_start(R300_SE_VTE_CNTL, 1);
--	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
--	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
--	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
--	    R300_VPORT_Z_OFFSET_ENA);
--	e32(0x8);
-+	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
-+	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-+		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-+		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-+		  R300_VPORT_Z_OFFSET_ENA);
-+	OUT_BATCH(0x8);
- 
--	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
--	e32(0xaaaaaaaa);
-+	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
- 
--	R300_STATECHANGE(r300, vof);
--	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
--	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
--	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
--	e32(0x0);		/* no textures */
-+	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-+		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-+	OUT_BATCH(0); /* no textures */
- 
--	R300_STATECHANGE(r300, txe);
--	reg_start(R300_TX_ENABLE, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
- 
--	R300_STATECHANGE(r300, vpt);
--	reg_start(R300_SE_VPORT_XSCALE, 5);
--	efloat(1.0);
--	efloat(dPriv->x);
--	efloat(1.0);
--	efloat(dPriv->y);
--	efloat(1.0);
--	efloat(0.0);
-+	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->x);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->y);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(0.0);
- 
--	R300_STATECHANGE(r300, at);
--	reg_start(R300_FG_ALPHA_FUNC, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
-+
-+	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
-+	OUT_BATCH(0x0);
-+	OUT_BATCH(0x0);
-+	END_BATCH();
- 
-+	R300_STATECHANGE(r300, vir[0]);
-+	R300_STATECHANGE(r300, fogs);
-+	R300_STATECHANGE(r300, vir[1]);
-+	R300_STATECHANGE(r300, vic);
-+	R300_STATECHANGE(r300, vte);
-+	R300_STATECHANGE(r300, vof);
-+	R300_STATECHANGE(r300, txe);
-+	R300_STATECHANGE(r300, vpt);
-+	R300_STATECHANGE(r300, at);
- 	R300_STATECHANGE(r300, bld);
--	reg_start(R300_RB3D_CBLEND, 1);
--	e32(0x0);
--	e32(0x0);
-+	R300_STATECHANGE(r300, ps);
- 
- 	if (has_tcl) {
--	    R300_STATECHANGE(r300, vap_clip_cntl);
--	    reg_start(R300_VAP_CLIP_CNTL, 0);
--	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		R300_STATECHANGE(r300, vap_clip_cntl);
-+
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		END_BATCH();
-         }
- 
--	R300_STATECHANGE(r300, ps);
--	reg_start(R300_GA_POINT_SIZE, 0);
--	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
--	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
-+		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
-+		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	END_BATCH();
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R300_RS_IP_0, 7);
--		for (i = 0; i < 8; ++i) {
--			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
--		}
--
- 		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
- 		R300_STATECHANGE(r300, rr);
--		reg_start(R300_RS_INST_0, 0);
--		e32(R300_RS_INST_COL_CN_WRITE);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
-+		for (i = 0; i < 8; ++i)
-+			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-+
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
-+
-+		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	} else {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R500_RS_IP_0, 7);
-+		R300_STATECHANGE(r300, rc);
-+		R300_STATECHANGE(r300, rr);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
- 		for (i = 0; i < 8; ++i) {
--			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
--			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-+			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
- 		}
- 
--		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
--		R300_STATECHANGE(r300, rr);
--		reg_start(R500_RS_INST_0, 0);
--		e32(R500_RS_INST_COL_CN_WRITE);
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
- 
-+		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	}
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, fp);
--		reg_start(R300_US_CONFIG, 2);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		reg_start(R300_US_CODE_ADDR_0, 3);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		e32(R300_RGBA_OUT);
--
- 		R300_STATECHANGE(r300, fpi[0]);
- 		R300_STATECHANGE(r300, fpi[1]);
- 		R300_STATECHANGE(r300, fpi[2]);
- 		R300_STATECHANGE(r300, fpi[3]);
- 
--		reg_start(R300_US_ALU_RGB_INST_0, 0);
--		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
--
--		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
--		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
--
--		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
--		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
--
--		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
--		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		BEGIN_BATCH(17);
-+		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(R300_RGBA_OUT);
-+
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
-+			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
-+			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
-+			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
-+			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		END_BATCH();
- 	} else {
-- 		R300_STATECHANGE(r300, fp);
-- 		reg_start(R500_US_CONFIG, 1);
-- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-- 		e32(0x0);
-- 		reg_start(R500_US_CODE_ADDR, 2);
-- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-- 		e32(R500_US_CODE_OFFSET_ADDR(0));
-+		struct radeon_state_atom r500fp;
-+		uint32_t _cmd[10];
- 
-+		R300_STATECHANGE(r300, fp);
- 		R300_STATECHANGE(r300, r500fp);
--		r500fp_start_fragment(0, 6);
--
--		e32(R500_INST_TYPE_OUT |
--		    R500_INST_TEX_SEM_WAIT |
--		    R500_INST_LAST |
--		    R500_INST_RGB_OMASK_R |
--		    R500_INST_RGB_OMASK_G |
--		    R500_INST_RGB_OMASK_B |
--		    R500_INST_ALPHA_OMASK |
--		    R500_INST_RGB_CLAMP |
--		    R500_INST_ALPHA_CLAMP);
--
--		e32(R500_RGB_ADDR0(0) |
--		    R500_RGB_ADDR1(0) |
--		    R500_RGB_ADDR1_CONST |
--		    R500_RGB_ADDR2(0) |
--		    R500_RGB_ADDR2_CONST);
--
--		e32(R500_ALPHA_ADDR0(0) |
--		    R500_ALPHA_ADDR1(0) |
--		    R500_ALPHA_ADDR1_CONST |
--		    R500_ALPHA_ADDR2(0) |
--		    R500_ALPHA_ADDR2_CONST);
--
--		e32(R500_ALU_RGB_SEL_A_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_A_R |
--		    R500_ALU_RGB_G_SWIZ_A_G |
--		    R500_ALU_RGB_B_SWIZ_A_B |
--		    R500_ALU_RGB_SEL_B_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_B_R |
--		    R500_ALU_RGB_B_SWIZ_B_G |
--		    R500_ALU_RGB_G_SWIZ_B_B);
--
--		e32(R500_ALPHA_OP_CMP |
--		    R500_ALPHA_SWIZ_A_A |
--		    R500_ALPHA_SWIZ_B_A);
--
--		e32(R500_ALU_RGBA_OP_CMP |
--		    R500_ALU_RGBA_R_SWIZ_0 |
--		    R500_ALU_RGBA_G_SWIZ_0 |
--		    R500_ALU_RGBA_B_SWIZ_0 |
--		    R500_ALU_RGBA_A_SWIZ_0);
-+
-+		BEGIN_BATCH(7);
-+		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
-+		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
-+		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-+		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-+		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
-+		END_BATCH();
-+
-+		r500fp.check = check_r500fp;
-+		r500fp.cmd = _cmd;
-+		r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
-+		r500fp.cmd[1] = R500_INST_TYPE_OUT |
-+			R500_INST_TEX_SEM_WAIT |
-+			R500_INST_LAST |
-+			R500_INST_RGB_OMASK_R |
-+			R500_INST_RGB_OMASK_G |
-+			R500_INST_RGB_OMASK_B |
-+			R500_INST_ALPHA_OMASK |
-+			R500_INST_RGB_CLAMP |
-+			R500_INST_ALPHA_CLAMP;
-+		r500fp.cmd[2] = R500_RGB_ADDR0(0) |
-+			R500_RGB_ADDR1(0) |
-+			R500_RGB_ADDR1_CONST |
-+			R500_RGB_ADDR2(0) |
-+			R500_RGB_ADDR2_CONST;
-+		r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
-+			R500_ALPHA_ADDR1(0) |
-+			R500_ALPHA_ADDR1_CONST |
-+			R500_ALPHA_ADDR2(0) |
-+			R500_ALPHA_ADDR2_CONST;
-+		r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_A_R |
-+			R500_ALU_RGB_G_SWIZ_A_G |
-+			R500_ALU_RGB_B_SWIZ_A_B |
-+			R500_ALU_RGB_SEL_B_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_B_R |
-+			R500_ALU_RGB_B_SWIZ_B_G |
-+			R500_ALU_RGB_G_SWIZ_B_B;
-+		r500fp.cmd[5] = R500_ALPHA_OP_CMP |
-+			R500_ALPHA_SWIZ_A_A |
-+			R500_ALPHA_SWIZ_B_A;
-+		r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
-+			R500_ALU_RGBA_R_SWIZ_0 |
-+			R500_ALU_RGBA_G_SWIZ_0 |
-+			R500_ALU_RGBA_B_SWIZ_0 |
-+			R500_ALU_RGBA_A_SWIZ_0;
-+		
-+		r500fp.cmd[7] = 0;
-+		emit_r500fp(ctx, &r500fp);
- 	}
- 
--	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
--	e32(0x00000000);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+	END_BATCH();
-+
- 	if (has_tcl) {
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
--	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
--	} else
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-+	} else {
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
-+	}
- 
- 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
--	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
--	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
--	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
--	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
- 	else
--	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+
-+	R300_STATECHANGE(r300, vap_cntl);
- 
--	R300_STATECHANGE(rmesa, vap_cntl);
--	reg_start(R300_VAP_CNTL, 0);
--	e32(vap_cntl);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
-+	END_BATCH();
- 
- 	if (has_tcl) {
-+        struct radeon_state_atom vpu;
-+        uint32_t _cmd[10];
- 		R300_STATECHANGE(r300, pvs);
--		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
--
--		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
--		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
--		    (1 << R300_PVS_LAST_INST_SHIFT));
--		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
--		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
--		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
--
- 		R300_STATECHANGE(r300, vpi);
--		vsf_start_fragment(0x0, 8);
--
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
- 
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
-+		BEGIN_BATCH(4);
-+		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
-+			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-+			  (1 << R300_PVS_LAST_INST_SHIFT));
-+		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-+			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-+		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-+		END_BATCH();
-+
-+		vpu.check = check_vpu;
-+		vpu.cmd = _cmd;
-+		vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
-+
-+		vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
-+                                         0, 0xf, PVS_DST_REG_OUT);
-+		vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
-+                                      PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[4] = 0x0;
-+
-+		vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
-+                                         PVS_DST_REG_OUT);
-+		vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
-+                                      PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-+                                      PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-+
-+                                      VSF_FLAG_NONE);
-+		vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[8] = 0x0;
-+		emit_vpu(ctx, &vpu);
- 	}
- }
- 
-@@ -468,9 +539,11 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
-+	GLframebuffer *fb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
-+	struct radeon_renderbuffer *rrbd;
- 	int flags = 0;
- 	int bits = 0;
--	int swapped;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "r300Clear\n");
-@@ -482,6 +555,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 			return;
- 	}
- 
-+	/* Flush swtcl vertices if necessary, because we will change hardware
-+	 * state during clear. See also the state-related comment in
-+	 * r300EmitClearState.
-+	 */
-+	R300_NEWPRIM(r300);
-+
- 	if (mask & BUFFER_BIT_FRONT_LEFT) {
- 		flags |= BUFFER_BIT_FRONT_LEFT;
- 		mask &= ~BUFFER_BIT_FRONT_LEFT;
-@@ -497,7 +576,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 		mask &= ~BUFFER_BIT_DEPTH;
- 	}
- 
--	if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
-+	if ((mask & BUFFER_BIT_STENCIL) && r300->radeon.state.stencil.hwBuffer) {
- 		bits |= CLEARBUFFER_STENCIL;
- 		mask &= ~BUFFER_BIT_STENCIL;
- 	}
-@@ -509,26 +588,28 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 		_swrast_Clear(ctx, mask);
- 	}
- 
--	swapped = r300->radeon.sarea->pfCurrentPage == 1;
--
- 	/* Make sure it fits there. */
--	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
-+	rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
- 	if (flags || bits)
- 		r300EmitClearState(ctx);
-+	rrbd = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- 
- 	if (flags & BUFFER_BIT_FRONT_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
-+		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (flags & BUFFER_BIT_BACK_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
-+		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (bits)
--		r300ClearBuffer(r300, bits, 0);
-+		r300ClearBuffer(r300, bits, NULL, rrbd);
- 
-+	COMMIT_BATCH();
- }
- 
- void r300Flush(GLcontext * ctx)
-@@ -538,302 +619,13 @@ void r300Flush(GLcontext * ctx)
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	if (rmesa->dma.flush)
--		rmesa->dma.flush( rmesa );
--
--	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--}
--
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--
--void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
--{
--	struct r300_dma_buffer *dmabuf;
--	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
--	}
--
--	if (rmesa->dma.current.buf) {
--#ifdef USER_BUFFERS
--		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
--#endif
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--	}
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = (void *)1;	/* hack */
--	dmabuf->refcount = 1;
--
--	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--	if (dmabuf->id == 0) {
--		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		radeonWaitForIdleLocked(&rmesa->radeon);
--
--		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--
--		UNLOCK_HARDWARE(&rmesa->radeon);
--
--		if (dmabuf->id == 0) {
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
--	rmesa->dma.current.end = size;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		r300_mem_free(rmesa, region->buf->id);
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
--	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
--
--#else
--static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
--{
--	struct r300_dma_buffer *dmabuf;
--	int fd = rmesa->radeon.dri.fd;
--	int index = 0;
--	int size = 0;
--	drmDMAReq dma;
--	int ret;
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
-+	if (rmesa->radeon.dma.flush) {
-+		rmesa->radeon.dma.flush(ctx);
- 	}
--
--	if (rmesa->dma.current.buf)
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dma.context = rmesa->radeon.dri.hwContext;
--	dma.send_count = 0;
--	dma.send_list = NULL;
--	dma.send_sizes = NULL;
--	dma.flags = 0;
--	dma.request_count = 1;
--	dma.request_size = RADEON_BUFFER_SIZE;
--	dma.request_list = &index;
--	dma.request_sizes = &size;
--	dma.granted_count = 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--	ret = drmDMA(fd, &dma);
--
--	if (ret != 0) {
--		/* Try to release some buffers and wait until we can't get any more */
--		if (rmesa->dma.nr_released_bufs) {
--			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		}
--
--		if (RADEON_DEBUG & DEBUG_DMA)
--			fprintf(stderr, "Waiting for buffers\n");
--
--		radeonWaitForIdleLocked(&rmesa->radeon);
--		ret = drmDMA(fd, &dma);
--
--		if (ret != 0) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (RADEON_DEBUG & DEBUG_DMA)
--		fprintf(stderr, "Allocated buffer %d\n", index);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
--	dmabuf->refcount = 1;
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = dmabuf->buf->address;
--	rmesa->dma.current.end = dmabuf->buf->total;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		drm_radeon_cmd_header_t *cmd;
--
--		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--			fprintf(stderr, "%s -- DISCARD BUF %d\n",
--				__FUNCTION__, region->buf->buf->idx);
--		cmd =
--		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
--								sizeof
--								(*cmd) / 4,
--								__FUNCTION__);
--		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
--		cmd->dma.buf_idx = region->buf->buf->idx;
--
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
-+	
-+	if (rmesa->radeon.cmdbuf.cs->cdw) {
-+		rcommonFlushCmdBuf(&rmesa->radeon, __FUNCTION__);
- 	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
--
--#endif
--
--GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
--			   GLint size)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	int valid = (size >= 0 && offset >= 0
--		     && offset + size <
--		     rmesa->radeon.radeonScreen->gartTextures.size);
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
--			valid);
--
--	return valid;
--}
--
--GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--
--	//fprintf(stderr, "offset=%08x\n", offset);
--
--	if (offset < 0
--	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
--		return ~0;
--	else
--		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
- }
- 
- void r300InitIoctlFuncs(struct dd_function_table *functions)
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
-index e1143fb..5f00264 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.h
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
-@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "radeon_drm.h"
- 
--extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
--				  const GLvoid * pointer, GLint size);
--
--extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
--					const GLvoid * pointer);
--
- extern void r300Flush(GLcontext * ctx);
- 
--extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--				 struct r300_dma_region *region,
--				 const char *caller);
- extern void r300AllocDmaRegion(r300ContextPtr rmesa,
--			       struct r300_dma_region *region, int bytes,
--			       int alignment);
-+			       struct radeon_bo **pbo, int *poffset,
-+			       int bytes, int alignment);
- 
- extern void r300InitIoctlFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
-deleted file mode 100644
-index f8f9d4f..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.c
-+++ /dev/null
-@@ -1,385 +0,0 @@
--/*
-- * Copyright (C) 2005 Aapo Tahkola.
-- *
-- * All Rights Reserved.
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining
-- * a copy of this software and associated documentation files (the
-- * "Software"), to deal in the Software without restriction, including
-- * without limitation the rights to use, copy, modify, merge, publish,
-- * distribute, sublicense, and/or sell copies of the Software, and to
-- * permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice (including the
-- * next paragraph) shall be included in all copies or substantial
-- * portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-- *
-- */
--
--/**
-- * \file
-- *
-- * \author Aapo Tahkola <aet@rasterburn.org>
-- */
--
--#include <unistd.h>
--
--#include "r300_context.h"
--#include "r300_cmdbuf.h"
--#include "r300_ioctl.h"
--#include "r300_mem.h"
--#include "radeon_ioctl.h"
--
--#ifdef USER_BUFFERS
--
--static void resize_u_list(r300ContextPtr rmesa)
--{
--	void *temp;
--	int nsize;
--
--	temp = rmesa->rmm->u_list;
--	nsize = rmesa->rmm->u_size * 2;
--
--	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
--	_mesa_memset(rmesa->rmm->u_list, 0,
--		     nsize * sizeof(*rmesa->rmm->u_list));
--
--	if (temp) {
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--		_mesa_memcpy(rmesa->rmm->u_list, temp,
--			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
--		_mesa_free(temp);
--	}
--
--	rmesa->rmm->u_size = nsize;
--}
--
--void r300_mem_init(r300ContextPtr rmesa)
--{
--	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
--	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
--
--	rmesa->rmm->u_size = 128;
--	resize_u_list(rmesa);
--}
--
--void r300_mem_destroy(r300ContextPtr rmesa)
--{
--	_mesa_free(rmesa->rmm->u_list);
--	rmesa->rmm->u_list = NULL;
--
--	_mesa_free(rmesa->rmm);
--	rmesa->rmm = NULL;
--}
--
--void *r300_mem_ptr(r300ContextPtr rmesa, int id)
--{
--	assert(id <= rmesa->rmm->u_last);
--	return rmesa->rmm->u_list[id].ptr;
--}
--
--int r300_mem_find(r300ContextPtr rmesa, void *ptr)
--{
--	int i;
--
--	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
--		if (rmesa->rmm->u_list[i].ptr &&
--		    ptr >= rmesa->rmm->u_list[i].ptr &&
--		    ptr <
--		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
--			break;
--
--	if (i < rmesa->rmm->u_size + 1)
--		return i;
--
--	fprintf(stderr, "%p failed\n", ptr);
--	return 0;
--}
--
--//#define MM_DEBUG
--int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
--{
--	drm_radeon_mem_alloc_t alloc;
--	int offset = 0, ret;
--	int i, free = -1;
--	int done_age;
--	drm_radeon_mem_free_t memfree;
--	int tries = 0;
--	static int bytes_wasted = 0, allocated = 0;
--
--	if (size < 4096)
--		bytes_wasted += 4096 - size;
--
--	allocated += size;
--
--#if 0
--	static int t = 0;
--	if (t != time(NULL)) {
--		t = time(NULL);
--		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
--			rmesa->rmm->u_last, bytes_wasted / 1024,
--			allocated / 1024);
--	}
--#endif
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--      again:
--
--	done_age = radeonGetAge((radeonContextPtr) rmesa);
--
--	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
--		resize_u_list(rmesa);
--
--	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
--		if (rmesa->rmm->u_list[i].ptr == NULL) {
--			free = i;
--			continue;
--		}
--
--		if (rmesa->rmm->u_list[i].h_pending == 0 &&
--		    rmesa->rmm->u_list[i].pending
--		    && rmesa->rmm->u_list[i].age <= done_age) {
--			memfree.region_offset =
--			    (char *)rmesa->rmm->u_list[i].ptr -
--			    (char *)rmesa->radeon.radeonScreen->gartTextures.
--			    map;
--
--			ret =
--			    drmCommandWrite(rmesa->radeon.radeonScreen->
--					    driScreen->fd, DRM_RADEON_FREE,
--					    &memfree, sizeof(memfree));
--
--			if (ret) {
--				fprintf(stderr, "Failed to free at %p\n",
--					rmesa->rmm->u_list[i].ptr);
--				fprintf(stderr, "ret = %s\n", strerror(-ret));
--				exit(1);
--			} else {
--#ifdef MM_DEBUG
--				fprintf(stderr, "really freed %d at age %x\n",
--					i,
--					radeonGetAge((radeonContextPtr) rmesa));
--#endif
--				if (i == rmesa->rmm->u_last)
--					rmesa->rmm->u_last--;
--
--				if (rmesa->rmm->u_list[i].size < 4096)
--					bytes_wasted -=
--					    4096 - rmesa->rmm->u_list[i].size;
--
--				allocated -= rmesa->rmm->u_list[i].size;
--				rmesa->rmm->u_list[i].pending = 0;
--				rmesa->rmm->u_list[i].ptr = NULL;
--				free = i;
--			}
--		}
--	}
--	rmesa->rmm->u_head = i;
--
--	if (free == -1) {
--		WARN_ONCE("Ran out of slots!\n");
--		//usleep(100);
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		tries++;
--		if (tries > 100) {
--			WARN_ONCE("Ran out of slots!\n");
--			exit(1);
--		}
--		goto again;
--	}
--
--	alloc.region = RADEON_MEM_REGION_GART;
--	alloc.alignment = alignment;
--	alloc.size = size;
--	alloc.region_offset = &offset;
--
--	ret =
--	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
--				sizeof(alloc));
--	if (ret) {
--#if 0
--		WARN_ONCE("Ran out of mem!\n");
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		//usleep(100);
--		tries2++;
--		tries = 0;
--		if (tries2 > 100) {
--			WARN_ONCE("Ran out of GART memory!\n");
--			exit(1);
--		}
--		goto again;
--#else
--		WARN_ONCE
--		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
--		     size);
--		return 0;
--#endif
--	}
--
--	i = free;
--
--	if (i > rmesa->rmm->u_last)
--		rmesa->rmm->u_last = i;
--
--	rmesa->rmm->u_list[i].ptr =
--	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
--	rmesa->rmm->u_list[i].size = size;
--	rmesa->rmm->u_list[i].age = 0;
--	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
--
--#ifdef MM_DEBUG
--	fprintf(stderr, "allocated %d at age %x\n", i,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	return i;
--}
--
--void r300_mem_use(r300ContextPtr rmesa, int id)
--{
--	uint64_t ull;
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	drm_r300_cmd_header_t *cmd;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	cmd =
--	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
--						      2 + sizeof(ull) / 4,
--						      __FUNCTION__);
--	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
--	cmd[0].scratch.reg = R300_MEM_SCRATCH;
--	cmd[0].scratch.n_bufs = 1;
--	cmd[0].scratch.flags = 0;
--	cmd++;
--
--	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
--	_mesa_memcpy(cmd, &ull, sizeof(ull));
--	cmd += sizeof(ull) / 4;
--
--	cmd[0].u = /*id */ 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
--	rmesa->rmm->u_list[id].h_pending++;
--	UNLOCK_HARDWARE(&rmesa->radeon);
--}
--
--unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
--{
--	unsigned long offset;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	offset = (char *)rmesa->rmm->u_list[id].ptr -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
--
--	return offset;
--}
--
--void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	void *ptr;
--	int tries = 0;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (access == R300_MEM_R) {
--
--		if (rmesa->rmm->u_list[id].mapped == 1)
--			WARN_ONCE("buffer %d already mapped\n", id);
--
--		rmesa->rmm->u_list[id].mapped = 1;
--		ptr = r300_mem_ptr(rmesa, id);
--
--		return ptr;
--	}
--
--	if (rmesa->rmm->u_list[id].h_pending)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	if (rmesa->rmm->u_list[id].h_pending) {
--		return NULL;
--	}
--
--	while (rmesa->rmm->u_list[id].age >
--	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
--		usleep(10);
--
--	if (tries >= 1000) {
--		fprintf(stderr, "Idling failed (%x vs %x)\n",
--			rmesa->rmm->u_list[id].age,
--			radeonGetAge((radeonContextPtr) rmesa));
--		return NULL;
--	}
--
--	if (rmesa->rmm->u_list[id].mapped == 1)
--		WARN_ONCE("buffer %d already mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 1;
--	ptr = r300_mem_ptr(rmesa, id);
--
--	return ptr;
--}
--
--void r300_mem_unmap(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (rmesa->rmm->u_list[id].mapped == 0)
--		WARN_ONCE("buffer %d not mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 0;
--}
--
--void r300_mem_free(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	if (rmesa->rmm->u_list[id].ptr == NULL) {
--		WARN_ONCE("Not allocated!\n");
--		return;
--	}
--
--	if (rmesa->rmm->u_list[id].pending) {
--		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
--		return;
--	}
--
--	rmesa->rmm->u_list[id].pending = 1;
--}
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
-deleted file mode 100644
-index 625a7f6..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.h
-+++ /dev/null
-@@ -1,37 +0,0 @@
--#ifndef __R300_MEM_H__
--#define __R300_MEM_H__
--
--//#define R300_MEM_PDL 0
--#define R300_MEM_UL 1
--
--#define R300_MEM_R 1
--#define R300_MEM_W 2
--#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
--
--#define R300_MEM_SCRATCH 2
--
--struct r300_memory_manager {
--	struct {
--		void *ptr;
--		uint32_t size;
--		uint32_t age;
--		uint32_t h_pending;
--		int pending;
--		int mapped;
--	} *u_list;
--	int u_head, u_size, u_last;
--
--};
--
--extern void r300_mem_init(r300ContextPtr rmesa);
--extern void r300_mem_destroy(r300ContextPtr rmesa);
--extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
--extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
--extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
--extern void r300_mem_use(r300ContextPtr rmesa, int id);
--extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
--extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
--extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
--extern void r300_mem_free(r300ContextPtr rmesa, int id);
--
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
-index 872a33e..5f344be 100644
---- a/src/mesa/drivers/dri/r300/r300_reg.h
-+++ b/src/mesa/drivers/dri/r300/r300_reg.h
-@@ -1525,6 +1525,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #	define R500_SEL_FILTER4_TC3		 (3 << 18)
- 
- #define R300_TX_OFFSET_0                    0x4540
-+#define R300_TX_OFFSET_1                    0x4544
-+#define R300_TX_OFFSET_2                    0x4548
-+#define R300_TX_OFFSET_3                    0x454C
-+#define R300_TX_OFFSET_4                    0x4550
-+#define R300_TX_OFFSET_5                    0x4554
-+#define R300_TX_OFFSET_6                    0x4558
-+#define R300_TX_OFFSET_7                    0x455C
- 	/* BEGIN: Guess from R200 */
- #       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0)
- #       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0)
-diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
-index 16ce4a1..f46477f 100644
---- a/src/mesa/drivers/dri/r300/r300_render.c
-+++ b/src/mesa/drivers/dri/r300/r300_render.c
-@@ -66,8 +66,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/t_vp_build.h"
- #include "radeon_reg.h"
- #include "radeon_macros.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
-@@ -175,85 +173,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
- static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
- 	void *out;
- 
--	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
--		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
--		rvb->start = ((char *)elts) - rvb->address;
--		rvb->aos_offset =
--		    rmesa->radeon.radeonScreen->gart_texture_offset +
--		    rvb->start;
--		return;
--	} else if (r300IsGartMemory(rmesa, elts, 1)) {
--		WARN_ONCE("Pointer not within GART memory!\n");
--		_mesa_exit(-1);
--	}
--
--	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
--	rvb->aos_offset = GET_START(rvb);
--
--	out = rvb->address + rvb->start;
-+	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
-+			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
-+	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
-+	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
- 	memcpy(out, elts, n_elts * 4);
-+	radeon_bo_unmap(rmesa->state.elt_dma_bo);
- }
- 
--static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
--		       int vertex_count, int type)
-+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
--	e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
--	    (R300_VAP_PORT_IDX0 >> 2));
--	e32(addr);
--	e32(vertex_count);
-+	BATCH_LOCALS(&rmesa->radeon);
-+
-+	if (vertex_count > 0) {
-+		BEGIN_BATCH(10);
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-+		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-+			  ((vertex_count + 0) << 16) |
-+			  type |
-+			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-+		
-+		if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-+	    			 (R300_VAP_PORT_IDX0 >> 2));
-+			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
-+					rmesa->state.elt_dma_bo,
-+					rmesa->state.elt_dma_offset,
-+					RADEON_GEM_DOMAIN_GTT, 0, 0);
-+			OUT_BATCH(vertex_count);
-+		} else {
-+			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-+	    			 (R300_VAP_PORT_IDX0 >> 2));
-+			OUT_BATCH(rmesa->state.elt_dma_offset);
-+			OUT_BATCH(vertex_count);
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->state.elt_dma_bo,
-+					      RADEON_GEM_DOMAIN_GTT, 0, 0);
-+		}
-+		END_BATCH();
-+	}
- }
- 
- static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
- {
-+	BATCH_LOCALS(&rmesa->radeon);
-+	uint32_t voffset;
- 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
--
-+	
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
- 			offset);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
--	e32(nr);
--
--	for (i = 0; i + 1 < nr; i += 2) {
--		e32((rmesa->state.aos[i].aos_size << 0) |
--		    (rmesa->state.aos[i].aos_stride << 8) |
--		    (rmesa->state.aos[i + 1].aos_size << 16) |
--		    (rmesa->state.aos[i + 1].aos_stride << 24));
-+    
-+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+		BEGIN_BATCH(sz+2+(nr * 2));
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+		OUT_BATCH(nr);
-+
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			OUT_BATCH((rmesa->state.aos[i].components << 0) |
-+				  (rmesa->state.aos[i].stride << 8) |
-+				  (rmesa->state.aos[i + 1].components << 16) |
-+				  (rmesa->state.aos[i + 1].stride << 24));
-+			
-+			voffset =  rmesa->state.aos[i + 0].offset +
-+				offset * 4 * rmesa->state.aos[i + 0].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->state.aos[i].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+			voffset =  rmesa->state.aos[i + 1].offset +
-+			  offset * 4 * rmesa->state.aos[i + 1].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->state.aos[i+1].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+		}
-+		
-+		if (nr & 1) {
-+			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
-+				  (rmesa->state.aos[nr - 1].stride << 8));
-+			voffset =  rmesa->state.aos[nr - 1].offset +
-+				offset * 4 * rmesa->state.aos[nr - 1].stride;
-+			OUT_BATCH_RELOC(voffset,
-+					rmesa->state.aos[nr - 1].bo,
-+					voffset,
-+					RADEON_GEM_DOMAIN_GTT,
-+					0, 0);
-+		}
-+		END_BATCH();
-+	} else {
- 
--		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
--		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
-+		BEGIN_BATCH(sz+2+(nr * 2));
-+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+		OUT_BATCH(nr);
-+
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			OUT_BATCH((rmesa->state.aos[i].components << 0) |
-+				  (rmesa->state.aos[i].stride << 8) |
-+				  (rmesa->state.aos[i + 1].components << 16) |
-+				  (rmesa->state.aos[i + 1].stride << 24));
-+			
-+			voffset =  rmesa->state.aos[i + 0].offset +
-+				offset * 4 * rmesa->state.aos[i + 0].stride;
-+			OUT_BATCH(voffset);
-+			voffset =  rmesa->state.aos[i + 1].offset +
-+				offset * 4 * rmesa->state.aos[i + 1].stride;
-+			OUT_BATCH(voffset);
-+		}
-+		
-+		if (nr & 1) {
-+			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
-+			  (rmesa->state.aos[nr - 1].stride << 8));
-+			voffset =  rmesa->state.aos[nr - 1].offset +
-+				offset * 4 * rmesa->state.aos[nr - 1].stride;
-+			OUT_BATCH(voffset);
-+		}
-+		for (i = 0; i + 1 < nr; i += 2) {
-+			voffset =  rmesa->state.aos[i + 0].offset +
-+				offset * 4 * rmesa->state.aos[i + 0].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->state.aos[i+0].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+			voffset =  rmesa->state.aos[i + 1].offset +
-+				offset * 4 * rmesa->state.aos[i + 1].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->state.aos[i+1].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+		}
-+		if (nr & 1) {
-+			voffset =  rmesa->state.aos[nr - 1].offset +
-+				offset * 4 * rmesa->state.aos[nr - 1].stride;
-+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+					      rmesa->state.aos[nr-1].bo,
-+					      RADEON_GEM_DOMAIN_GTT,
-+					      0, 0);
-+		}
-+		END_BATCH();
- 	}
- 
--	if (nr & 1) {
--		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
--		    (rmesa->state.aos[nr - 1].aos_stride << 8));
--		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
--	}
- }
- 
- static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	END_BATCH();
- }
- 
- static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
-@@ -269,6 +346,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 	if (type < 0 || num_verts <= 0)
- 		return;
- 
-+	/* Make space for at least 64 dwords.
-+	 * This is supposed to ensure that we can get all rendering
-+	 * commands into a single command buffer.
-+	 */
-+	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
-+
- 	if (vb->Elts) {
- 		if (num_verts > 65535) {
- 			/* not implemented yet */
-@@ -288,11 +371,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 		 */
- 		r300EmitElts(ctx, vb->Elts, num_verts);
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
--		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
-+		r300FireEB(rmesa, num_verts, type);
- 	} else {
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
- 		r300FireAOS(rmesa, num_verts, type);
- 	}
-+	COMMIT_BATCH();
- }
- 
- static GLboolean r300RunRender(GLcontext * ctx,
-@@ -303,7 +387,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *vb = &tnl->vb;
- 
--
- 	if (RADEON_DEBUG & DEBUG_PRIMS)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
-@@ -314,7 +397,7 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
--	r300EmitState(rmesa);
-+	radeonEmitState(&rmesa->radeon);
- 
- 	for (i = 0; i < vb->PrimitiveCount; i++) {
- 		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
-@@ -325,10 +408,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 
- 	r300EmitCacheFlush(rmesa);
- 
--#ifdef USER_BUFFERS
--	r300UseArrays(ctx);
--#endif
--
- 	r300ReleaseArrays(ctx);
- 
- 	return GL_FALSE;
-@@ -432,6 +511,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx,
- 		return GL_TRUE;
- 	}
- 
-+	if (!r300ValidateBuffers(ctx))
-+	    return GL_TRUE;
-+	
- 	r300UpdateShaders(rmesa);
- 
- 	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
-diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
-index c192fec..93ef06f 100644
---- a/src/mesa/drivers/dri/r300/r300_state.c
-+++ b/src/mesa/drivers/dri/r300/r300_state.c
-@@ -53,8 +53,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vbo/vbo.h"
- #include "tnl/tnl.h"
- 
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
-@@ -590,7 +588,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 
--	if (r300->state.stencil.hw_stencil) {
-+	if (r300->radeon.state.stencil.hwBuffer) {
- 		R300_STATECHANGE(r300, zs);
- 		if (state) {
- 			r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
-@@ -1068,10 +1066,10 @@ static void r300UpdateWindow(GLcontext * ctx)
- 	GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
- 	GLfloat sy = -v[MAT_SY];
- 	GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
--	GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
--	GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
-+	GLfloat sz = v[MAT_SZ] * rmesa->radeon.state.depth.scale;
-+	GLfloat tz = v[MAT_TZ] * rmesa->radeon.state.depth.scale;
- 
--	R300_FIREVERTICES(rmesa);
-+	radeon_firevertices(&rmesa->radeon);
- 	R300_STATECHANGE(rmesa, vpt);
- 
- 	rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
-@@ -1085,10 +1083,19 @@ static void r300UpdateWindow(GLcontext * ctx)
- static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
- 			 GLsizei width, GLsizei height)
- {
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+    __DRIcontext *driContext = rmesa->radeon.dri.context;
- 	/* Don't pipeline viewport changes, conflict with window offset
- 	 * setting below.  Could apply deltas to rescue pipelined viewport
- 	 * values, or keep the originals hanging around.
- 	 */
-+    if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled) {
-+        radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
-+        if (driContext->driDrawablePriv != driContext->driReadablePriv) {
-+            radeon_update_renderbuffers(driContext,
-+                                        driContext->driReadablePriv);
-+        }
-+    }
- 	r300UpdateWindow(ctx);
- }
- 
-@@ -1129,55 +1136,25 @@ void r300UpdateViewportOffset(GLcontext * ctx)
- void r300UpdateDrawBuffer(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	r300ContextPtr r300 = rmesa;
- 	struct gl_framebuffer *fb = ctx->DrawBuffer;
--	driRenderbuffer *drb;
-+	struct radeon_renderbuffer *rrb;
- 
- 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- 		/* draw to front */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
--		    Renderbuffer;
-+		rrb =
-+		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
- 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- 		/* draw to back */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
--		    Renderbuffer;
-+		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- 	} else {
- 		/* drawing to multiple buffers, or none */
- 		return;
- 	}
- 
--	assert(drb);
--	assert(drb->flippedPitch);
-+	assert(rrb);
-+	assert(rrb->pitch);
- 
- 	R300_STATECHANGE(rmesa, cb);
--
--	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--#if 0
--	R200_STATECHANGE(rmesa, ctx);
--
--	/* Note: we used the (possibly) page-flipped values */
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--	    = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
--	       & R200_COLOROFFSET_MASK);
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--
--	if (rmesa->sarea->tiling_enabled) {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
--		    R200_COLOR_TILE_ENABLE;
--	}
--#endif
- }
- 
- static void
-@@ -1397,7 +1374,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
- 	}
- 
- 	r300->hw.fpt.cmd[R300_FPT_CMD_0] =
--		cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
-+		cmdpacket0(r300->radeon.radeonScreen,
-+                   R300_US_TEX_INST_0, code->tex.length);
- }
- 
- static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
-@@ -1448,7 +1426,7 @@ static GLuint translate_lod_bias(GLfloat bias)
- static void r300SetupTextures(GLcontext * ctx)
- {
- 	int i, mtu;
--	struct r300_tex_obj *t;
-+	struct radeon_tex_obj *t;
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	int hw_tmu = 0;
- 	int last_hw_tmu = -1;	/* -1 translates into no setup costs for fields */
-@@ -1482,21 +1460,16 @@ static void r300SetupTextures(GLcontext * ctx)
- 	/* We cannot let disabled tmu offsets pass DRM */
- 	for (i = 0; i < mtu; i++) {
- 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
--
--#if 0				/* Enables old behaviour */
--			hw_tmu = i;
--#endif
- 			tmu_mappings[i] = hw_tmu;
- 
--			t = r300->state.texture.unit[i].texobj;
--			/* XXX questionable fix for bug 9170: */
-+			t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
- 			if (!t)
- 				continue;
- 
--			if ((t->format & 0xffffff00) == 0xffffff00) {
-+			if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
- 				WARN_ONCE
- 				    ("unknown texture format (entry %x) encountered. Help me !\n",
--				     t->format & 0xff);
-+				     t->pp_txformat & 0xff);
- 			}
- 
- 			if (RADEON_DEBUG & DEBUG_STATE)
-@@ -1507,29 +1480,28 @@ static void r300SetupTextures(GLcontext * ctx)
- 
- 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
- 						hw_tmu] =
--			    gen_fixed_filter(t->filter) | (hw_tmu << 28);
-+			    gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
- 			/* Note: There is a LOD bias per texture unit and a LOD bias
- 			 * per texture object. We add them here to get the correct behaviour.
- 			 * (The per-texture object LOD bias was introduced in OpenGL 1.4
- 			 * and is not present in the EXT_texture_object extension).
- 			 */
- 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--				t->filter_1 |
--				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
-+				t->pp_txfilter_1 |
-+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
- 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--			    t->size;
-+			    t->pp_txsize;
- 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->format;
-+						hw_tmu] = t->pp_txformat;
- 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
--			    t->pitch_reg;
--			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->offset;
-+			  t->pp_txpitch;
-+			r300->hw.textures[hw_tmu] = t;
- 
--			if (t->offset & R300_TXO_MACRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
- 				WARN_ONCE("macro tiling enabled!\n");
- 			}
- 
--			if (t->offset & R300_TXO_MICRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
- 				WARN_ONCE("micro tiling enabled!\n");
- 			}
- 
-@@ -1546,21 +1518,21 @@ static void r300SetupTextures(GLcontext * ctx)
- 	}
- 
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
- 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
- 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
- 
- 	if (!fp)		/* should only happenen once, just after context is created */
- 		return;
-@@ -1572,7 +1544,7 @@ static void r300SetupTextures(GLcontext * ctx)
- 			r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
- 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
- 			r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--				cmdpacket0(R300_TX_FILTER0_0, 1);
-+				cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
- 		}
- 		r300SetupFragmentShaderTextures(ctx, tmu_mappings);
- 	} else
-@@ -1741,7 +1713,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 	  | R300_HIRES_EN;
- 
- 	assert(high_rr >= 0);
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1);
- 	r300->hw.rc.cmd[2] = high_rr;
- 
- 	if (InputsRead)
-@@ -1901,7 +1873,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 	  | R300_HIRES_EN;
- 
- 	assert(high_rr >= 0);
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1);
- 	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
- 
- 	if (InputsRead)
-@@ -2099,6 +2071,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
- 	  (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
- }
- 
-+
- static void r300SetupVertexProgram(r300ContextPtr rmesa)
- {
- 	GLcontext *ctx = rmesa->radeon.glCtx;
-@@ -2128,6 +2101,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
-  */
- static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- {
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	if (RADEON_DEBUG & DEBUG_STATE)
- 		fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
- 			_mesa_lookup_enum_by_nr(cap),
-@@ -2173,8 +2147,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- 	case GL_POLYGON_OFFSET_FILL:
- 		r300SetPolygonOffsetState(ctx, state);
- 		break;
-+	case GL_SCISSOR_TEST:
-+		radeon_firevertices(&rmesa->radeon);
-+		rmesa->radeon.state.scissor.enabled = state;
-+		radeonUpdateScissor( ctx );
-+		break;
- 	default:
--		radeonEnable(ctx, cap, state);
- 		break;
- 	}
- }
-@@ -2185,6 +2163,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- static void r300ResetHwState(r300ContextPtr r300)
- {
- 	GLcontext *ctx = r300->radeon.glCtx;
-+	struct radeon_renderbuffer *rrb;
- 	int has_tcl = 1;
- 
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
-@@ -2215,8 +2194,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300UpdateCulling(ctx);
- 
--	r300UpdateTextureState(ctx);
--
- 	r300SetBlendState(ctx);
- 	r300SetLogicOpState(ctx);
- 
-@@ -2363,20 +2340,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300BlendColor(ctx, ctx->Color.BlendColor);
- 
--	/* Again, r300ClearBuffer uses this */
--	r300->hw.cb.cmd[R300_CB_OFFSET] =
--	    r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--
- 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
-@@ -2392,12 +2355,8 @@ static void r300ResetHwState(r300ContextPtr r300)
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
- 
--	r300->hw.zb.cmd[R300_ZB_OFFSET] =
--	    r300->radeon.radeonScreen->depthOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
--
--	if (r300->radeon.sarea->tiling_enabled) {
-+	rrb = r300->radeon.state.depth.rrb;
-+	if (rrb && rrb->bo && (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)) {
- 		/* XXX: Turn off when clearing buffers ? */
- 		r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
- 
-@@ -2440,7 +2399,7 @@ static void r300ResetHwState(r300ContextPtr r300)
- 		r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
- 	}
- 
--	r300->hw.all_dirty = GL_TRUE;
-+	r300->radeon.hw.all_dirty = GL_TRUE;
- }
- 
- void r300UpdateShaders(r300ContextPtr rmesa)
-@@ -2451,8 +2410,8 @@ void r300UpdateShaders(r300ContextPtr rmesa)
- 
- 	ctx = rmesa->radeon.glCtx;
- 
--	if (rmesa->NewGLState && hw_tcl_on) {
--		rmesa->NewGLState = 0;
-+	if (rmesa->radeon.NewGLState && hw_tcl_on) {
-+		rmesa->radeon.NewGLState = 0;
- 
- 		for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- 			rmesa->temp_attrib[i] =
-@@ -2531,10 +2490,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	R300_STATECHANGE(rmesa, fpi[1]);
- 	R300_STATECHANGE(rmesa, fpi[2]);
- 	R300_STATECHANGE(rmesa, fpi[3]);
--	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
--	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
--	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
--	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
-+	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
-+	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- 	for (i = 0; i < code->alu.length; i++) {
- 		rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
- 		rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
-@@ -2565,7 +2524,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	}
- 
- 	R300_STATECHANGE(rmesa, fpp);
--	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
-+	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
- 	for (i = 0; i < code->const_nr; i++) {
- 		const GLfloat *constant = get_fragmentprogram_constant(ctx,
- 			&fp->mesa_program.Base, code->constant[i]);
-@@ -2667,7 +2626,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
- 	GLcontext *ctx;
- 	ctx = rmesa->radeon.glCtx;
- 
--	r300UpdateTextureState(ctx);
- 	r300SetEarlyZState(ctx);
- 
- 	GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
-@@ -2712,7 +2670,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
- 
- 	r300UpdateStateParameters(ctx, new_state);
- 
--	r300->NewGLState |= new_state;
-+	r300->radeon.NewGLState |= new_state;
- }
- 
- /**
-@@ -2725,15 +2683,13 @@ void r300InitState(r300ContextPtr r300)
- 	GLcontext *ctx = r300->radeon.glCtx;
- 	GLuint depth_fmt;
- 
--	radeonInitState(&r300->radeon);
--
- 	switch (ctx->Visual.depthBits) {
- 	case 16:
--		r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
-+		r300->radeon.state.depth.scale = 1.0 / (GLfloat) 0xffff;
- 		depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
- 		break;
- 	case 24:
--		r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
-+		r300->radeon.state.depth.scale = 1.0 / (GLfloat) 0xffffff;
- 		depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
- 		break;
- 	default:
-@@ -2743,8 +2699,8 @@ void r300InitState(r300ContextPtr r300)
- 	}
- 
- 	/* Only have hw stencil when depth buffer is 24 bits deep */
--	r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
--					  ctx->Visual.depthBits == 24);
-+	r300->radeon.state.stencil.hwBuffer = (ctx->Visual.stencilBits > 0 &&
-+					       ctx->Visual.depthBits == 24);
- 
- 	memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
- 
-@@ -2776,12 +2732,32 @@ void r300UpdateClipPlanes( GLcontext *ctx )
- 	}
- }
- 
-+static void r300DrawBuffer( GLcontext *ctx, GLenum mode )
-+{
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s %s\n", __FUNCTION__,
-+			_mesa_lookup_enum_by_nr( mode ));
-+
-+	radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
-+
-+	radeonSetCliprects( &rmesa->radeon );
-+	radeonUpdatePageFlipping(&rmesa->radeon);
-+}
-+
-+static void r300ReadBuffer( GLcontext *ctx, GLenum mode )
-+{
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s %s\n", __FUNCTION__,
-+			_mesa_lookup_enum_by_nr( mode ));
-+
-+};
-+
- /**
-  * Initialize driver's state callback functions
-  */
- void r300InitStateFuncs(struct dd_function_table *functions)
- {
--	radeonInitStateFuncs(functions);
- 
- 	functions->UpdateState = r300InvalidateState;
- 	functions->AlphaFunc = r300AlphaFunc;
-@@ -2818,4 +2794,8 @@ void r300InitStateFuncs(struct dd_function_table *functions)
- 	functions->RenderMode = r300RenderMode;
- 
- 	functions->ClipPlane = r300ClipPlane;
-+	functions->Scissor = radeonScissor;
-+
-+	functions->DrawBuffer		= r300DrawBuffer;
-+	functions->ReadBuffer		= r300ReadBuffer;
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
-index 0589ab7..247a20e 100644
---- a/src/mesa/drivers/dri/r300/r300_state.h
-+++ b/src/mesa/drivers/dri/r300/r300_state.h
-@@ -39,32 +39,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define R300_NEWPRIM( rmesa )			\
-   do {						\
--    if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );		\
-+  if ( rmesa->radeon.dma.flush )			\
-+    rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
-   } while (0)
- 
- #define R300_STATECHANGE(r300, atom) \
- 	do {						\
- 	  R300_NEWPRIM(r300);				\
- 		r300->hw.atom.dirty = GL_TRUE;		\
--		r300->hw.is_dirty = GL_TRUE;		\
-+		r300->radeon.hw.is_dirty = GL_TRUE;		\
- 	} while(0)
- 
--#define R300_PRINT_STATE(r300, atom) \
--		r300PrintStateAtom(r300, &r300->hw.atom)
--
--/* Fire the buffered vertices no matter what.
--   TODO: This has not been implemented yet
-- */
--#define R300_FIREVERTICES( r300 )			\
--do {							\
--    \
--   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
--      r300Flush( (r300)->radeon.glCtx );		\
--   }							\
--    \
--} while (0)
--
- // r300_state.c
- extern int future_hw_tcl_on;
- void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
-index b6e7ce1..d73af86 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.c
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
-@@ -56,26 +56,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_emit.h"
--#include "r300_mem.h"
-+#include "r300_tex.h"
- 
--static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
--
--
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset);
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
- #define EMIT_ATTR( ATTR, STYLE )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- static void r300SetVertexFormat( GLcontext *ctx )
-@@ -86,7 +83,6 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	DECLARE_RENDERINPUTS(index_bitset);
- 	GLuint InputsRead = 0, OutputsWritten = 0;
- 	int vap_fmt_0 = 0;
--	int vap_vte_cntl = 0;
- 	int offset = 0;
- 	int vte = 0;
- 	GLint inputs[VERT_ATTRIB_MAX];
-@@ -114,7 +110,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	}
- 
- 	assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--	rmesa->swtcl.vertex_attr_count = 0;
-+	rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
- 	/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
- 	 * build up a hardware vertex.
-@@ -175,7 +171,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 			inputs[i] = -1;
- 		}
- 	}
--	
-+
- 	/* Fixed, apply to vir0 only */
- 	if (InputsRead & (1 << VERT_ATTRIB_POS))
- 		inputs[VERT_ATTRIB_POS] = 0;
-@@ -186,16 +182,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- 		if (InputsRead & (1 << i))
- 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
--	
-+
- 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- 		if (InputsRead & (1 << i)) {
- 			tab[nr++] = i;
- 		}
- 	}
--	
-+
- 	for (i = 0; i < nr; i++) {
- 		int ci;
--		
-+
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
- 		swizzle[i][2] = SWIZZLE_ZERO;
-@@ -215,98 +211,29 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- 				   nr);
--   
-+
- 	R300_STATECHANGE(rmesa, vic);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
--   
-+
- 	R300_STATECHANGE(rmesa, vof);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
--   
--	rmesa->swtcl.vertex_size =
-+
-+	rmesa->radeon.swtcl.vertex_size =
- 		_tnl_install_attrs( ctx,
--				    rmesa->swtcl.vertex_attrs, 
--				    rmesa->swtcl.vertex_attr_count,
-+				    rmesa->radeon.swtcl.vertex_attrs,
-+				    rmesa->radeon.swtcl.vertex_attr_count,
- 				    NULL, 0 );
--	
--	rmesa->swtcl.vertex_size /= 4;
-+
-+	rmesa->radeon.swtcl.vertex_size /= 4;
- 
- 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
- 
- 
- 	R300_STATECHANGE(rmesa, vte);
- 	rmesa->hw.vte.cmd[1] = vte;
--	rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
--}
--
--
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s\n", __FUNCTION__);
--	
--	rmesa->dma.flush = NULL;
--
--	if (rmesa->dma.current.buf) {
--		struct r300_dma_region *current = &rmesa->dma.current;
--		GLuint current_offset = GET_START(current);
--
--		assert (current->start + 
--			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--			current->ptr);
--
--		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--
--			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
--			
--			r300EmitState(rmesa);
--			
--			r300EmitVertexAOS( rmesa,
--					   rmesa->swtcl.vertex_size,
--					   current_offset);
--			
--			r300EmitVbufPrim( rmesa,
--					  rmesa->swtcl.hw_primitive,
--					  rmesa->swtcl.numverts);
--			
--			r300EmitCacheFlush(rmesa);
--		}
--		
--		rmesa->swtcl.numverts = 0;
--		current->start = current->ptr;
--	}
--}
--
--/* Alloc space in the current dma region.
-- */
--static void *
--r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
--{
--	GLuint bytes = vsize * nverts;
--
--	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--		r300RefillCurrentDmaRegion( rmesa, bytes);
--
--	if (!rmesa->dma.flush) {
--		rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--		rmesa->dma.flush = flush_last_swtcl_prim;
--	}
--
--	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
--	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--	ASSERT( rmesa->dma.current.start + 
--		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--		rmesa->dma.current.ptr );
--
--	{
--		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--		rmesa->dma.current.ptr += bytes;
--		rmesa->swtcl.numverts += nverts;
--		return head;
--	}
-+	rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
- }
- 
- static GLuint reduced_prim[] = {
-@@ -346,13 +273,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
- #define CTX_ARG r300ContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
- #define LOCAL_VARS						\
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
--   const char *r300verts = (char *)rmesa->swtcl.verts;
-+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
- #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
--#define VERTEX r300Vertex 
-+#define VERTEX r300Vertex
- #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
- #define PRINT_VERTEX(x)
- #undef TAG
-@@ -409,7 +336,7 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
- 
- /* Only used to pull back colors into vertices (ie, we know color is
-  * floating point).
-@@ -455,7 +382,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -512,8 +439,8 @@ static void init_rast_tab( void )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *r300verts = (char *)rmesa->swtcl.verts;		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -545,7 +472,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
- 	if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
- 	if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
- 
--	if (index != rmesa->swtcl.RenderIndex) {
-+	if (index != rmesa->radeon.swtcl.RenderIndex) {
- 		tnl->Driver.Render.Points = rast_tab[index].points;
- 		tnl->Driver.Render.Line = rast_tab[index].line;
- 		tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -562,7 +489,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
- 			tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
- 		}
- 
--		rmesa->swtcl.RenderIndex = index;
-+		rmesa->radeon.swtcl.RenderIndex = index;
- 	}
- }
- 
-@@ -572,18 +499,18 @@ static void r300RenderStart(GLcontext *ctx)
-         r300ContextPtr rmesa = R300_CONTEXT( ctx );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	r300ChooseRenderState(ctx);	
-+	r300ChooseRenderState(ctx);
- 	r300SetVertexFormat(ctx);
- 
-+	r300ValidateBuffers(ctx);
-+
- 	r300UpdateShaders(rmesa);
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
--	
--	if (rmesa->dma.flush != 0 && 
--	    rmesa->dma.flush != flush_last_swtcl_prim)
--		rmesa->dma.flush( rmesa );
--
-+	if (rmesa->radeon.dma.flush != NULL) {
-+		rmesa->radeon.dma.flush(ctx);
-+	}
- }
- 
- static void r300RenderFinish(GLcontext *ctx)
-@@ -593,10 +520,10 @@ static void r300RenderFinish(GLcontext *ctx)
- static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	
--	if (rmesa->swtcl.hw_primitive != hwprim) {
-+
-+	if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
- 	        R300_NEWPRIM( rmesa );
--		rmesa->swtcl.hw_primitive = hwprim;
-+		rmesa->radeon.swtcl.hw_primitive = hwprim;
- 	}
- }
- 
-@@ -604,14 +531,14 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
- {
- 
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	rmesa->swtcl.render_primitive = prim;
-+	rmesa->radeon.swtcl.render_primitive = prim;
- 
- 	if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
- 	  return;
- 
- 	r300RasterPrimitive( ctx, reduced_prim[prim] );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
--	
-+
- }
- 
- static void r300ResetLineStipple(GLcontext *ctx)
-@@ -625,12 +552,12 @@ void r300InitSwtcl(GLcontext *ctx)
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	static int firsttime = 1;
--	
-+
- 	if (firsttime) {
- 		init_rast_tab();
- 		firsttime = 0;
- 	}
--	
-+
- 	tnl->Driver.Render.Start = r300RenderStart;
- 	tnl->Driver.Render.Finish = r300RenderFinish;
- 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
-@@ -638,15 +565,15 @@ void r300InitSwtcl(GLcontext *ctx)
- 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
- 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
- 	tnl->Driver.Render.Interp = _tnl_interp;
--	
-+
- 	/* FIXME: what are these numbers? */
--	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
-+	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
- 			    48 * sizeof(GLfloat) );
--	
--	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--	rmesa->swtcl.RenderIndex = ~0;
--	rmesa->swtcl.render_primitive = GL_TRIANGLES;
--	rmesa->swtcl.hw_primitive = 0;	
-+
-+	rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+	rmesa->radeon.swtcl.RenderIndex = ~0;
-+	rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+	rmesa->radeon.swtcl.hw_primitive = 0;
- 
- 	_tnl_invalidate_vertex_state( ctx, ~0 );
- 	_tnl_invalidate_vertices( ctx, ~0 );
-@@ -655,9 +582,9 @@ void r300InitSwtcl(GLcontext *ctx)
- 	_tnl_need_projected_coords( ctx, GL_FALSE );
- 	r300ChooseRenderState(ctx);
- 
--	_mesa_validate_all_lighting_tables( ctx ); 
-+	_mesa_validate_all_lighting_tables( ctx );
- 
--	tnl->Driver.NotifyMaterialChange = 
-+	tnl->Driver.NotifyMaterialChange =
- 	  _mesa_validate_all_lighting_tables;
- }
- 
-@@ -665,33 +592,53 @@ void r300DestroySwtcl(GLcontext *ctx)
- {
- }
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(&rmesa->radeon);
- 
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	if (RADEON_DEBUG & DEBUG_VERTS)
--	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
--		  __FUNCTION__, vertex_size, offset);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
--	e32(1);
--	e32(vertex_size | (vertex_size << 8));
--	e32(offset);
-+		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-+			__FUNCTION__, vertex_size, offset);
-+
-+	BEGIN_BATCH(5);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
-+	OUT_BATCH(1);
-+	OUT_BATCH(vertex_size | (vertex_size << 8));
-+	OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+	END_BATCH();
- }
- 
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
- {
--
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(&rmesa->radeon);
- 	int type, num_verts;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	type = r300PrimitiveType(rmesa, primitive);
- 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
--	
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+	END_BATCH();
-+}
-+
-+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
-+{
-+  r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+
-+  rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			   rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			   __FUNCTION__);
-+  radeonEmitState(&rmesa->radeon);
-+  r300EmitVertexAOS(rmesa,
-+		    rmesa->radeon.swtcl.vertex_size,
-+		    rmesa->radeon.dma.current,
-+		    current_offset);
-+  
-+  r300EmitVbufPrim(rmesa,
-+		   rmesa->radeon.swtcl.hw_primitive,
-+		   rmesa->radeon.swtcl.numverts);
-+  r300EmitCacheFlush(rmesa);
-+  COMMIT_BATCH();
-+
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
-index 55df53c..23b4ce3 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.h
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
-@@ -42,4 +42,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- extern void r300InitSwtcl( GLcontext *ctx );
- extern void r300DestroySwtcl( GLcontext *ctx );
- 
-+extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
-index 8ab382c..27b9070 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.c
-+++ b/src/mesa/drivers/dri/r300/r300_tex.c
-@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "main/enums.h"
- #include "main/image.h"
-+#include "main/mipmap.h"
- #include "main/simple_list.h"
- #include "main/texformat.h"
- #include "main/texstore.h"
-@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_tex.h"
- 
- #include "xmlpool.h"
-@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
-  *
-  * \param t Texture object whose wrap modes are to be set
-  */
--static void r300UpdateTexWrap(r300TexObjPtr t)
-+static void r300UpdateTexWrap(radeonTexObjPtr t)
- {
--	struct gl_texture_object *tObj = t->base.tObj;
-+	struct gl_texture_object *tObj = &t->base;
- 
--	t->filter &=
-+	t->pp_txfilter &=
- 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
- 
--	t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
-+	t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
- 
- 	if (tObj->Target != GL_TEXTURE_1D) {
--		t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
-+		t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
- 
- 		if (tObj->Target == GL_TEXTURE_3D)
--			t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
-+			t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
- 	}
- }
- 
-@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy)
-  * \param magf Texture magnification mode
-  * \param anisotropy Maximum anisotropy level
-  */
--static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
-+static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
- {
--	t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
--	t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
-+	/* Force revalidation to account for switches from/to mipmapping. */
-+	t->validated = GL_FALSE;
-+
-+	t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
-+	t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
- 
- 	/* Note that EXT_texture_filter_anisotropic is extremely vague about
- 	 * how anisotropic filtering interacts with the "normal" filter modes.
-@@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 	 * filter settings completely. This includes driconf's settings.
- 	 */
- 	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
--		t->filter |= R300_TX_MAG_FILTER_ANISO
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
- 			| R300_TX_MIN_FILTER_ANISO
- 			| R300_TX_MIN_FILTER_MIP_LINEAR
- 			| aniso_filter(anisotropy);
-@@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 
- 	switch (minf) {
- 	case GL_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
- 		break;
- 	case GL_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
- 		break;
- 	case GL_NEAREST_MIPMAP_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
- 		break;
- 	case GL_NEAREST_MIPMAP_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
- 		break;
- 	case GL_LINEAR_MIPMAP_NEAREST:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
- 		break;
- 	case GL_LINEAR_MIPMAP_LINEAR:
--		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
-+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
- 		break;
- 	}
- 
-@@ -163,743 +168,20 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
- 	 */
- 	switch (magf) {
- 	case GL_NEAREST:
--		t->filter |= R300_TX_MAG_FILTER_NEAREST;
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
- 		break;
- 	case GL_LINEAR:
--		t->filter |= R300_TX_MAG_FILTER_LINEAR;
-+		t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
- 		break;
- 	}
- }
- 
--static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
-+static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4])
- {
- 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
- }
- 
- /**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
--{
--	r300TexObjPtr t;
--
--	t = CALLOC_STRUCT(r300_tex_obj);
--	texObj->DriverData = t;
--	if (t != NULL) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE) {
--			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--				(void *)texObj, (void *)t);
--		}
--
--		/* Initialize non-image-dependent parts of the state:
--		 */
--		t->base.tObj = texObj;
--		t->border_fallback = GL_FALSE;
--
--		make_empty_list(&t->base);
--
--		r300UpdateTexWrap(t);
--		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
--		r300SetTexBorderColor(t, texObj->_BorderChan);
--	}
--
--	return t;
--}
--
--/* try to find a format which will only need a memcopy */
--static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
--							       GLenum srcType)
--{
--	const GLuint ui = 1;
--	const GLubyte littleEndian = *((const GLubyte *)&ui);
--
--	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
--		return &_mesa_texformat_rgba8888;
--	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
--		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
--		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
--		return &_mesa_texformat_rgba8888_rev;
--	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
--					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
--		return &_mesa_texformat_argb8888_rev;
--	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
--					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
--		return &_mesa_texformat_argb8888;
--	} else
--		return _dri_texformat_argb8888;
--}
--
--static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
--							       GLint
--							       internalFormat,
--							       GLenum format,
--							       GLenum type)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	const GLboolean do32bpt =
--	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
--	const GLboolean force16bpt =
--	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
--	(void)format;
--
--#if 0
--	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
--		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
--		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
--	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
--#endif
--
--	switch (internalFormat) {
--	case 4:
--	case GL_RGBA:
--	case GL_COMPRESSED_RGBA:
--		switch (type) {
--		case GL_UNSIGNED_INT_10_10_10_2:
--		case GL_UNSIGNED_INT_2_10_10_10_REV:
--			return do32bpt ? _dri_texformat_argb8888 :
--			    _dri_texformat_argb1555;
--		case GL_UNSIGNED_SHORT_4_4_4_4:
--		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--			return _dri_texformat_argb4444;
--		case GL_UNSIGNED_SHORT_5_5_5_1:
--		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--			return _dri_texformat_argb1555;
--		default:
--			return do32bpt ? r300Choose8888TexFormat(format, type) :
--			    _dri_texformat_argb4444;
--		}
--
--	case 3:
--	case GL_RGB:
--	case GL_COMPRESSED_RGB:
--		switch (type) {
--		case GL_UNSIGNED_SHORT_4_4_4_4:
--		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--			return _dri_texformat_argb4444;
--		case GL_UNSIGNED_SHORT_5_5_5_1:
--		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--			return _dri_texformat_argb1555;
--		case GL_UNSIGNED_SHORT_5_6_5:
--		case GL_UNSIGNED_SHORT_5_6_5_REV:
--			return _dri_texformat_rgb565;
--		default:
--			return do32bpt ? _dri_texformat_argb8888 :
--			    _dri_texformat_rgb565;
--		}
--
--	case GL_RGBA8:
--	case GL_RGB10_A2:
--	case GL_RGBA12:
--	case GL_RGBA16:
--		return !force16bpt ?
--		    r300Choose8888TexFormat(format,
--					    type) : _dri_texformat_argb4444;
--
--	case GL_RGBA4:
--	case GL_RGBA2:
--		return _dri_texformat_argb4444;
--
--	case GL_RGB5_A1:
--		return _dri_texformat_argb1555;
--
--	case GL_RGB8:
--	case GL_RGB10:
--	case GL_RGB12:
--	case GL_RGB16:
--		return !force16bpt ? _dri_texformat_argb8888 :
--		    _dri_texformat_rgb565;
--
--	case GL_RGB5:
--	case GL_RGB4:
--	case GL_R3_G3_B2:
--		return _dri_texformat_rgb565;
--
--	case GL_ALPHA:
--	case GL_ALPHA4:
--	case GL_ALPHA8:
--	case GL_ALPHA12:
--	case GL_ALPHA16:
--	case GL_COMPRESSED_ALPHA:
--		return _dri_texformat_a8;
--
--	case 1:
--	case GL_LUMINANCE:
--	case GL_LUMINANCE4:
--	case GL_LUMINANCE8:
--	case GL_LUMINANCE12:
--	case GL_LUMINANCE16:
--	case GL_COMPRESSED_LUMINANCE:
--		return _dri_texformat_l8;
--
--	case 2:
--	case GL_LUMINANCE_ALPHA:
--	case GL_LUMINANCE4_ALPHA4:
--	case GL_LUMINANCE6_ALPHA2:
--	case GL_LUMINANCE8_ALPHA8:
--	case GL_LUMINANCE12_ALPHA4:
--	case GL_LUMINANCE12_ALPHA12:
--	case GL_LUMINANCE16_ALPHA16:
--	case GL_COMPRESSED_LUMINANCE_ALPHA:
--		return _dri_texformat_al88;
--
--	case GL_INTENSITY:
--	case GL_INTENSITY4:
--	case GL_INTENSITY8:
--	case GL_INTENSITY12:
--	case GL_INTENSITY16:
--	case GL_COMPRESSED_INTENSITY:
--		return _dri_texformat_i8;
--
--	case GL_YCBCR_MESA:
--		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--		    type == GL_UNSIGNED_BYTE)
--			return &_mesa_texformat_ycbcr;
--		else
--			return &_mesa_texformat_ycbcr_rev;
--
--	case GL_RGB_S3TC:
--	case GL_RGB4_S3TC:
--	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--		return &_mesa_texformat_rgb_dxt1;
--
--	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--		return &_mesa_texformat_rgba_dxt1;
--
--	case GL_RGBA_S3TC:
--	case GL_RGBA4_S3TC:
--	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--		return &_mesa_texformat_rgba_dxt3;
--
--	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--		return &_mesa_texformat_rgba_dxt5;
--
--	case GL_ALPHA16F_ARB:
--		return &_mesa_texformat_alpha_float16;
--	case GL_ALPHA32F_ARB:
--		return &_mesa_texformat_alpha_float32;
--	case GL_LUMINANCE16F_ARB:
--		return &_mesa_texformat_luminance_float16;
--	case GL_LUMINANCE32F_ARB:
--		return &_mesa_texformat_luminance_float32;
--	case GL_LUMINANCE_ALPHA16F_ARB:
--		return &_mesa_texformat_luminance_alpha_float16;
--	case GL_LUMINANCE_ALPHA32F_ARB:
--		return &_mesa_texformat_luminance_alpha_float32;
--	case GL_INTENSITY16F_ARB:
--		return &_mesa_texformat_intensity_float16;
--	case GL_INTENSITY32F_ARB:
--		return &_mesa_texformat_intensity_float32;
--	case GL_RGB16F_ARB:
--		return &_mesa_texformat_rgba_float16;
--	case GL_RGB32F_ARB:
--		return &_mesa_texformat_rgba_float32;
--	case GL_RGBA16F_ARB:
--		return &_mesa_texformat_rgba_float16;
--	case GL_RGBA32F_ARB:
--		return &_mesa_texformat_rgba_float32;
--
--	case GL_DEPTH_COMPONENT:
--	case GL_DEPTH_COMPONENT16:
--	case GL_DEPTH_COMPONENT24:
--	case GL_DEPTH_COMPONENT32:
--#if 0
--		switch (type) {
--		case GL_UNSIGNED_BYTE:
--		case GL_UNSIGNED_SHORT:
--			return &_mesa_texformat_z16;
--		case GL_UNSIGNED_INT:
--			return &_mesa_texformat_z32;
--		case GL_UNSIGNED_INT_24_8_EXT:
--		default:
--			return &_mesa_texformat_z24_s8;
--		}
--#else
--		return &_mesa_texformat_z16;
--#endif
--
--	default:
--		_mesa_problem(ctx,
--			      "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
--			      (int)internalFormat);
--		return NULL;
--	}
--
--	return NULL;		/* never get here */
--}
--
--static GLboolean
--r300ValidateClientStorage(GLcontext * ctx, GLenum target,
--			  GLint internalFormat,
--			  GLint srcWidth, GLint srcHeight,
--			  GLenum format, GLenum type, const void *pixels,
--			  const struct gl_pixelstore_attrib *packing,
--			  struct gl_texture_object *texObj,
--			  struct gl_texture_image *texImage)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "intformat %s format %s type %s\n",
--			_mesa_lookup_enum_by_nr(internalFormat),
--			_mesa_lookup_enum_by_nr(format),
--			_mesa_lookup_enum_by_nr(type));
--
--	if (!ctx->Unpack.ClientStorage)
--		return 0;
--
--	if (ctx->_ImageTransferState ||
--	    texImage->IsCompressed || texObj->GenerateMipmap)
--		return 0;
--
--	/* This list is incomplete, may be different on ppc???
--	 */
--	switch (internalFormat) {
--	case GL_RGBA:
--		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
--			texImage->TexFormat = _dri_texformat_argb8888;
--		} else
--			return 0;
--		break;
--
--	case GL_RGB:
--		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
--			texImage->TexFormat = _dri_texformat_rgb565;
--		} else
--			return 0;
--		break;
--
--	case GL_YCBCR_MESA:
--		if (format == GL_YCBCR_MESA &&
--		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--		} else if (format == GL_YCBCR_MESA &&
--			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--			    type == GL_UNSIGNED_BYTE)) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr;
--		} else
--			return 0;
--		break;
--
--	default:
--		return 0;
--	}
--
--	/* Could deal with these packing issues, but currently don't:
--	 */
--	if (packing->SkipPixels ||
--	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
--		return 0;
--	}
--
--	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						    format, type);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: srcRowStride %d/%x\n",
--			__FUNCTION__, srcRowStride, srcRowStride);
--
--	/* Could check this later in upload, pitch restrictions could be
--	 * relaxed, but would need to store the image pitch somewhere,
--	 * as packing details might change before image is uploaded:
--	 */
--	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
--	    || (srcRowStride & 63))
--		return 0;
--
--	/* Have validated that _mesa_transfer_teximage would be a straight
--	 * memcpy at this point.  NOTE: future calls to TexSubImage will
--	 * overwrite the client data.  This is explicitly mentioned in the
--	 * extension spec.
--	 */
--	texImage->Data = (void *)pixels;
--	texImage->IsClientData = GL_TRUE;
--	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
--
--	return 1;
--}
--
--static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--			return;
--		}
--	}
--
--	/* Note, this will call ChooseTextureFormat */
--	_mesa_store_teximage1d(ctx, target, level, internalFormat,
--			       width, border, format, type, pixels,
--			       &ctx->Unpack, texObj, texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset,
--			      GLsizei width,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--			return;
--		}
--	}
--
--	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--				  format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint height, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage2d(ctx, target, level, internalFormat,
--				       width, height, border, format, type,
--				       pixels, &ctx->Unpack, texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
--	}
--}
--
--static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset, GLint yoffset,
--			      GLsizei width, GLsizei height,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--			return;
--		}
--	}
--
--	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--				  height, format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[face] |= (1 << level);
--}
--
--static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
--				     GLint level, GLint internalFormat,
--				     GLint width, GLint height, GLint border,
--				     GLsizei imageSize, const GLvoid * data,
--				     struct gl_texture_object *texObj,
--				     struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexImage2D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--	/* can't call this, different parameters. Would never evaluate to true anyway currently */
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_compressed_teximage2d(ctx, target, level,
--						  internalFormat, width, height,
--						  border, imageSize, data,
--						  texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
--	}
--}
--
--static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
--					GLint level, GLint xoffset,
--					GLint yoffset, GLsizei width,
--					GLsizei height, GLenum format,
--					GLsizei imageSize, const GLvoid * data,
--					struct gl_texture_object *texObj,
--					struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexSubImage3D");
--			return;
--		}
--	}
--
--	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
--					     yoffset, width, height, format,
--					     imageSize, data, texObj, texImage);
--
--	t->dirty_images[face] |= (1 << level);
--}
--
--static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint height, GLint depth,
--			   GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--			return;
--		}
--	}
--
--	texImage->IsClientData = GL_FALSE;
--
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage3d(ctx, target, level, internalFormat,
--				       width, height, depth, border,
--				       format, type, pixels,
--				       &ctx->Unpack, texObj, texImage);
--
--		t->dirty_images[0] |= (1 << level);
--	}
--}
--
--static void
--r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
--		  GLint xoffset, GLint yoffset, GLint zoffset,
--		  GLsizei width, GLsizei height, GLsizei depth,
--		  GLenum format, GLenum type,
--		  const GLvoid * pixels,
--		  const struct gl_pixelstore_attrib *packing,
--		  struct gl_texture_object *texObj,
--		  struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--			return;
--		}
--		texObj->DriverData = t;
--	}
--
--	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
--				  width, height, depth,
--				  format, type, pixels, packing, texObj,
--				  texImage);
--
--	t->dirty_images[0] |= (1 << level);
--}
--
--/**
-  * Changes variables and flags for a state update, which will happen at the
-  * next UpdateTextureState
-  */
-@@ -908,7 +190,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 			     struct gl_texture_object *texObj,
- 			     GLenum pname, const GLfloat * params)
- {
--	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
-+	radeonTexObj* t = radeon_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
-@@ -941,7 +223,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 		 * we just have to rely on loading the right subset of mipmap levels
- 		 * to simulate a clamped LOD.
- 		 */
--		driSwapOutTextureObject((driTextureObject *) t);
-+		if (t->mt) {
-+			radeon_miptree_unreference(t->mt);
-+			t->mt = 0;
-+			t->validated = GL_FALSE;
-+		}
- 		break;
- 
- 	case GL_DEPTH_TEXTURE_MODE:
-@@ -964,27 +250,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 	}
- }
- 
--static void r300BindTexture(GLcontext * ctx, GLenum target,
--			    struct gl_texture_object *texObj)
--{
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
--		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
--			(void *)texObj, ctx->Texture.CurrentUnit);
--	}
--
--	if ((target == GL_TEXTURE_1D)
--	    || (target == GL_TEXTURE_2D)
--	    || (target == GL_TEXTURE_3D)
--	    || (target == GL_TEXTURE_CUBE_MAP)
--	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
--		assert(texObj->DriverData != NULL);
--	}
--}
--
- static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	radeonTexObj* t = radeon_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-@@ -992,14 +261,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- 			_mesa_lookup_enum_by_nr(texObj->Target));
- 	}
- 
--	if (t != NULL) {
--		if (rmesa) {
--			R300_FIREVERTICES(rmesa);
--		}
-+	if (rmesa) {
-+		int i;
-+		radeon_firevertices(&rmesa->radeon);
- 
--		driDestroyTextureObject(t);
-+		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
-+			if (rmesa->hw.textures[i] == t)
-+				rmesa->hw.textures[i] = 0;
-+	}
-+
-+	if (t->mt) {
-+		radeon_miptree_unreference(t->mt);
-+		t->mt = 0;
- 	}
--	/* Free mipmap images and the texture object itself */
- 	_mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1008,8 +282,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
- static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
-@@ -1017,14 +289,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
- 						      GLenum target)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_object *obj;
--	obj = _mesa_new_texture_object(ctx, name, target);
--	if (!obj)
--		return NULL;
--	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+	radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
- 
--	r300AllocTexObj(obj);
--	return obj;
-+	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+			t, _mesa_lookup_enum_by_nr(target));
-+	}
-+
-+	_mesa_initialize_texture_object(&t->base, name, target);
-+	t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+	/* Initialize hardware state */
-+	r300UpdateTexWrap(t);
-+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
-+	r300SetTexBorderColor(t, t->base._BorderChan);
-+
-+	return &t->base;
- }
- 
- void r300InitTextureFuncs(struct dd_function_table *functions)
-@@ -1032,22 +313,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	/* Note: we only plug in the functions we implement in the driver
- 	 * since _mesa_init_driver_functions() was already called.
- 	 */
--	functions->ChooseTextureFormat = r300ChooseTextureFormat;
--	functions->TexImage1D = r300TexImage1D;
--	functions->TexImage2D = r300TexImage2D;
--	functions->TexImage3D = r300TexImage3D;
--	functions->TexSubImage1D = r300TexSubImage1D;
--	functions->TexSubImage2D = r300TexSubImage2D;
--	functions->TexSubImage3D = r300TexSubImage3D;
-+	functions->NewTextureImage = radeonNewTextureImage;
-+	functions->FreeTexImageData = radeonFreeTexImageData;
-+	functions->MapTexture = radeonMapTexture;
-+	functions->UnmapTexture = radeonUnmapTexture;
-+
-+	functions->ChooseTextureFormat = radeonChooseTextureFormat;
-+	functions->TexImage1D = radeonTexImage1D;
-+	functions->TexImage2D = radeonTexImage2D;
-+	functions->TexImage3D = radeonTexImage3D;
-+	functions->TexSubImage1D = radeonTexSubImage1D;
-+	functions->TexSubImage2D = radeonTexSubImage2D;
-+	functions->TexSubImage3D = radeonTexSubImage3D;
-+	functions->GetTexImage = radeonGetTexImage;
-+	functions->GetCompressedTexImage = radeonGetCompressedTexImage;
- 	functions->NewTextureObject = r300NewTextureObject;
--	functions->BindTexture = r300BindTexture;
- 	functions->DeleteTexture = r300DeleteTexture;
- 	functions->IsTextureResident = driIsTextureResident;
- 
- 	functions->TexParameter = r300TexParameter;
- 
--	functions->CompressedTexImage2D = r300CompressedTexImage2D;
--	functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
-+	functions->CompressedTexImage2D = radeonCompressedTexImage2D;
-+	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
-+
-+	functions->GenerateMipmap = radeonGenerateMipmap;
- 
- 	driInitTextureFormats();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
-index b86d45b..baad3fe 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.h
-+++ b/src/mesa/drivers/dri/r300/r300_tex.h
-@@ -37,16 +37,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
- 
-+extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
-+			     __DRIdrawable *dPriv);
-+
- extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 			     unsigned long long offset, GLint depth,
- 			     GLuint pitch);
- 
--extern void r300UpdateTextureState(GLcontext * ctx);
--
--extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLuint face);
--
--extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
-+extern GLboolean r300ValidateBuffers(GLcontext * ctx);
- 
- extern void r300InitTextureFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
-deleted file mode 100644
-index b03eefa..0000000
---- a/src/mesa/drivers/dri/r300/r300_texmem.c
-+++ /dev/null
-@@ -1,567 +0,0 @@
--/**************************************************************************
--
--Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.
--The Weather Channel, Inc. funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86
--license. This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/**
-- * \file
-- *
-- * \author Gareth Hughes <gareth@valinux.com>
-- *
-- * \author Kevin E. Martin <martin@valinux.com>
-- */
--
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/colormac.h"
--#include "main/macros.h"
--#include "main/simple_list.h"
--#include "radeon_reg.h"		/* gets definition for usleep */
--#include "r300_context.h"
--#include "r300_state.h"
--#include "r300_cmdbuf.h"
--#include "radeon_ioctl.h"
--#include "r300_tex.h"
--#include "r300_ioctl.h"
--#include <unistd.h>		/* for usleep() */
--
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
--{
--	int i;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--			(void *)t, (void *)t->base.tObj);
--	}
--
--	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
--		if (rmesa->state.texture.unit[i].texobj == t) {
--			rmesa->state.texture.unit[i].texobj = NULL;
--		}
--	}
--}
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
--					 r300TexObjPtr t,
--					 struct gl_texture_image *texImage,
--					 GLint hwlevel,
--					 GLint x, GLint y,
--					 GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	GLuint srcPitch, dstPitch;
--	int blit_format;
--	int srcOffset;
--
--	/*
--	 * XXX it appears that we always upload the full image, not a subimage.
--	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--	 * changed, the src pitch will have to change.
--	 */
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][hwlevel].data = texImage->Data;
--	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--
--	assert(srcOffset != ~0);
--
--	/* Don't currently need to cope with small pitches?
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	r300EmitWait(rmesa, R300_WAIT_3D);
--
--	r300EmitBlit(rmesa, blit_format,
--		     srcPitch,
--		     srcOffset,
--		     dstPitch,
--		     t->bufAddr,
--		     x,
--		     y,
--		     t->image[0][hwlevel].x + x,
--		     t->image[0][hwlevel].y + y, width, height);
--
--	r300EmitWait(rmesa, R300_WAIT_2D);
--}
--
--static void r300UploadRectSubImage(r300ContextPtr rmesa,
--				   r300TexObjPtr t,
--				   struct gl_texture_image *texImage,
--				   GLint x, GLint y, GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	int blit_format, dstPitch, done;
--
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][0].data = texImage->Data;
--
--	/* Currently don't need to cope with small pitches.
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--	dstPitch = t->pitch;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--		/* In this case, could also use GART texturing.  This is
--		 * currently disabled, but has been tested & works.
--		 */
--		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"Using GART texturing for rectangular client texture\n");
--
--		/* Release FB memory allocated for this image:
--		 */
--		/* FIXME This may not be correct as driSwapOutTextureObject sets
--		 * FIXME dirty_images.  It may be fine, though.
--		 */
--		if (t->base.memBlock) {
--			driSwapOutTextureObject((driTextureObject *) t);
--		}
--	} else if (texImage->IsClientData) {
--		/* Data already in GART memory, with usable pitch.
--		 */
--		GLuint srcPitch;
--		srcPitch = texImage->RowStride * texFormat->TexelBytes;
--		r300EmitBlit(rmesa,
--			     blit_format,
--			     srcPitch,
--			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
--			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
--	} else {
--		/* Data not in GART memory, or bad pitch.
--		 */
--		for (done = 0; done < height;) {
--			struct r300_dma_region region;
--			int lines =
--			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
--			int src_pitch;
--			char *tex;
--
--			src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--			tex = (char *)texImage->Data + done * src_pitch;
--
--			memset(&region, 0, sizeof(region));
--			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
--					   1024);
--
--			/* Copy texdata to dma:
--			 */
--			if (RADEON_DEBUG & DEBUG_TEXTURE)
--				fprintf(stderr,
--					"%s: src_pitch %d dst_pitch %d\n",
--					__FUNCTION__, src_pitch, dstPitch);
--
--			if (src_pitch == dstPitch) {
--				memcpy(region.address + region.start, tex,
--				       lines * src_pitch);
--			} else {
--				char *buf = region.address + region.start;
--				int i;
--				for (i = 0; i < lines; i++) {
--					memcpy(buf, tex, src_pitch);
--					buf += dstPitch;
--					tex += src_pitch;
--				}
--			}
--
--			r300EmitWait(rmesa, R300_WAIT_3D);
--
--			/* Blit to framebuffer
--			 */
--			r300EmitBlit(rmesa,
--				     blit_format,
--				     dstPitch, GET_START(&region),
--				     dstPitch | (t->tile_bits >> 16),
--				     t->bufAddr, 0, 0, 0, done, width, lines);
--
--			r300EmitWait(rmesa, R300_WAIT_2D);
--#ifdef USER_BUFFERS
--			r300_mem_use(rmesa, region.buf->id);
--#endif
--
--			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
--			done += lines;
--		}
--	}
--}
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLint hwlevel,
--			       GLint x, GLint y, GLint width, GLint height,
--			       GLuint face)
--{
--	struct gl_texture_image *texImage = NULL;
--	GLuint offset;
--	GLint imageWidth, imageHeight;
--	GLint ret;
--	drm_radeon_texture_t tex;
--	drm_radeon_tex_image_t tmp;
--	const int level = hwlevel + t->base.firstLevel;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr,
--			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
--			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
--			width, height, face);
--	}
--
--	ASSERT(face < 6);
--
--	/* Ensure we have a valid texture to upload */
--	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
--		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--		return;
--	}
--
--	texImage = t->base.tObj->Image[face][level];
--
--	if (!texImage) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: texImage %d is NULL!\n",
--				__FUNCTION__, level);
--		return;
--	}
--	if (!texImage->Data) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is NULL!\n",
--				__FUNCTION__);
--		return;
--	}
--
--	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		assert(level == 0);
--		assert(hwlevel == 0);
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is rectangular\n",
--				__FUNCTION__);
--		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
--		return;
--	} else if (texImage->IsClientData) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"%s: image data is in GART client storage\n",
--				__FUNCTION__);
--		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
--					     width, height);
--		return;
--	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: image data is in normal memory\n",
--			__FUNCTION__);
--
--	imageWidth = texImage->Width;
--	imageHeight = texImage->Height;
--
--	offset = t->bufAddr;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		GLint imageX = 0;
--		GLint imageY = 0;
--		GLint blitX = t->image[face][hwlevel].x;
--		GLint blitY = t->image[face][hwlevel].y;
--		GLint blitWidth = t->image[face][hwlevel].width;
--		GLint blitHeight = t->image[face][hwlevel].height;
--		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
--			imageWidth, imageHeight, imageX, imageY);
--		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
--			blitWidth, blitHeight, blitX, blitY);
--		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--			(GLuint) offset, hwlevel, level);
--	}
--
--	t->image[face][hwlevel].data = texImage->Data;
--
--	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--	 * We used to use 1, 2 and 4-byte texels and used to use the texture
--	 * width to dictate the blit width - but that won't work for compressed
--	 * textures. (Brian)
--	 * NOTE: can't do that with texture tiling. (sroland)
--	 */
--	tex.offset = offset;
--	tex.image = &tmp;
--	/* copy (x,y,width,height,data) */
--	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
--
--	if (texImage->TexFormat->TexelBytes > 4) {
--		const int log2TexelBytes =
--		    (3 + (texImage->TexFormat->TexelBytes >> 4));
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.height = imageHeight;
--		tex.width = imageWidth << log2TexelBytes;
--		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
--		tmp.x = tmp.x % (1024 >> log2TexelBytes);
--		tmp.width = tmp.width << log2TexelBytes;
--	} else if (texImage->TexFormat->TexelBytes) {
--		/* use multi-byte upload scheme */
--		tex.height = imageHeight;
--		tex.width = imageWidth;
--		switch (texImage->TexFormat->TexelBytes) {
--		case 1:
--			tex.format = RADEON_TXFORMAT_I8;
--			break;
--		case 2:
--			tex.format = RADEON_TXFORMAT_AI88;
--			break;
--		case 4:
--			tex.format = RADEON_TXFORMAT_ARGB8888;
--			break;
--		}
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.offset += tmp.x & ~1023;
--		tmp.x = tmp.x % 1024;
--
--		if (t->tile_bits & R300_TXO_MICRO_TILE) {
--			/* need something like "tiled coordinates" ? */
--			tmp.y = tmp.x / (tex.pitch * 128) * 2;
--			tmp.x =
--			    tmp.x % (tex.pitch * 128) / 2 /
--			    texImage->TexFormat->TexelBytes;
--			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--		} else {
--			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--		}
--#if 1
--		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
--		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
--		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
--			 && (texImage->Height >= 8))
--			|| (texImage->Height >= 16))) {
--			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--			   OR if height is smaller than 8 automatically, but if micro tiling is active
--			   the limit is height 16 instead ? */
--			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--		}
--#endif
--	} else {
--		/* In case of for instance 8x8 texture (2x2 dxt blocks),
--		   padding after the first two blocks is needed (only
--		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
--		   has 4 real pixels. Needed so the kernel module reads
--		   the right amount of data. */
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
--		tex.height = (imageHeight + 3) / 4;
--		tex.width = (imageWidth + 3) / 4;
--		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
--			tex.width *= 8;
--		} else {
--			tex.width *= 16;
--		}
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--	do {
--		ret =
--		    drmCommandWriteRead(rmesa->radeon.dri.fd,
--					DRM_RADEON_TEXTURE, &tex,
--					sizeof(drm_radeon_texture_t));
--		if (ret) {
--			if (RADEON_DEBUG & DEBUG_IOCTL)
--				fprintf(stderr,
--					"DRM_RADEON_TEXTURE:  again!\n");
--			usleep(1);
--		}
--	} while (ret == -EAGAIN);
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
--		fprintf(stderr, "   offset=0x%08x\n", offset);
--		fprintf(stderr, "   image width=%d height=%d\n",
--			imageWidth, imageHeight);
--		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
--			t->image[face][hwlevel].width,
--			t->image[face][hwlevel].height,
--			t->image[face][hwlevel].data);
--		_mesa_exit(-1);
--	}
--}
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- *
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
--{
--	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--	if (t->image_override)
--		return 0;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
--			t->base.totalSize, t->base.firstLevel,
--			t->base.lastLevel);
--	}
--
--	if (t->base.totalSize == 0)
--		return 0;
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--
--	if (t->base.memBlock == NULL) {
--		int heap;
--
--		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
--					  (driTextureObject *) t);
--		if (heap == -1) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			return -1;
--		}
--
--		/* Set the base offset of the texture image */
--		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
--		    + t->base.memBlock->ofs;
--		t->offset = t->bufAddr;
--
--		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--			/* hope it's safe to add that here... */
--			t->offset |= t->tile_bits;
--		}
--	}
--
--	/* Let the world know we've used this memory recently.
--	 */
--	driUpdateTextureLRU((driTextureObject *) t);
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	/* Upload any images that are new */
--	if (t->base.dirty_images[face]) {
--		int i;
--		for (i = 0; i < numLevels; i++) {
--			if ((t->base.
--			     dirty_images[face] & (1 <<
--						   (i + t->base.firstLevel))) !=
--			    0) {
--				r300UploadSubImage(rmesa, t, i, 0, 0,
--						   t->image[face][i].width,
--						   t->image[face][i].height,
--						   face);
--			}
--		}
--		t->base.dirty_images[face] = 0;
--	}
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	return 0;
--}
-diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
-index e2329f0..25cd4d8 100644
---- a/src/mesa/drivers/dri/r300/r300_texstate.c
-+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
-@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
--#include "radeon_ioctl.h"
-+#include "radeon_mipmap_tree.h"
- #include "r300_tex.h"
- #include "r300_reg.h"
- 
-@@ -143,13 +143,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 		},
- 	};
- 	const GLuint *format;
--	r300TexObjPtr t;
-+	radeonTexObjPtr t;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
-+	t = radeon_tex_obj(tObj);
- 
- 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
- 	case MESA_FORMAT_Z16:
-@@ -171,13 +170,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 
- 	switch (tObj->DepthMode) {
- 	case GL_LUMINANCE:
--		t->format = format[0];
-+		t->pp_txformat = format[0];
- 		break;
- 	case GL_INTENSITY:
--		t->format = format[1];
-+		t->pp_txformat = format[1];
- 		break;
- 	case GL_ALPHA:
--		t->format = format[2];
-+		t->pp_txformat = format[2];
- 		break;
- 	default:
- 		/* Error...which should have already been caught by higher
-@@ -190,479 +189,296 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 
- 
- /**
-- * Compute sizes and fill in offset and blit information for the given
-- * image (determined by \p face and \p level).
-- *
-- * \param curOffset points to the offset at which the image is to be stored
-- * and is updated by this function according to the size of the image.
-- */
--static void compute_tex_image_offset(
--	struct gl_texture_object *tObj,
--	GLuint face,
--	GLint level,
--	GLint* curOffset)
--{
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image* texImage;
--	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
--	GLuint texelBytes;
--	GLuint size;
--
--	texImage = tObj->Image[0][level + t->base.firstLevel];
--	if (!texImage)
--		return;
--
--	texelBytes = texImage->TexFormat->TexelBytes;
--
--	/* find image size in bytes */
--	if (texImage->IsCompressed) {
--		if ((t->format & R300_TX_FORMAT_DXT1) ==
--			R300_TX_FORMAT_DXT1) {
--			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
--			if ((texImage->Width + 3) < 8)	/* width one block */
--				size = texImage->CompressedSize * 4;
--			else if ((texImage->Width + 3) < 16)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		} else {
--			/* DXT3/5, 16 bytes per block */
--			WARN_ONCE
--				("DXT 3/5 suffers from multitexturing problems!\n");
--			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
--			if ((texImage->Width + 3) < 8)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		}
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		size =
--			((texImage->Width * texelBytes +
--			63) & ~63) * texImage->Height;
--		blitWidth = 64 / texelBytes;
--	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
--		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--			though the actual offset may be different (if texture is less than
--			32 bytes width) to the untiled case */
--		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--		size =
--			(w * ((texImage->Height + 1) / 2)) *
--			texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	} else {
--		int w = (texImage->Width * texelBytes + 31) & ~31;
--		size = w * texImage->Height * texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	}
--	assert(size > 0);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
--			texImage->Width, texImage->Height,
--			texImage->Depth,
--			texImage->TexFormat->TexelBytes,
--			texImage->InternalFormat);
--
--	/* All images are aligned to a 32-byte offset */
--	*curOffset = (*curOffset + 0x1f) & ~0x1f;
--
--	if (texelBytes) {
--		/* fix x and y coords up later together with offset */
--		t->image[face][level].x = *curOffset;
--		t->image[face][level].y = 0;
--		t->image[face][level].width =
--			MIN2(size / texelBytes, blitWidth);
--		t->image[face][level].height =
--			(size / texelBytes) / t->image[face][level].width;
--	} else {
--		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].width =
--			MIN2(size, R300_BLIT_WIDTH_BYTES);
--		t->image[face][level].height = size / t->image[face][level].width;
--	}
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr,
--			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--			level, face, texImage->Width, texImage->Height,
--			t->image[face][level].x, t->image[face][level].y,
--			t->image[face][level].width, t->image[face][level].height,
--			size, *curOffset);
--
--	*curOffset += size;
--}
--
--
--
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c filter, \c format, etc. will be set here
-- * too.
-+ * Compute the cached hardware register values for the given texture object.
-  *
-  * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-+ * \param t the r300 texture object
-  */
--static void r300SetTexImages(r300ContextPtr rmesa,
--			     struct gl_texture_object *tObj)
-+static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
- {
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image *baseImage =
--	    tObj->Image[0][tObj->BaseLevel];
--	GLint curOffset;
--	GLint i, texelBytes;
--	GLint numLevels;
--	GLint log2Width, log2Height, log2Depth;
--
--	/* Set the hardware texture format
--	 */
-+	const struct gl_texture_image *firstImage;
-+	int firstlevel = t->mt ? t->mt->firstLevel : 0;
-+	    
-+	firstImage = t->base.Image[0][firstlevel];
-+
- 	if (!t->image_override
--	    && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
--		if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
--			r300SetDepthTexMode(tObj);
-+	    && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+		if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
-+			r300SetDepthTexMode(&t->base);
- 		} else {
--			t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
-+			t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
- 		}
- 
--		t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
-+		t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
- 	} else if (!t->image_override) {
- 		_mesa_problem(NULL, "unexpected texture format in %s",
- 			      __FUNCTION__);
- 		return;
- 	}
- 
--	texelBytes = baseImage->TexFormat->TexelBytes;
--
--	/* Compute which mipmap levels we really want to send to the hardware.
--	 */
--	driCalculateTextureFirstLastLevel((driTextureObject *) t);
--	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-+	if (t->image_override)
-+		return;
- 
--	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-+	t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
-+			| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
-+			| ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
-+			| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
- 
--	/* Calculate mipmap offsets and dimensions for blitting (uploading)
--	 * The idea is that we lay out the mipmap levels within a block of
--	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--	 */
- 	t->tile_bits = 0;
- 
--	/* figure out if this texture is suitable for tiling. */
--#if 0				/* Disabled for now */
--	if (texelBytes) {
--		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--		    /* texrect might be able to use micro tiling too in theory? */
--		    (baseImage->Height > 1)) {
--
--			/* allow 32 (bytes) x 1 mip (which will use two times the space
--			   the non-tiled version would use) max if base texture is large enough */
--			if ((numLevels == 1) ||
--			    (((baseImage->Width * texelBytes /
--			       baseImage->Height) <= 32)
--			     && (baseImage->Width * texelBytes > 64))
--			    ||
--			    ((baseImage->Width * texelBytes /
--			      baseImage->Height) <= 16)) {
--				t->tile_bits |= R300_TXO_MICRO_TILE;
--			}
--		}
--
--		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--			/* we can set macro tiling even for small textures, they will be untiled anyway */
--			t->tile_bits |= R300_TXO_MACRO_TILE;
--		}
--	}
--#endif
--
--	curOffset = 0;
-+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
-+		t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
-+	if (t->base.Target == GL_TEXTURE_3D)
-+		t->pp_txformat |= R300_TX_FORMAT_3D;
- 
--	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--		ASSERT(log2Width == log2Height);
--		t->format |= R300_TX_FORMAT_CUBIC_MAP;
- 
--		for(i = 0; i < numLevels; i++) {
--			GLuint face;
--			for(face = 0; face < 6; face++)
--				compute_tex_image_offset(tObj, face, i, &curOffset);
--		}
--	} else {
--		if (tObj->Target == GL_TEXTURE_3D)
--                	t->format |= R300_TX_FORMAT_3D;
--
--		for (i = 0; i < numLevels; i++)
--			compute_tex_image_offset(tObj, 0, i, &curOffset);
--	}
--
--	/* Align the total size of texture memory block.
--	 */
--	t->base.totalSize =
--	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--	t->size =
--	    (((tObj->Image[0][t->base.firstLevel]->Width -
--	       1) << R300_TX_WIDTHMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
--		R300_TX_HEIGHTMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
--		R300_TX_DEPTHMASK_SHIFT))
--	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
--
--	t->pitch = 0;
--
--	/* Only need to round to nearest 32 for textures, but the blitter
--	 * requires 64-byte aligned pitches, and we may/may not need the
--	 * blitter.   NPOT only!
--	 */
--	if (baseImage->IsCompressed) {
--		t->pitch |=
--		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		unsigned int align = (64 / texelBytes) - 1;
--		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
--			     texelBytes) + 63) & ~(63);
--		t->size |= R300_TX_SIZE_TXPITCH_EN;
-+	if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-+		unsigned int align = (64 / t->mt->bpp) - 1;
-+		t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
- 		if (!t->image_override)
--			t->pitch_reg =
--			    (((tObj->Image[0][t->base.firstLevel]->Width) +
--			      align) & ~align) - 1;
--	} else {
--		t->pitch |=
--		    ((tObj->Image[0][t->base.firstLevel]->Width *
--		      texelBytes) + 63) & ~(63);
-+			t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
- 	}
- 
- 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
--		t->pitch_reg |= R500_TXWIDTH_BIT11;
--	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
--		t->pitch_reg |= R500_TXHEIGHT_BIT11;
-+	    if (firstImage->Width > 2048)
-+		t->pp_txpitch |= R500_TXWIDTH_BIT11;
-+	    if (firstImage->Height > 2048)
-+		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
- 	}
- }
- 
--/* ================================================================
-- * Texture unit state management
-+/**
-+ * Ensure the given texture is ready for rendering.
-+ *
-+ * Mostly this means populating the texture object's mipmap tree.
-  */
--
--static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
-+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	radeonTexObj *t = radeon_tex_obj(texObj);
- 
--	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
-+	if (!radeon_validate_texture_miptree(ctx, texObj))
-+		return GL_FALSE;
- 
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override)
--			return GL_FALSE;
--	}
-+	/* Configure the hardware registers (more precisely, the cached version
-+	 * of the hardware registers). */
-+	setup_hardware_state(rmesa, t);
- 
-+	t->validated = GL_TRUE;
- 	return GL_TRUE;
- }
- 
--static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
-+
-+/**
-+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
-+ */
-+GLboolean r300ValidateBuffers(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_3D);
--
--	/* r300 does not support mipmaps for 3D textures. */
--	if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
--		return GL_FALSE;
-+	struct radeon_cs_space_check bos[16];
-+	struct radeon_renderbuffer *rrb;
-+	int num_bo = 0;
-+	int i;
-+	int flushed = 0, ret;
-+again:
-+	num_bo = 0;
-+
-+	rrb = radeon_get_colorbuffer(&rmesa->radeon);
-+	/* color buffer */
-+	if (rrb && rrb->bo) {
-+		bos[num_bo].bo = rrb->bo;
-+		bos[num_bo].read_domains = 0;
-+		bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
-+		bos[num_bo].new_accounted = 0;
-+		num_bo++;
- 	}
- 
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock)
--			return GL_FALSE;
-+	/* depth buffer */
-+	rrb = radeon_get_depthbuffer(&rmesa->radeon);
-+	/* color buffer */
-+	if (rrb && rrb->bo) {
-+		bos[num_bo].bo = rrb->bo;
-+		bos[num_bo].read_domains = 0;
-+		bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
-+		bos[num_bo].new_accounted = 0;
-+		num_bo++;
- 	}
-+	
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+		radeonTexObj *t;
- 
--	return GL_TRUE;
--}
--
--static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	GLuint face;
--
--	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
--
--	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
--	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
--	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
--		/* flush */
--		R300_FIREVERTICES(rmesa);
--		/* layout memory space, once for all faces */
--		r300SetTexImages(rmesa, tObj);
--	}
-+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+			continue;
- 
--	/* upload (per face) */
--	for (face = 0; face < 6; face++) {
--		if (t->base.dirty_images[face]) {
--			r300UploadTexImages(rmesa,
--					    (r300TexObjPtr) tObj->DriverData,
--					    face);
-+		if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
-+			_mesa_warning(ctx,
-+				      "failed to validate texture for unit %d.\n",
-+				      i);
- 		}
-+		t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
-+		if (t->image_override && t->bo)
-+			bos[num_bo].bo = t->bo;
-+		else if (t->mt->bo)
-+			bos[num_bo].bo = t->mt->bo;
-+		bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
-+		bos[num_bo].write_domain = 0;
-+		bos[num_bo].new_accounted = 0;
-+		num_bo++;
- 	}
- 
--	if (!t->base.memBlock) {
--		/* texmem alloc failed, use s/w fallback */
-+	ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
-+	if (ret == RADEON_CS_SPACE_OP_TO_BIG)
- 		return GL_FALSE;
--	}
--
--	return GL_TRUE;
--}
--
--static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
--
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override &&
--		    !rmesa->prefer_gart_client_texturing)
-+	if (ret == RADEON_CS_SPACE_FLUSH) {
-+		r300Flush(ctx);
-+		if (flushed)
- 			return GL_FALSE;
-+		flushed = 1;
-+		goto again;
- 	}
--
- 	return GL_TRUE;
- }
- 
--static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	/* Fallback if there's a texture border */
--	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
--		return GL_FALSE;
--
--	/* Update state if this is a different texture object to last
--	 * time.
--	 */
--	if (rmesa->state.texture.unit[unit].texobj != t) {
--		if (rmesa->state.texture.unit[unit].texobj != NULL) {
--			/* The old texture is no longer bound to this texture unit.
--			 * Mark it as such.
--			 */
--
--			rmesa->state.texture.unit[unit].texobj->base.bound &=
--			    ~(1 << unit);
--		}
--
--		rmesa->state.texture.unit[unit].texobj = t;
--		t->base.bound |= (1 << unit);
--		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
--	}
--
--	return !t->border_fallback;
--}
--
- void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 		      unsigned long long offset, GLint depth, GLuint pitch)
- {
- 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
- 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
--	r300TexObjPtr t;
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 	uint32_t pitch_val;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
- 
--	t->offset = offset;
--	t->pitch_reg &= (1 << 13) -1;
-+	t->bo = NULL;
-+	t->override_offset = offset;
-+	t->pp_txpitch &= (1 << 13) -1;
- 	pitch_val = pitch;
- 
- 	switch (depth) {
- 	case 32:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
--		t->filter |= tx_table[2].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[2].filter;
- 		pitch_val /= 4;
- 		break;
- 	case 24:
- 	default:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
--		t->filter |= tx_table[4].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[4].filter;
- 		pitch_val /= 4;
- 		break;
- 	case 16:
--		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
--		t->filter |= tx_table[5].filter;
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
-+		t->pp_txfilter |= tx_table[5].filter;
- 		pitch_val /= 2;
- 		break;
- 	}
- 	pitch_val--;
- 
--	t->pitch_reg |= pitch_val;
-+	t->pp_txpitch |= pitch_val;
- }
- 
--static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
-+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
- {
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--
--	if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
--		return (r300EnableTextureRect(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
--		return (r300EnableTexture2D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
--		return (r300EnableTexture3D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
--		return (r300EnableTextureCube(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled) {
--		return GL_FALSE;
--	} else {
--		return GL_TRUE;
--	}
--}
-+	struct gl_texture_unit *texUnit;
-+	struct gl_texture_object *texObj;
-+	struct gl_texture_image *texImage;
-+	struct radeon_renderbuffer *rb;
-+	radeon_texture_image *rImage;
-+	radeonContextPtr radeon;
-+	r300ContextPtr rmesa;
-+	GLframebuffer *fb;
-+	radeonTexObjPtr t;
-+	uint32_t pitch_val;
- 
--void r300UpdateTextureState(GLcontext * ctx)
--{
--	int i;
-+	target = GL_TEXTURE_RECTANGLE_ARB;
- 
--	for (i = 0; i < 8; i++) {
--		if (!r300UpdateTextureUnit(ctx, i)) {
--			_mesa_warning(ctx,
--				      "failed to update texture state for unit %d.\n",
--				      i);
--		}
-+	radeon = pDRICtx->driverPrivate;
-+	rmesa = pDRICtx->driverPrivate;
-+
-+	fb = dPriv->driverPrivate;
-+        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
-+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
-+        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
-+
-+	rImage = get_radeon_texture_image(texImage);
-+	t = radeon_tex_obj(texObj);
-+        if (t == NULL) {
-+    	    return;
-+    	}
-+
-+	radeon_update_renderbuffers(pDRICtx, dPriv);
-+	/* The back and depth buffers are not needed here; release their BOs right away. */
-+	rb = (void*)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+	}
-+	rb = (void*)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	if (rb && rb->bo) {
-+		radeon_bo_unref(rb->bo);
-+		rb->bo = NULL;
-+	}
-+	rb = (void*)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+	if (rb->bo == NULL) {
-+		/* Failed to get a BO for the front buffer */
-+		return;
-+	}
-+	
-+	_mesa_lock_texture(radeon->glCtx, texObj);
-+	if (t->bo) {
-+		t->bo = NULL;
- 	}
-+	if (t->mt) {
-+		t->mt = NULL;
-+	}
-+	if (rImage->mt) {
-+		radeon_miptree_unreference(rImage->mt);
-+		rImage->mt = NULL;
-+	}
-+	fprintf(stderr,"settexbuf %dx%d@%d\n", rb->width, rb->height, rb->cpp);
-+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
-+				   rb->width, rb->height, 1, 0, rb->cpp);
-+	texImage->TexFormat = &_mesa_texformat_rgba8888_rev;
-+	rImage->bo = rb->bo;
-+	
-+	t->bo = rb->bo;
-+	radeon_bo_ref(t->bo);
-+	t->tile_bits = 0;
-+	t->image_override = GL_TRUE;
-+	t->override_offset = 0;
-+	t->pp_txpitch &= (1 << 13) -1;
-+	pitch_val = rb->pitch;
-+	switch (rb->cpp) {
-+	case 4:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[2].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 3:
-+	default:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
-+		t->pp_txfilter |= tx_table[4].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 2:
-+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
-+		t->pp_txfilter |= tx_table[5].filter;
-+		pitch_val /= 2;
-+		break;
-+	}
-+	pitch_val--;
-+	t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
-+              ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
-+	t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
-+	t->pp_txpitch |= pitch_val;
-+	t->validated = GL_TRUE;
-+	_mesa_unlock_texture(radeon->glCtx, texObj);
-+	return;
- }
-diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
-index 75dae86..926ddd5 100644
---- a/src/mesa/drivers/dri/r300/r500_fragprog.c
-+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
-@@ -31,6 +31,12 @@
- #include "radeon_program_alu.h"
- 
- 
-+static void reset_srcreg(struct prog_src_register* reg)
-+{
-+	_mesa_bzero(reg, sizeof(*reg));
-+	reg->Swizzle = SWIZZLE_NOOP;
-+}
-+
- static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
- {
- 	gl_state_index fail_value_tokens[STATE_LENGTH] = {
-@@ -99,6 +105,19 @@ static GLboolean transform_TEX(
- 		destredirect = GL_TRUE;
- 	}
- 
-+	if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
-+		int tmpreg = radeonFindFreeTemporary(t);
-+		tgt = radeonAppendInstructions(t->Program, 1);
-+		tgt->Opcode = OPCODE_MOV;
-+		tgt->DstReg.File = PROGRAM_TEMPORARY;
-+		tgt->DstReg.Index = tmpreg;
-+		tgt->SrcReg[0] = inst.SrcReg[0];
-+
-+		reset_srcreg(&inst.SrcReg[0]);
-+		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
-+		inst.SrcReg[0].Index = tmpreg;
-+	}
-+
- 	tgt = radeonAppendInstructions(t->Program, 1);
- 	_mesa_copy_instructions(tgt, &inst, 1);
- 
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
-deleted file mode 100644
-index 5267fe9..0000000
---- a/src/mesa/drivers/dri/r300/radeon_context.c
-+++ /dev/null
-@@ -1,330 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/**
-- * \file radeon_context.c
-- * Common context initialization.
-- *
-- * \author Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include <dlfcn.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/state.h"
--#include "main/matrix.h"
--#include "main/framebuffer.h"
--
--#include "drivers/common/driverfuncs.h"
--#include "swrast/swrast.h"
--
--#include "radeon_screen.h"
--#include "radeon_ioctl.h"
--#include "radeon_macros.h"
--#include "radeon_reg.h"
--
--#include "radeon_state.h"
--#include "r300_state.h"
--
--#include "utils.h"
--#include "vblank.h"
--#include "xmlpool.h"		/* for symbolic values of enum-type options */
--
--#define DRIVER_DATE "20060815"
--
--
--/* Return various strings for glGetString().
-- */
--static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--	static char buffer[128];
--
--	switch (name) {
--	case GL_VENDOR:
--		if (IS_R300_CLASS(radeon->radeonScreen))
--			return (GLubyte *) "DRI R300 Project";
--		else
--			return (GLubyte *) "Tungsten Graphics, Inc.";
--
--	case GL_RENDERER:
--	{
--		unsigned offset;
--		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
--			radeon->radeonScreen->AGPMode;
--		const char* chipname;
--
--		if (IS_R300_CLASS(radeon->radeonScreen))
--			chipname = "R300";
--		else
--			chipname = "R200";
--
--		offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
--					      agp_mode);
--
--		if (IS_R300_CLASS(radeon->radeonScreen)) {
--		sprintf(&buffer[offset], " %sTCL",
--			(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
--			? "" : "NO-");
--		} else {
--			sprintf(&buffer[offset], " %sTCL",
--			!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
--			? "" : "NO-");
--		}
--
--		return (GLubyte *) buffer;
--	}
--
--	default:
--		return NULL;
--	}
--}
--
--/* Initialize the driver's misc functions.
-- */
--static void radeonInitDriverFuncs(struct dd_function_table *functions)
--{
--	functions->GetString = radeonGetString;
--}
--
--
--/**
-- * Create and initialize all common fields of the context,
-- * including the Mesa context itself.
-- */
--GLboolean radeonInitContext(radeonContextPtr radeon,
--			    struct dd_function_table* functions,
--			    const __GLcontextModes * glVisual,
--			    __DRIcontextPrivate * driContextPriv,
--			    void *sharedContextPrivate)
--{
--	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
--	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
--	GLcontext* ctx;
--	GLcontext* shareCtx;
--	int fthrottle_mode;
--
--	/* Fill in additional standard functions. */
--	radeonInitDriverFuncs(functions);
--
--	radeon->radeonScreen = screen;
--	/* Allocate and initialize the Mesa context */
--	if (sharedContextPrivate)
--		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
--	else
--		shareCtx = NULL;
--	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
--					    functions, (void *)radeon);
--	if (!radeon->glCtx)
--		return GL_FALSE;
--
--	ctx = radeon->glCtx;
--	driContextPriv->driverPrivate = radeon;
--
--	/* DRI fields */
--	radeon->dri.context = driContextPriv;
--	radeon->dri.screen = sPriv;
--	radeon->dri.drawable = NULL;
--	radeon->dri.readable = NULL;
--	radeon->dri.hwContext = driContextPriv->hHWContext;
--	radeon->dri.hwLock = &sPriv->pSAREA->lock;
--	radeon->dri.fd = sPriv->fd;
--	radeon->dri.drmMinor = sPriv->drm_version.minor;
--
--	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
--					       screen->sarea_priv_offset);
--
--	/* Setup IRQs */
--	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
--	radeon->iw.irq_seq = -1;
--	radeon->irqsEmitted = 0;
--	radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
--			  radeon->radeonScreen->irq);
--
--	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
--
--	if (!radeon->do_irqs)
--		fprintf(stderr,
--			"IRQ's not enabled, falling back to %s: %d %d\n",
--			radeon->do_usleeps ? "usleeps" : "busy waits",
--			fthrottle_mode, radeon->radeonScreen->irq);
--
--	(*sPriv->systemTime->getUST) (&radeon->swap_ust);
--
--	return GL_TRUE;
--}
--
--
--/**
-- * Cleanup common context fields.
-- * Called by r200DestroyContext/r300DestroyContext
-- */
--void radeonCleanupContext(radeonContextPtr radeon)
--{
--	/* _mesa_destroy_context() might result in calls to functions that
--	 * depend on the DriverCtx, so don't set it to NULL before.
--	 *
--	 * radeon->glCtx->DriverCtx = NULL;
--	 */
--
--	/* free the Mesa context */
--	_mesa_destroy_context(radeon->glCtx);
--
--	if (radeon->state.scissor.pClipRects) {
--		FREE(radeon->state.scissor.pClipRects);
--		radeon->state.scissor.pClipRects = 0;
--	}
--}
--
--
--/**
-- * Swap front and back buffer.
-- */
--void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
--{
--	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--		radeonContextPtr radeon;
--		GLcontext *ctx;
--
--		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--		ctx = radeon->glCtx;
--
--		if (ctx->Visual.doubleBufferMode) {
--			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--			if (radeon->doPageFlip) {
--				radeonPageFlip(dPriv);
--			} else {
--			    radeonCopyBuffer(dPriv, NULL);
--			}
--		}
--	} else {
--		/* XXX this shouldn't be an error but we can't handle it for now */
--		_mesa_problem(NULL, "%s: drawable has no context!",
--			      __FUNCTION__);
--	}
--}
--
--void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--			 int x, int y, int w, int h )
--{
--    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--	radeonContextPtr radeon;
--	GLcontext *ctx;
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--	ctx = radeon->glCtx;
--
--	if (ctx->Visual.doubleBufferMode) {
--	    drm_clip_rect_t rect;
--	    rect.x1 = x + dPriv->x;
--	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	    rect.x2 = rect.x1 + w;
--	    rect.y2 = rect.y1 + h;
--	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--	    radeonCopyBuffer(dPriv, &rect);
--	}
--    } else {
--	/* XXX this shouldn't be an error but we can't handle it for now */
--	_mesa_problem(NULL, "%s: drawable has no context!",
--		      __FUNCTION__);
--    }
--}
--
--/* Force the context `c' to be the current context and associate with it
-- * buffer `b'.
-- */
--GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
--			    __DRIdrawablePrivate * driDrawPriv,
--			    __DRIdrawablePrivate * driReadPriv)
--{
--	if (driContextPriv) {
--		radeonContextPtr radeon =
--			(radeonContextPtr) driContextPriv->driverPrivate;
--
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--				radeon->glCtx);
--
--		if (radeon->dri.drawable != driDrawPriv) {
--			if (driDrawPriv->swap_interval == (unsigned)-1) {
--				driDrawPriv->vblFlags =
--					(radeon->radeonScreen->irq != 0)
--					? driGetDefaultVBlankFlags(&radeon->
--								   optionCache)
--					: VBLANK_FLAG_NO_IRQ;
--
--				driDrawableInitVBlank(driDrawPriv);
--			}
--		}
--
--		radeon->dri.readable = driReadPriv;
--
--		if (radeon->dri.drawable != driDrawPriv ||
--		    radeon->lastStamp != driDrawPriv->lastStamp) {
--			radeon->dri.drawable = driDrawPriv;
--
--			radeonSetCliprects(radeon);
--			r300UpdateViewportOffset(radeon->glCtx);
--		}
--
--		_mesa_make_current(radeon->glCtx,
--				    (GLframebuffer *) driDrawPriv->
--				    driverPrivate,
--				    (GLframebuffer *) driReadPriv->
--				    driverPrivate);
--
--		_mesa_update_state(radeon->glCtx);		
--
--		radeonUpdatePageFlipping(radeon);
--	} else {
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--		_mesa_make_current(0, 0, 0);
--	}
--
--	if (RADEON_DEBUG & DEBUG_DRI)
--		fprintf(stderr, "End %s\n", __FUNCTION__);
--	return GL_TRUE;
--}
--
--/* Force the context `c' to be unbound from its buffer.
-- */
--GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
--{
--	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_DRI)
--		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--			radeon->glCtx);
--
--	return GL_TRUE;
--}
--
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
-index 47cbc22..250570f 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.h
-+++ b/src/mesa/drivers/dri/r300/radeon_context.h
-@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "dri_util.h"
- 
--struct radeon_context;
--typedef struct radeon_context radeonContextRec;
--typedef struct radeon_context *radeonContextPtr;
--
--/* Rasterizing fallbacks */
--/* See correponding strings in r200_swtcl.c */
--#define RADEON_FALLBACK_TEXTURE		0x0001
--#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
--#define RADEON_FALLBACK_STENCIL		0x0004
--#define RADEON_FALLBACK_RENDER_MODE	0x0008
--#define RADEON_FALLBACK_BLEND_EQ	0x0010
--#define RADEON_FALLBACK_BLEND_FUNC	0x0020
--#define RADEON_FALLBACK_DISABLE		0x0040
--#define RADEON_FALLBACK_BORDER_MODE	0x0080
-+#include "radeon_screen.h"
- 
- #if R200_MERGED
- extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
- /* TCL fallbacks */
- extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
- 
--#define RADEON_TCL_FALLBACK_RASTER		0x0001	/* rasterization */
--#define RADEON_TCL_FALLBACK_UNFILLED		0x0002	/* unfilled tris */
--#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE	0x0004	/* twoside tris */
--#define RADEON_TCL_FALLBACK_MATERIAL		0x0008	/* material in vb */
--#define RADEON_TCL_FALLBACK_TEXGEN_0		0x0010	/* texgen, unit 0 */
--#define RADEON_TCL_FALLBACK_TEXGEN_1		0x0020	/* texgen, unit 1 */
--#define RADEON_TCL_FALLBACK_TEXGEN_2		0x0040	/* texgen, unit 2 */
--#define RADEON_TCL_FALLBACK_TEXGEN_3		0x0080	/* texgen, unit 3 */
--#define RADEON_TCL_FALLBACK_TEXGEN_4		0x0100	/* texgen, unit 4 */
--#define RADEON_TCL_FALLBACK_TEXGEN_5		0x0200	/* texgen, unit 5 */
--#define RADEON_TCL_FALLBACK_TCL_DISABLE		0x0400	/* user disable */
--#define RADEON_TCL_FALLBACK_BITMAP		0x0800	/* draw bitmap with points */
--#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM	0x1000	/* vertex program active */
--
- #if R200_MERGED
- #define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
- #else
- #define TCL_FALLBACK( ctx, bit, mode )	;
- #endif
- 
--struct radeon_dri_mirror {
--	__DRIcontextPrivate *context;	/* DRI context */
--	__DRIscreenPrivate *screen;	/* DRI screen */
--	/**
--	 * DRI drawable bound to this context for drawing.
--	 */
--	__DRIdrawablePrivate *drawable;
--
--	/**
--	 * DRI drawable bound to this context for reading.
--	 */
--	__DRIdrawablePrivate *readable;
--
--	drm_context_t hwContext;
--	drm_hw_lock_t *hwLock;
--	int fd;
--	int drmMinor;
--};
--
--/**
-- * Derived state for internal purposes.
-- */
--struct radeon_scissor_state {
--	drm_clip_rect_t rect;
--	GLboolean enabled;
--
--	GLuint numClipRects;	/* Cliprects active */
--	GLuint numAllocedClipRects;	/* Cliprects available */
--	drm_clip_rect_t *pClipRects;
--};
--
--struct radeon_colorbuffer_state {
--	GLuint clear;
--	GLint drawOffset, drawPitch;
--};
--
--struct radeon_state {
--	struct radeon_colorbuffer_state color;
--	struct radeon_scissor_state scissor;
--};
--
--/**
-- * Common per-context variables shared by R200 and R300.
-- * R200- and R300-specific code "derive" their own context from this
-- * structure.
-- */
--struct radeon_context {
--	GLcontext *glCtx;	/* Mesa context */
--	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
--
--	/* Fallback state */
--	GLuint Fallback;
--	GLuint TclFallback;
--
--	/* Page flipping */
--	GLuint doPageFlip;
--
--	/* Drawable, cliprect and scissor information */
--	GLuint numClipRects;	/* Cliprects for the draw buffer */
--	drm_clip_rect_t *pClipRects;
--	unsigned int lastStamp;
--	GLboolean lost_context;
--	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
--
--	/* Mirrors of some DRI state */
--	struct radeon_dri_mirror dri;
--
--	/* Busy waiting */
--	GLuint do_usleeps;
--	GLuint do_irqs;
--	GLuint irqsEmitted;
--	drm_radeon_irq_wait_t iw;
--
--	/* buffer swap */
--	int64_t swap_ust;
--	int64_t swap_missed_ust;
--
--	GLuint swap_count;
--	GLuint swap_missed_count;
--
--	/* Derived state */
--	struct radeon_state state;
--
--	/* Configuration cache
--	 */
--	driOptionCache optionCache;
--};
--
--#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
--
--extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
--extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--				int x, int y, int w, int h);
--extern GLboolean radeonInitContext(radeonContextPtr radeon,
--				   struct dd_function_table *functions,
--				   const __GLcontextModes * glVisual,
--				   __DRIcontextPrivate * driContextPriv,
--				   void *sharedContextPrivate);
--extern void radeonCleanupContext(radeonContextPtr radeon);
--extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
--				   __DRIdrawablePrivate * driDrawPriv,
--				   __DRIdrawablePrivate * driReadPriv);
--extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
--
--/* ================================================================
-- * Debugging:
-- */
--#define DO_DEBUG		1
--
--#if DO_DEBUG
--extern int RADEON_DEBUG;
--#else
--#define RADEON_DEBUG		0
--#endif
--
--#define DEBUG_TEXTURE	0x0001
--#define DEBUG_STATE	0x0002
--#define DEBUG_IOCTL	0x0004
--#define DEBUG_PRIMS	0x0008
--#define DEBUG_VERTS	0x0010
--#define DEBUG_FALLBACKS	0x0020
--#define DEBUG_VFMT	0x0040
--#define DEBUG_CODEGEN	0x0080
--#define DEBUG_VERBOSE	0x0100
--#define DEBUG_DRI       0x0200
--#define DEBUG_DMA       0x0400
--#define DEBUG_SANITY    0x0800
--#define DEBUG_SYNC      0x1000
--#define DEBUG_PIXEL     0x2000
--#define DEBUG_MEMORY    0x4000
- 
- #endif				/* __RADEON_CONTEXT_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-deleted file mode 100644
-index 36502eb..0000000
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
-+++ /dev/null
-@@ -1,396 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include <sched.h>
--#include <errno.h>
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/macros.h"
--#include "main/context.h"
--#include "swrast/swrast.h"
--#include "r300_context.h"
--#include "radeon_ioctl.h"
--#include "r300_ioctl.h"
--#include "r300_state.h"
--#include "radeon_reg.h"
--
--#include "drirenderbuffer.h"
--#include "vblank.h"
--
--static void radeonWaitForIdle(radeonContextPtr radeon);
--
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
--{
--	drm_radeon_getparam_t gp;
--	int ret;
--	uint32_t frame;
--
--	gp.param = RADEON_PARAM_LAST_FRAME;
--	gp.value = (int *)&frame;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
--				  &gp, sizeof(gp));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--
--	return frame;
--}
--
--uint32_t radeonGetAge(radeonContextPtr radeon)
--{
--	drm_radeon_getparam_t gp;
--	int ret;
--	uint32_t age;
--
--	gp.param = RADEON_PARAM_LAST_CLEAR;
--	gp.value = (int *)&age;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
--				  &gp, sizeof(gp));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--
--	return age;
--}
--
--static void radeonEmitIrqLocked(radeonContextPtr radeon)
--{
--	drm_radeon_irq_emit_t ie;
--	int ret;
--
--	ie.irq_seq = &radeon->iw.irq_seq;
--	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
--				  &ie, sizeof(ie));
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--}
--
--static void radeonWaitIrq(radeonContextPtr radeon)
--{
--	int ret;
--
--	do {
--		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
--				      &radeon->iw, sizeof(radeon->iw));
--	} while (ret && (errno == EINTR || errno == EBUSY));
--
--	if (ret) {
--		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
--			ret);
--		exit(1);
--	}
--}
--
--static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
--{
--	drm_radeon_sarea_t *sarea = radeon->sarea;
--
--	if (radeon->do_irqs) {
--		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
--			if (!radeon->irqsEmitted) {
--				while (radeonGetLastFrame(radeon) <
--				       sarea->last_frame) ;
--			} else {
--				UNLOCK_HARDWARE(radeon);
--				radeonWaitIrq(radeon);
--				LOCK_HARDWARE(radeon);
--			}
--			radeon->irqsEmitted = 10;
--		}
--
--		if (radeon->irqsEmitted) {
--			radeonEmitIrqLocked(radeon);
--			radeon->irqsEmitted--;
--		}
--	} else {
--		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
--			UNLOCK_HARDWARE(radeon);
--			if (radeon->do_usleeps)
--				DO_USLEEP(1);
--			LOCK_HARDWARE(radeon);
--		}
--	}
--}
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
--		      const drm_clip_rect_t	 * rect)
--{
--	radeonContextPtr radeon;
--	GLint nbox, i, ret;
--	GLboolean missed_target;
--	int64_t ust;
--	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--	assert(dPriv);
--	assert(dPriv->driContextPriv);
--	assert(dPriv->driContextPriv->driverPrivate);
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
--			(void *)radeon->glCtx);
--	}
--
--	r300Flush(radeon->glCtx);
--
--	LOCK_HARDWARE(radeon);
--
--	/* Throttle the frame rate -- only allow one pending swap buffers
--	 * request at a time.
--	 */
--	radeonWaitForFrameCompletion(radeon);
--	if (!rect)
--	{
--	    UNLOCK_HARDWARE(radeon);
--	    driWaitForVBlank(dPriv, &missed_target);
--	    LOCK_HARDWARE(radeon);
--	}
--
--	nbox = dPriv->numClipRects;	/* must be in locked region */
--
--	for (i = 0; i < nbox;) {
--		GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
--		drm_clip_rect_t *box = dPriv->pClipRects;
--		drm_clip_rect_t *b = radeon->sarea->boxes;
--		GLint n = 0;
--
--		for ( ; i < nr ; i++ ) {
--
--		    *b = box[i];
--
--		    if (rect)
--		    {
--			if (rect->x1 > b->x1)
--			    b->x1 = rect->x1;
--			if (rect->y1 > b->y1)
--			    b->y1 = rect->y1;
--			if (rect->x2 < b->x2)
--			    b->x2 = rect->x2;
--			if (rect->y2 < b->y2)
--			    b->y2 = rect->y2;
--
--			if (b->x1 >= b->x2 || b->y1 >= b->y2)
--			    continue;
--		    }
--
--		    b++;
--		    n++;
--		}
--		radeon->sarea->nbox = n;
--
--		if (!n)
--		   continue;
--
--		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
--
--		if (ret) {
--			fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
--				ret);
--			UNLOCK_HARDWARE(radeon);
--			exit(1);
--		}
--	}
--
--	UNLOCK_HARDWARE(radeon);
--	if (!rect)
--	{
--	    ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
--
--	    radeon->swap_count++;
--	    (*psp->systemTime->getUST) (&ust);
--	    if (missed_target) {
--		radeon->swap_missed_count++;
--		radeon->swap_missed_ust = ust - radeon->swap_ust;
--	    }
--
--	    radeon->swap_ust = ust;
--
--	    sched_yield();
--	}
--}
--
--void radeonPageFlip(__DRIdrawablePrivate * dPriv)
--{
--	radeonContextPtr radeon;
--	GLint ret;
--	GLboolean missed_target;
--	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
--
--	assert(dPriv);
--	assert(dPriv->driContextPriv);
--	assert(dPriv->driContextPriv->driverPrivate);
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--			radeon->sarea->pfCurrentPage);
--	}
--
--	r300Flush(radeon->glCtx);
--	LOCK_HARDWARE(radeon);
--
--	if (!dPriv->numClipRects) {
--		UNLOCK_HARDWARE(radeon);
--		usleep(10000);	/* throttle invisible client 10ms */
--		return;
--	}
--
--	/* Need to do this for the perf box placement:
--	 */
--	{
--		drm_clip_rect_t *box = dPriv->pClipRects;
--		drm_clip_rect_t *b = radeon->sarea->boxes;
--		b[0] = box[0];
--		radeon->sarea->nbox = 1;
--	}
--
--	/* Throttle the frame rate -- only allow a few pending swap buffers
--	 * request at a time.
--	 */
--	radeonWaitForFrameCompletion(radeon);
--	UNLOCK_HARDWARE(radeon);
--	driWaitForVBlank(dPriv, &missed_target);
--	if (missed_target) {
--		radeon->swap_missed_count++;
--		(void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
--	}
--	LOCK_HARDWARE(radeon);
--
--	ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
--
--	UNLOCK_HARDWARE(radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
--		exit(1);
--	}
--
--	radeon->swap_count++;
--	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
--
--        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
--                             radeon->sarea->pfCurrentPage);
--
--	if (radeon->sarea->pfCurrentPage == 1) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	}
--
--	if (IS_R300_CLASS(radeon->radeonScreen)) {
--		r300ContextPtr r300 = (r300ContextPtr)radeon;
--		R300_STATECHANGE(r300, cb);
--		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
--						r300->radeon.radeonScreen->fbLocation;
--		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--		
--		if (r300->radeon.radeonScreen->cpp == 4)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--		else
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--	
--		if (r300->radeon.sarea->tiling_enabled)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--	}
--}
--
--void radeonWaitForIdleLocked(radeonContextPtr radeon)
--{
--	int ret;
--	int i = 0;
--
--	do {
--		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
--		if (ret)
--			DO_USLEEP(1);
--	} while (ret && ++i < 100);
--
--	if (ret < 0) {
--		UNLOCK_HARDWARE(radeon);
--		fprintf(stderr, "Error: R300 timed out... exiting\n");
--		exit(-1);
--	}
--}
--
--static void radeonWaitForIdle(radeonContextPtr radeon)
--{
--	LOCK_HARDWARE(radeon);
--	radeonWaitForIdleLocked(radeon);
--	UNLOCK_HARDWARE(radeon);
--}
--
--void radeonFlush(GLcontext * ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	if (IS_R300_CLASS(radeon->radeonScreen))
--		r300Flush(ctx);
--}
--
--
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void radeonFinish(GLcontext * ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	radeonFlush(ctx);
--
--	if (radeon->do_irqs) {
--		LOCK_HARDWARE(radeon);
--		radeonEmitIrqLocked(radeon);
--		UNLOCK_HARDWARE(radeon);
--		radeonWaitIrq(radeon);
--	} else
--		radeonWaitForIdle(radeon);
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h
-deleted file mode 100644
-index 3add775..0000000
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.h
-+++ /dev/null
-@@ -1,57 +0,0 @@
--/*
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#ifndef __RADEON_IOCTL_H__
--#define __RADEON_IOCTL_H__
--
--#include "main/simple_list.h"
--#include "radeon_dri.h"
--#include "radeon_lock.h"
--
--#include "xf86drm.h"
--#include "drm.h"
--#if 0
--#include "r200context.h"
--#endif
--#include "radeon_drm.h"
--
--extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
--			     const drm_clip_rect_t	* rect);
--extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
--extern void radeonFlush(GLcontext * ctx);
--extern void radeonFinish(GLcontext * ctx);
--extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
--extern uint32_t radeonGetAge(radeonContextPtr radeon);
--
--#endif				/* __RADEON_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
-deleted file mode 100644
-index 4f47afd..0000000
---- a/src/mesa/drivers/dri/r300/radeon_lock.c
-+++ /dev/null
-@@ -1,137 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *   Kevin E. Martin <martin@valinux.com>
-- */
--
--#include "radeon_lock.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
--#include "r300_context.h"
--#include "r300_state.h"
--
--#include "main/framebuffer.h"
--
--#include "drirenderbuffer.h"
--
--#if DEBUG_LOCKING
--char *prevLockFile = NULL;
--int prevLockLine = 0;
--#endif
--
--/* Turn on/off page flipping according to the flags in the sarea:
-- */
--void radeonUpdatePageFlipping(radeonContextPtr rmesa)
--{
--	int use_back;
--
--	rmesa->doPageFlip = rmesa->sarea->pfState;
--	if (rmesa->glCtx->WinSysDrawBuffer) {
--		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--				     rmesa->sarea->pfCurrentPage);
--		r300UpdateDrawBuffer(rmesa->glCtx);
--	}
--
--	use_back = rmesa->glCtx->DrawBuffer ?
--	    (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
--	     BUFFER_BACK_LEFT) : 1;
--	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
--
--	if (use_back) {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->backOffset;
--		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
--	} else {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->frontOffset;
--		rmesa->state.color.drawPitch =
--		    rmesa->radeonScreen->frontPitch;
--	}
--}
--
--/* Update the hardware state.  This is called if another context has
-- * grabbed the hardware lock, which includes the X server.  This
-- * function also updates the driver's window state after the X server
-- * moves, resizes or restacks a window -- the change will be reflected
-- * in the drawable position and clip rects.  Since the X server grabs
-- * the hardware lock when it changes the window state, this routine will
-- * automatically be called after such a change.
-- */
--void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
--{
--	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
--	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
--	drm_radeon_sarea_t *sarea = rmesa->sarea;
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--
--	assert(drawable != NULL);
--
--	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
--
--	/* The window might have moved, so we might need to get new clip
--	 * rects.
--	 *
--	 * NOTE: This releases and regrabs the hw lock to allow the X server
--	 * to respond to the DRI protocol request for new drawable info.
--	 * Since the hardware state depends on having the latest drawable
--	 * clip rects, all state checking must be done _after_ this call.
--	 */
--	DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
--	if (drawable != readable) {
--		DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
--	}
--
--	if (rmesa->lastStamp != drawable->lastStamp) {
--		radeonUpdatePageFlipping(rmesa);
--		radeonSetCliprects(rmesa);
--		r300UpdateViewportOffset(rmesa->glCtx);
--		driUpdateFramebufferSize(rmesa->glCtx, drawable);
--	}
--
--	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--
--		sarea->ctx_owner = rmesa->dri.hwContext;
--		for (i = 0; i < r300->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
--		}
--	}
--
--	rmesa->lost_context = GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
-deleted file mode 100644
-index a344837..64bdf94
---- a/src/mesa/drivers/dri/r300/radeon_lock.h
-+++ /dev/null
-@@ -1,115 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *   Kevin E. Martin <martin@valinux.com>
-- */
--
--#ifndef __RADEON_LOCK_H__
--#define __RADEON_LOCK_H__
--
--#include "radeon_context.h"
--
--extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
--extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
--
--/* Turn DEBUG_LOCKING on to find locking conflicts.
-- */
--#define DEBUG_LOCKING	0
--
--#if DEBUG_LOCKING
--extern char *prevLockFile;
--extern int prevLockLine;
--
--#define DEBUG_LOCK()							\
--   do {									\
--      prevLockFile = (__FILE__);					\
--      prevLockLine = (__LINE__);					\
--   } while (0)
--
--#define DEBUG_RESET()							\
--   do {									\
--      prevLockFile = 0;							\
--      prevLockLine = 0;							\
--   } while (0)
--
--#define DEBUG_CHECK_LOCK()						\
--   do {									\
--      if (prevLockFile) {						\
--	 fprintf(stderr,						\
--		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
--		  prevLockFile, prevLockLine, __FILE__, __LINE__);	\
--	 exit(1);							\
--      }									\
--   } while (0)
--
--#else
--
--#define DEBUG_LOCK()
--#define DEBUG_RESET()
--#define DEBUG_CHECK_LOCK()
--
--#endif
--
--/*
-- * !!! We may want to separate locks from locks with validation.  This
-- * could be used to improve performance for those things commands that
-- * do not do any drawing !!!
-- */
--
--/* Lock the hardware and validate our state.
-- */
--#define LOCK_HARDWARE( rmesa )						\
--	do {								\
--		char __ret = 0;						\
--		DEBUG_CHECK_LOCK();					\
--		DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext,	\
--			(DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
--		if (__ret)						\
--			radeonGetLock((rmesa), 0);			\
--		DEBUG_LOCK();						\
--	} while (0)
--
--#define UNLOCK_HARDWARE( rmesa )					\
--	do {								\
--		DRM_UNLOCK((rmesa)->dri.fd,				\
--			(rmesa)->dri.hwLock,				\
--			(rmesa)->dri.hwContext);			\
--		DEBUG_RESET();						\
--	} while (0)
--
--#endif				/* __RADEON_LOCK_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
-index 58bc0d5..8a945d8 100644
---- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
-+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
-@@ -35,7 +35,7 @@
- 
- #include "radeon_program_pair.h"
- 
--#include "radeon_context.h"
-+#include "radeon_common.h"
- 
- #include "shader/prog_print.h"
- 
-diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
-deleted file mode 100644
-index 16f9fb9..0000000
---- a/src/mesa/drivers/dri/r300/radeon_span.c
-+++ /dev/null
-@@ -1,349 +0,0 @@
--/**************************************************************************
--
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *
-- */
--
--#include "main/glheader.h"
--#include "swrast/swrast.h"
--
--#include "r300_state.h"
--#include "radeon_ioctl.h"
--#include "r300_ioctl.h"
--#include "radeon_span.h"
--
--#include "drirenderbuffer.h"
--
--#define DBG 0
--
--/*
-- * Note that all information needed to access pixels in a renderbuffer
-- * should be obtained through the gl_renderbuffer parameter, not per-context
-- * information.
-- */
--#define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
--   const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
--
--#define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
--
--#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
--
--#define Y_FLIP(Y) (bottom - (Y))
--
--#define HW_LOCK()
--
--#define HW_UNLOCK()
--
--/* ================================================================
-- * Color buffer
-- */
--
--/* 16 bit, RGB565 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_RGB
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
--
--#define TAG(x)    radeon##x##_RGB565
--#define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
--#include "spantmp2.h"
--
--/* 32 bit, ARGB8888 color spanline and pixel functions
-- */
--#define SPANTMP_PIXEL_FMT GL_BGRA
--#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
--
--#define TAG(x)    radeon##x##_ARGB8888
--#define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
--#include "spantmp2.h"
--
--/* ================================================================
-- * Depth buffer
-- */
--
--/* The Radeon family has depth tiling on all the time, so we have to convert
-- * the x,y coordinates into the memory bus address (mba) in the same
-- * manner as the engine.  In each case, the linear block address (ba)
-- * is calculated, and then wired with x and y to produce the final
-- * memory address.
-- * The chip will do address translation on its own if the surface registers
-- * are set up correctly. It is not quite enough to get it working with hyperz
-- * too...
-- */
--
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 4 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0..1] = 0           */
--
--#ifdef COMPILE_R300
--		ba = (y / 8) * (pitch / 8) + (x / 8);
--#else
--		ba = (y / 16) * (pitch / 16) + (x / 16);
--#endif
--
--		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
--		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
--		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 2 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0]    = 0           */
--
--		ba = (y / 16) * (pitch / 32) + (x / 32);
--
--		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
--		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
--		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--/* 16-bit depth buffer functions
-- */
--#define VALUE_TYPE GLushort
--
--#define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
--
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
--
--#define TAG(x) radeon##x##_z16
--#include "depthtmp.h"
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- *
-- * Careful: It looks like the R300 uses ZZZS byte order while the R200
-- * uses SZZZ for 24 bit depth, 8 bit stencil mode.
-- */
--#define VALUE_TYPE GLuint
--
--#ifdef COMPILE_R300
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x000000ff;							\
--   tmp |= ((d << 8) & 0xffffff00);					\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#else
--#define WRITE_DEPTH( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xff000000;							\
--   tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#endif
--
--#ifdef COMPILE_R300
--#define READ_DEPTH( d, _x, _y )						\
--  do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
--					 _y + yo )) & 0xffffff00) >> 8; \
--  }while(0)
--#else
--#define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
--#endif
--
--#define TAG(x) radeon##x##_z24_s8
--#include "depthtmp.h"
--
--/* ================================================================
-- * Stencil buffer
-- */
--
--/* 24 bit depth, 8 bit stencil depthbuffer functions
-- */
--#ifdef COMPILE_R300
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0xffffff00;							\
--   tmp |= (d) & 0xff;							\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#else
--#define WRITE_STENCIL( _x, _y, d )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   tmp &= 0x00ffffff;							\
--   tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
--} while (0)
--#endif
--
--#ifdef COMPILE_R300
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   d = tmp & 0x000000ff;						\
--} while (0)
--#else
--#define READ_STENCIL( d, _x, _y )					\
--do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
--   d = (tmp & 0xff000000) >> 24;					\
--} while (0)
--#endif
--
--#define TAG(x) radeon##x##_z24_s8
--#include "stenciltmp.h"
--
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
--
--static void radeonSpanRenderStart(GLcontext * ctx)
--{
--	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--#ifdef COMPILE_R300
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--	R300_FIREVERTICES(r300);
--#else
--	RADEON_FIREVERTICES(rmesa);
--#endif
--	LOCK_HARDWARE(rmesa);
--	radeonWaitForIdleLocked(rmesa);
--
--	/* Read the first pixel in the frame buffer.  This should
--	 * be a noop, right?  In fact without this conform fails as reading
--	 * from the framebuffer sometimes produces old results -- the
--	 * on-card read cache gets mixed up and doesn't notice that the
--	 * framebuffer has been updated.
--	 *
--	 * Note that we should probably be reading some otherwise unused
--	 * region of VRAM, otherwise we might get incorrect results when
--	 * reading pixels from the top left of the screen.
--	 *
--	 * I found this problem on an R420 with glean's texCube test.
--	 * Note that the R200 span code also *writes* the first pixel in the
--	 * framebuffer, but I've found this to be unnecessary.
--	 *  -- Nicolai Hähnle, June 2008
--	 */
--	{
--		int p;
--		driRenderbuffer *drb =
--			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
--		volatile int *buf =
--			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
--		p = *buf;
--	}
--}
--
--static void radeonSpanRenderFinish(GLcontext * ctx)
--{
--	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--	_swrast_flush(ctx);
--	UNLOCK_HARDWARE(rmesa);
--}
--
--void radeonInitSpanFuncs(GLcontext * ctx)
--{
--	struct swrast_device_driver *swdd =
--	    _swrast_GetDeviceDriverReference(ctx);
--	swdd->SpanRenderStart = radeonSpanRenderStart;
--	swdd->SpanRenderFinish = radeonSpanRenderFinish;
--}
--
--/**
-- * Plug in the Get/Put routines for the given driRenderbuffer.
-- */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
--{
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
--	}
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
-deleted file mode 100644
-index c401da6..0000000
---- a/src/mesa/drivers/dri/r300/radeon_state.c
-+++ /dev/null
-@@ -1,244 +0,0 @@
--/**************************************************************************
--
--Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
--
--The Weather Channel (TM) funded Tungsten Graphics to develop the
--initial release of the Radeon 8500 driver under the XFree86 license.
--This notice must be preserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/api_arrayelt.h"
--#include "main/enums.h"
--#include "main/framebuffer.h"
--#include "main/colormac.h"
--#include "main/light.h"
--
--#include "swrast/swrast.h"
--#include "vbo/vbo.h"
--#include "tnl/tnl.h"
--#include "tnl/t_pipeline.h"
--#include "swrast_setup/swrast_setup.h"
--
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
--#include "r300_ioctl.h"
--
--
--/* =============================================================
-- * Scissoring
-- */
--
--static GLboolean intersect_rect(drm_clip_rect_t * out,
--				drm_clip_rect_t * a, drm_clip_rect_t * b)
--{
--	*out = *a;
--	if (b->x1 > out->x1)
--		out->x1 = b->x1;
--	if (b->y1 > out->y1)
--		out->y1 = b->y1;
--	if (b->x2 < out->x2)
--		out->x2 = b->x2;
--	if (b->y2 < out->y2)
--		out->y2 = b->y2;
--	if (out->x1 >= out->x2)
--		return GL_FALSE;
--	if (out->y1 >= out->y2)
--		return GL_FALSE;
--	return GL_TRUE;
--}
--
--void radeonRecalcScissorRects(radeonContextPtr radeon)
--{
--	drm_clip_rect_t *out;
--	int i;
--
--	/* Grow cliprect store?
--	 */
--	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
--		while (radeon->state.scissor.numAllocedClipRects <
--		       radeon->numClipRects) {
--			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
--			radeon->state.scissor.numAllocedClipRects *= 2;
--		}
--
--		if (radeon->state.scissor.pClipRects)
--			FREE(radeon->state.scissor.pClipRects);
--
--		radeon->state.scissor.pClipRects =
--		    MALLOC(radeon->state.scissor.numAllocedClipRects *
--			   sizeof(drm_clip_rect_t));
--
--		if (radeon->state.scissor.pClipRects == NULL) {
--			radeon->state.scissor.numAllocedClipRects = 0;
--			return;
--		}
--	}
--
--	out = radeon->state.scissor.pClipRects;
--	radeon->state.scissor.numClipRects = 0;
--
--	for (i = 0; i < radeon->numClipRects; i++) {
--		if (intersect_rect(out,
--				   &radeon->pClipRects[i],
--				   &radeon->state.scissor.rect)) {
--			radeon->state.scissor.numClipRects++;
--			out++;
--		}
--	}
--}
--
--void radeonUpdateScissor(GLcontext* ctx)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	if (radeon->dri.drawable) {
--		__DRIdrawablePrivate *dPriv = radeon->dri.drawable;
--		int x1 = dPriv->x + ctx->Scissor.X;
--		int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
--
--		radeon->state.scissor.rect.x1 = x1;
--		radeon->state.scissor.rect.y1 = y1;
--		radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
--		radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
--
--		radeonRecalcScissorRects(radeon);
--	}
--}
--
--static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
--{
--	if (ctx->Scissor.Enabled) {
--		/* We don't pipeline cliprect changes */
--		r300Flush(ctx);
--		radeonUpdateScissor(ctx);
--	}
--}
--
--
--/**
-- * Update cliprects and scissors.
-- */
--void radeonSetCliprects(radeonContextPtr radeon)
--{
--	__DRIdrawablePrivate *const drawable = radeon->dri.drawable;
--	__DRIdrawablePrivate *const readable = radeon->dri.readable;
--	GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
--	GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
--
--	if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--		/* Can't ignore 2d windows if we are page flipping. */
--		if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
--		    radeon->sarea->pfCurrentPage == 1) {
--			radeon->numClipRects = drawable->numClipRects;
--			radeon->pClipRects = drawable->pClipRects;
--		} else {
--			radeon->numClipRects = drawable->numBackClipRects;
--			radeon->pClipRects = drawable->pBackClipRects;
--		}
--	} else {
--		/* front buffer (or none, or multiple buffers */
--		radeon->numClipRects = drawable->numClipRects;
--		radeon->pClipRects = drawable->pClipRects;
--	}
--
--	if ((draw_fb->Width != drawable->w) ||
--	    (draw_fb->Height != drawable->h)) {
--		_mesa_resize_framebuffer(radeon->glCtx, draw_fb,
--					 drawable->w, drawable->h);
--		draw_fb->Initialized = GL_TRUE;
--	}
--
--	if (drawable != readable) {
--		if ((read_fb->Width != readable->w) ||
--		    (read_fb->Height != readable->h)) {
--			_mesa_resize_framebuffer(radeon->glCtx, read_fb,
--						 readable->w, readable->h);
--			read_fb->Initialized = GL_TRUE;
--		}
--	}
--
--	if (radeon->state.scissor.enabled)
--		radeonRecalcScissorRects(radeon);
--
--	radeon->lastStamp = drawable->lastStamp;
--}
--
--
--/**
-- * Handle common enable bits.
-- * Called as a fallback by r200Enable/r300Enable.
-- */
--void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
--{
--	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
--
--	switch(cap) {
--	case GL_SCISSOR_TEST:
--		/* We don't pipeline cliprect & scissor changes */
--		r300Flush(ctx);
--
--		radeon->state.scissor.enabled = state;
--		radeonUpdateScissor(ctx);
--		break;
--
--	default:
--		return;
--	}
--}
--
--
--/**
-- * Initialize default state.
-- * This function is called once at context init time from
-- * r200InitState/r300InitState
-- */
--void radeonInitState(radeonContextPtr radeon)
--{
--	radeon->Fallback = 0;
--
--	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	}
--}
--
--
--/**
-- * Initialize common state functions.
-- * Called by r200InitStateFuncs/r300InitStateFuncs
-- */
--void radeonInitStateFuncs(struct dd_function_table *functions)
--{
--	functions->Scissor = radeonScissor;
--}
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h
-deleted file mode 100644
-index 821cb40..0000000
---- a/src/mesa/drivers/dri/r300/radeon_state.h
-+++ /dev/null
-@@ -1,43 +0,0 @@
--/*
--Copyright (C) 2004 Nicolai Haehnle.  All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Nicolai Haehnle <prefect_@gmx.net>
-- */
--
--#ifndef __RADEON_STATE_H__
--#define __RADEON_STATE_H__
--
--extern void radeonRecalcScissorRects(radeonContextPtr radeon);
--extern void radeonSetCliprects(radeonContextPtr radeon);
--extern void radeonUpdateScissor(GLcontext* ctx);
--
--extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
--
--extern void radeonInitState(radeonContextPtr radeon);
--extern void radeonInitStateFuncs(struct dd_function_table* functions);
--
--#endif
-diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
-index f223b2d..f469c6f 100644
---- a/src/mesa/drivers/dri/radeon/Makefile
-+++ b/src/mesa/drivers/dri/radeon/Makefile
-@@ -4,25 +4,36 @@
- TOP = ../../../../..
- include $(TOP)/configs/current
- 
-+CFLAGS += $(RADEON_CFLAGS)
-+
- LIBNAME = radeon_dri.so
- 
- MINIGLX_SOURCES = server/radeon_dri.c 
- 
-+RADEON_COMMON_SOURCES = \
-+	radeon_texture.c \
-+	radeon_common_context.c \
-+	radeon_common.c \
-+	radeon_dma.c \
-+	radeon_lock.c \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_mipmap_tree.c \
-+	radeon_span.c
-+
- DRIVER_SOURCES = \
- 	radeon_context.c \
- 	radeon_ioctl.c \
--	radeon_lock.c \
- 	radeon_screen.c \
- 	radeon_state.c \
- 	radeon_state_init.c \
- 	radeon_tex.c \
--	radeon_texmem.c \
- 	radeon_texstate.c \
- 	radeon_tcl.c \
- 	radeon_swtcl.c \
--	radeon_span.c \
- 	radeon_maos.c \
--	radeon_sanity.c 
-+	radeon_sanity.c \
-+	$(RADEON_COMMON_SOURCES)
- 
- C_SOURCES = \
- 	$(COMMON_SOURCES) \
-@@ -30,6 +41,8 @@ C_SOURCES = \
- 
- DRIVER_DEFINES = -DRADEON_COMMON=0
- 
-+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
-+
- X86_SOURCES = 
- 
- include ../Makefile.template
+From c4030c794274b22ba6ccb7c919900b41f5c723f2 Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Wed, 4 Mar 2009 16:51:14 +1000
+Subject: [PATCH] radeon/r100/r200: import latest merge
+
+---
+ src/mesa/drivers/dri/radeon/radeon_bo_drm.h        |  182 ++++
+ src/mesa/drivers/dri/radeon/radeon_bo_legacy.c     |  825 +++++++++++++++++
+ src/mesa/drivers/dri/radeon/radeon_bo_legacy.h     |   47 +
+ src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h  |   67 ++
+ src/mesa/drivers/dri/radeon/radeon_cmdbuf.h        |  143 +++
+ src/mesa/drivers/dri/radeon/radeon_common.c        |  849 +++++++++++++++++
+ src/mesa/drivers/dri/radeon/radeon_common.h        |   55 ++
+ .../drivers/dri/radeon/radeon_common_context.c     |  589 ++++++++++++
+ .../drivers/dri/radeon/radeon_common_context.h     |  508 ++++++++++
+ src/mesa/drivers/dri/radeon/radeon_cs_drm.h        |  207 +++++
+ src/mesa/drivers/dri/radeon/radeon_cs_legacy.c     |  504 ++++++++++
+ src/mesa/drivers/dri/radeon/radeon_cs_legacy.h     |   40 +
+ src/mesa/drivers/dri/radeon/radeon_dma.c           |  323 +++++++
+ src/mesa/drivers/dri/radeon/radeon_dma.h           |   51 +
+ src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c   |  360 ++++++++
+ src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h   |   97 ++
+ src/mesa/drivers/dri/radeon/radeon_texture.c       |  966 ++++++++++++++++++++
+ src/mesa/drivers/dri/radeon/radeon_texture.h       |  118 +++
+ 18 files changed, 5931 insertions(+), 0 deletions(-)
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_drm.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_drm.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.c
+ create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.h
+
 diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
 new file mode 100644
 index 0000000..1ed13f1
@@ -22162,1355 +3361,6 @@ index 0000000..a200e90
 +#endif
 +
 +#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
-deleted file mode 100644
-index 46b490d..0000000
---- a/src/mesa/drivers/dri/radeon/radeon_compat.c
-+++ /dev/null
-@@ -1,301 +0,0 @@
--/**************************************************************************
--
--Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
--               Tungsten Graphics Inc., Austin, Texas.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining a
--copy of this software and associated documentation files (the "Software"),
--to deal in the Software without restriction, including without limitation
--on the rights to use, copy, modify, merge, publish, distribute, sub
--license, and/or sell copies of the Software, and to permit persons to whom
--the Software is furnished to do so, subject to the following conditions:
--
--The above copyright notice and this permission notice (including the next
--paragraph) shall be included in all copies or substantial portions of the
--Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
--ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
--DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
--OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
--USE OR OTHER DEALINGS IN THE SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Keith Whitwell <keith@tungstengraphics.com>
-- *
-- */
--
--#include "main/glheader.h"
--#include "main/imports.h"
--
--#include "radeon_context.h"
--#include "radeon_state.h"
--#include "radeon_ioctl.h"
--
--
--static struct { 
--	int start; 
--	int len; 
--	const char *name;
--} packet[RADEON_MAX_STATE_PACKETS] = {
--	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
--	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
--	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
--	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
--	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
--	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
--	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
--	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
--	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
--	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
--	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
--	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
--	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
--	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
--	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
--	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
--	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
--	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
--	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
--	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
--	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
--};
--
--
--static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
--				    struct radeon_state_atom *state )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--   drm_radeon_context_regs_t *ctx = &sarea->context_state;
--   drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
--   drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
--   int i;
--   int *buf = state->cmd;
--
--   for ( i = 0 ; i < state->cmd_size ; ) {
--      drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
--
--      if (RADEON_DEBUG & DEBUG_STATE)
--	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
--		 packet[(int)header->packet.packet_id].name);
--
--      switch (header->packet.packet_id) {
--      case RADEON_EMIT_PP_MISC:
--	 ctx->pp_misc = buf[i++]; 
--	 ctx->pp_fog_color = buf[i++];
--	 ctx->re_solid_color = buf[i++];
--	 ctx->rb3d_blendcntl = buf[i++];
--	 ctx->rb3d_depthoffset = buf[i++];
--	 ctx->rb3d_depthpitch = buf[i++];
--	 ctx->rb3d_zstencilcntl = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_PP_CNTL:
--	 ctx->pp_cntl = buf[i++];
--	 ctx->rb3d_cntl = buf[i++];
--	 ctx->rb3d_coloroffset = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_RB3D_COLORPITCH:
--	 ctx->rb3d_colorpitch = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
--	 break;
--      case RADEON_EMIT_RE_LINE_PATTERN:
--	 ctx->re_line_pattern = buf[i++];
--	 ctx->re_line_state = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_LINE;
--	 break;
--      case RADEON_EMIT_SE_LINE_WIDTH:
--	 ctx->se_line_width = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_LINE;
--	 break;
--      case RADEON_EMIT_PP_LUM_MATRIX:
--	 ctx->pp_lum_matrix = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
--	 break;
--      case RADEON_EMIT_PP_ROT_MATRIX_0:
--	 ctx->pp_rot_matrix_0 = buf[i++];
--	 ctx->pp_rot_matrix_1 = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
--	 break;
--      case RADEON_EMIT_RB3D_STENCILREFMASK:
--	 ctx->rb3d_stencilrefmask = buf[i++];
--	 ctx->rb3d_ropcntl = buf[i++];
--	 ctx->rb3d_planemask = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_MASKS;
--	 break;
--      case RADEON_EMIT_SE_VPORT_XSCALE:
--	 ctx->se_vport_xscale = buf[i++];
--	 ctx->se_vport_xoffset = buf[i++];
--	 ctx->se_vport_yscale = buf[i++];
--	 ctx->se_vport_yoffset = buf[i++];
--	 ctx->se_vport_zscale = buf[i++];
--	 ctx->se_vport_zoffset = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
--	 break;
--      case RADEON_EMIT_SE_CNTL:
--	 ctx->se_cntl = buf[i++];
--	 ctx->se_coord_fmt = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
--	 break;
--      case RADEON_EMIT_SE_CNTL_STATUS:
--	 ctx->se_cntl_status = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_SETUP;
--	 break;
--      case RADEON_EMIT_RE_MISC:
--	 ctx->re_misc = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_MISC;
--	 break;
--      case RADEON_EMIT_PP_TXFILTER_0:
--	 tex0->pp_txfilter = buf[i++];
--	 tex0->pp_txformat = buf[i++];
--	 tex0->pp_txoffset = buf[i++];
--	 tex0->pp_txcblend = buf[i++];
--	 tex0->pp_txablend = buf[i++];
--	 tex0->pp_tfactor = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX0;
--	 break;
--      case RADEON_EMIT_PP_BORDER_COLOR_0:
--	 tex0->pp_border_color = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX0;
--	 break;
--      case RADEON_EMIT_PP_TXFILTER_1:
--	 tex1->pp_txfilter = buf[i++];
--	 tex1->pp_txformat = buf[i++];
--	 tex1->pp_txoffset = buf[i++];
--	 tex1->pp_txcblend = buf[i++];
--	 tex1->pp_txablend = buf[i++];
--	 tex1->pp_tfactor = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX1;
--	 break;
--      case RADEON_EMIT_PP_BORDER_COLOR_1:
--	 tex1->pp_border_color = buf[i++];
--	 sarea->dirty |= RADEON_UPLOAD_TEX1;
--	 break;
--
--      case RADEON_EMIT_SE_ZBIAS_FACTOR:
--	 i++;
--	 i++;
--	 break;
--
--      case RADEON_EMIT_PP_TXFILTER_2:
--      case RADEON_EMIT_PP_BORDER_COLOR_2:
--      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
--      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
--      default:
--	 /* These states aren't understood by radeon drm 1.1 */
--	 fprintf(stderr, "Tried to emit unsupported state\n");
--	 return;
--      }
--   }
--}
--
--
--
--static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--
--   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
--      return;
--
--   foreach(atom, &rmesa->hw.atomlist) {
--      if (rmesa->hw.all_dirty)
--	 atom->dirty = GL_TRUE;
--      if (atom->is_tcl)
--	 atom->dirty = GL_FALSE;
--      if (atom->dirty)
--	 radeonCompatEmitPacket(rmesa, atom);
--   }
-- 
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
--}
--
--
--static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
--					     GLuint hw_primitive,
--					     GLuint nverts,
--					     drm_clip_rect_t *pbox,
--					     GLuint nbox )
--{
--   int i;
--
--   for ( i = 0 ; i < nbox ; ) {
--      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      drm_radeon_vertex_t vtx;
--      
--      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
--      rmesa->sarea->nbox = nr - i;
--
--      for ( ; i < nr ; i++) 
--	 *b++ = pbox[i];
--      
--      if (RADEON_DEBUG & DEBUG_IOCTL)
--	 fprintf(stderr, 
--		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
--		 "disc %d nbox %d\n",
--		 hw_primitive, 
--		 rmesa->dma.current.buf->buf->idx, 
--		 nverts, 
--		 nr == nbox,
--		 rmesa->sarea->nbox );
--
--      vtx.prim = hw_primitive;
--      vtx.idx = rmesa->dma.current.buf->buf->idx;
--      vtx.count = nverts;
--      vtx.discard = (nr == nbox);      
--
--      drmCommandWrite( rmesa->dri.fd, 
--		       DRM_RADEON_VERTEX,
--		       &vtx, sizeof(vtx));
--   }
--}
--
--
--
--/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!  
-- */
--void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
--				GLuint vertex_format,
--				GLuint hw_primitive,
--				GLuint nrverts )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   LOCK_HARDWARE( rmesa );
--
--   radeonCompatEmitStateLocked( rmesa );
--   rmesa->sarea->vc_format = vertex_format;
--   
--   if (rmesa->state.scissor.enabled) {
--      radeonCompatEmitPrimitiveLocked( rmesa, 
--				       hw_primitive,
--				       nrverts,
--				       rmesa->state.scissor.pClipRects,
--				       rmesa->state.scissor.numClipRects );
--   }
--   else {
--      radeonCompatEmitPrimitiveLocked( rmesa, 
--				       hw_primitive,
--				       nrverts,
--				       rmesa->pClipRects,
--				       rmesa->numClipRects );
--   }
--
--
--   UNLOCK_HARDWARE( rmesa );
--}
--
-diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
-index ea81a32..e4202c7 100644
---- a/src/mesa/drivers/dri/radeon/radeon_context.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
-@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drivers/common/driverfuncs.h"
- 
-+#include "radeon_common.h"
- #include "radeon_context.h"
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
-@@ -72,40 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "vblank.h"
- #include "utils.h"
- #include "xmlpool.h" /* for symbolic values of enum-type options */
--#ifndef RADEON_DEBUG
--int RADEON_DEBUG = (0);
--#endif
--
--
--/* Return various strings for glGetString().
-- */
--static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   static char buffer[128];
--   unsigned   offset;
--   GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
--      rmesa->radeonScreen->AGPMode;
--
--   switch ( name ) {
--   case GL_VENDOR:
--      return (GLubyte *)"Tungsten Graphics, Inc.";
--
--   case GL_RENDERER:
--      offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
--				     agp_mode );
--
--      sprintf( & buffer[ offset ], " %sTCL",
--	       !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
--	       ? "" : "NO-" );
--
--      return (GLubyte *)buffer;
--
--   default:
--      return NULL;
--   }
--}
--
- 
- /* Extension strings exported by the R100 driver.
-  */
-@@ -160,15 +127,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
-    NULL,
- };
- 
--
--
--/* Initialize the driver's misc functions.
-- */
--static void radeonInitDriverFuncs( struct dd_function_table *functions )
--{
--    functions->GetString	= radeonGetString;
--}
--
- static const struct dri_debug_control debug_control[] =
- {
-     { "fall",  DEBUG_FALLBACKS },
-@@ -188,6 +146,51 @@ static const struct dri_debug_control debug_control[] =
-     { NULL,    0 }
- };
- 
-+static void r100_get_lock(radeonContextPtr radeon)
-+{
-+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
-+   drm_radeon_sarea_t *sarea = radeon->sarea;
-+
-+   RADEON_STATECHANGE(rmesa, ctx);
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
-+	 RADEON_COLOR_TILE_ENABLE;
-+   } else {
-+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
-+	 ~RADEON_COLOR_TILE_ENABLE;
-+   }
-+   
-+   if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
-+      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
-+      
-+      if (!radeon->radeonScreen->kernel_mm)
-+         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
-+   }
-+}
-+
-+static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
-+{
-+}
-+
-+static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
-+{
-+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
-+   
-+   /* r100 always needs to emit ZBS to avoid TCL lockups */
-+   rmesa->hw.zbs.dirty = 1;
-+   radeon->hw.is_dirty = 1;
-+}
-+
-+
-+static void r100_init_vtbl(radeonContextPtr radeon)
-+{
-+   radeon->vtbl.get_lock = r100_get_lock;
-+   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
-+   radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
-+   radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
-+   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
-+   radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
-+}
- 
- /* Create the device specific context.
-  */
-@@ -199,8 +202,8 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
-    struct dd_function_table functions;
--   radeonContextPtr rmesa;
--   GLcontext *ctx, *shareCtx;
-+   r100ContextPtr rmesa;
-+   GLcontext *ctx;
-    int i;
-    int tcl_mode, fthrottle_mode;
- 
-@@ -209,10 +212,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-    assert(screen);
- 
-    /* Allocate the Radeon context */
--   rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
-+   rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
-    if ( !rmesa )
-       return GL_FALSE;
- 
-+   r100_init_vtbl(&rmesa->radeon);
-+
-    /* init exp fog table data */
-    radeonInitStaticFogData();
-    
-@@ -220,12 +225,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * Do this here so that initialMaxAnisotropy is set before we create
-     * the default textures.
-     */
--   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
-+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
- 			screen->driScreen->myNum, "radeon");
--   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
-+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
-                                                  "def_max_anisotropy");
- 
--   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
-+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-       if ( sPriv->drm_version.minor < 13 )
- 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- 			  "disabling.\n", sPriv->drm_version.minor );
-@@ -240,65 +245,23 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * (the texture functions are especially important)
-     */
-    _mesa_init_driver_functions( &functions );
--   radeonInitDriverFuncs( &functions );
-    radeonInitTextureFuncs( &functions );
- 
--   /* Allocate the Mesa context */
--   if (sharedContextPrivate)
--      shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
--   else
--      shareCtx = NULL;
--   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
--                                       &functions, (void *) rmesa);
--   if (!rmesa->glCtx) {
--      FREE(rmesa);
--      return GL_FALSE;
--   }
--   driContextPriv->driverPrivate = rmesa;
--
--   /* Init radeon context data */
--   rmesa->dri.context = driContextPriv;
--   rmesa->dri.screen = sPriv;
--   rmesa->dri.drawable = NULL;
--   rmesa->dri.readable = NULL;
--   rmesa->dri.hwContext = driContextPriv->hHWContext;
--   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
--   rmesa->dri.fd = sPriv->fd;
--   rmesa->dri.drmMinor = sPriv->drm_version.minor;
--
--   rmesa->radeonScreen = screen;
--   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
--				       screen->sarea_priv_offset);
--
--
--   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
--
--   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
--   make_empty_list( & rmesa->swapped );
--
--   rmesa->nr_heaps = screen->numTexHeaps;
--   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
--	    screen->texSize[i],
--	    12,
--	    RADEON_NR_TEX_REGIONS,
--	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
--	    & rmesa->sarea->tex_age[i],
--	    & rmesa->swapped,
--	    sizeof( radeonTexObj ),
--	    (destroy_texture_object_t *) radeonDestroyTexObj );
--
--      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
--					& rmesa->c_textureSwaps );
-+   if (!radeonInitContext(&rmesa->radeon, &functions,
-+			  glVisual, driContextPriv,
-+			  sharedContextPrivate)) {
-+     FREE(rmesa);
-+     return GL_FALSE;
-    }
--   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
-+
-+   rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
- 					   "texture_depth");
--   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
--      rmesa->texture_depth = ( screen->cpp == 4 ) ?
-+   if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-+      rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
- 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- 
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
- 
-    /* Set the maximum texture size small enough that we can guarentee that
-     * all texture units can bind a maximal texture and have all of them in
-@@ -306,26 +269,13 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-     * setting allow larger textures.
-     */
- 
--   ctx = rmesa->glCtx;
--   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
-+   ctx = rmesa->radeon.glCtx;
-+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
- 						 "texture_units");
-    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
-    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
- 
--   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
--
--   driCalculateMaxTextureLevels( rmesa->texture_heaps,
--				 rmesa->nr_heaps,
--				 & ctx->Const,
--				 4,
--				 11, /* max 2D texture size is 2048x2048 */
--				 8,  /* 256^3 */
--				 9,  /* \todo: max cube texture size seems to be 512x512(x6) */
--				 11, /* max rect texture size is 2048x2048. */
--				 12,
--				 GL_FALSE,
--				 i );
--
-+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
- 
-    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
- 
-@@ -388,38 +338,38 @@ radeonCreateContext( const __GLcontextModes *glVisual,
-    }
- 
-    driInitExtensions( ctx, card_extensions, GL_TRUE );
--   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
-       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
--   if (rmesa->glCtx->Mesa_DXTn) {
-+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-    }
--   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
-+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
-       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-    }
- 
--   if (rmesa->dri.drmMinor >= 9)
-+   if (rmesa->radeon.dri.drmMinor >= 9)
-       _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
- 
-    /* XXX these should really go right after _mesa_init_driver_functions() */
-+   radeonInitSpanFuncs( ctx );
-    radeonInitIoctlFuncs( ctx );
-    radeonInitStateFuncs( ctx );
--   radeonInitSpanFuncs( ctx );
-    radeonInitState( rmesa );
-    radeonInitSwtcl( ctx );
- 
-    _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
- 			 ctx->Const.MaxArrayLockSize, 32 );
- 
--   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
--   rmesa->iw.irq_seq = -1;
--   rmesa->irqsEmitted = 0;
--   rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
--		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
-+   fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
-+   rmesa->radeon.iw.irq_seq = -1;
-+   rmesa->radeon.irqsEmitted = 0;
-+   rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
-+			    fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
- 
--   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-+   rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
- 
--   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
-+   (*sPriv->systemTime->getUST)( & rmesa->radeon.swap_ust );
- 
- 
- #if DO_DEBUG
-@@ -427,20 +377,20 @@ radeonCreateContext( const __GLcontextModes *glVisual,
- 				       debug_control );
- #endif
- 
--   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
--   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
-+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
-+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
-       fprintf(stderr, "disabling 3D acceleration\n");
-       FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
-    } else if (tcl_mode == DRI_CONF_TCL_SW ||
--	      !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
--      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
--	 rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
-+	      !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
- 	 fprintf(stderr, "Disabling HW TCL support\n");
-       }
--      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
-+      TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
-    }
- 
--   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- /*       _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
-    }
-    return GL_TRUE;
-@@ -454,179 +404,41 @@ radeonCreateContext( const __GLcontextModes *glVisual,
- void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
- {
-    GET_CURRENT_CONTEXT(ctx);
--   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
--   radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
-+   r100ContextPtr rmesa = (r100ContextPtr) driContextPriv->driverPrivate;
-+   r100ContextPtr current = ctx ? R100_CONTEXT(ctx) : NULL;
- 
-    /* check if we're deleting the currently bound context */
-    if (rmesa == current) {
--      RADEON_FIREVERTICES( rmesa );
-+      radeon_firevertices(&rmesa->radeon);
-       _mesa_make_current(NULL, NULL, NULL);
-    }
- 
-    /* Free radeon context resources */
-    assert(rmesa); /* should never be null */
-    if ( rmesa ) {
--      GLboolean   release_texture_heaps;
- 
-+      _swsetup_DestroyContext( rmesa->radeon.glCtx );
-+      _tnl_DestroyContext( rmesa->radeon.glCtx );
-+      _vbo_DestroyContext( rmesa->radeon.glCtx );
-+      _swrast_DestroyContext( rmesa->radeon.glCtx );
- 
--      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
--      _swsetup_DestroyContext( rmesa->glCtx );
--      _tnl_DestroyContext( rmesa->glCtx );
--      _vbo_DestroyContext( rmesa->glCtx );
--      _swrast_DestroyContext( rmesa->glCtx );
--
--      radeonDestroySwtcl( rmesa->glCtx );
--      radeonReleaseArrays( rmesa->glCtx, ~0 );
--      if (rmesa->dma.current.buf) {
--	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-+      radeonDestroySwtcl( rmesa->radeon.glCtx );
-+      radeonReleaseArrays( rmesa->radeon.glCtx, ~0 );
-+      if (rmesa->radeon.dma.current) {
-+	 radeonReleaseDmaRegion( &rmesa->radeon );
-+	 rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
-       }
- 
-       _mesa_vector4f_free( &rmesa->tcl.ObjClean );
- 
--      if (rmesa->state.scissor.pClipRects) {
--	 FREE(rmesa->state.scissor.pClipRects);
--	 rmesa->state.scissor.pClipRects = NULL;
--      }
--
--      if ( release_texture_heaps ) {
--         /* This share group is about to go away, free our private
--          * texture object data.
--          */
--         int i;
--
--         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
--	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
--	    rmesa->texture_heaps[ i ] = NULL;
--         }
--
--	 assert( is_empty_list( & rmesa->swapped ) );
-+      if (rmesa->radeon.state.scissor.pClipRects) {
-+	 FREE(rmesa->radeon.state.scissor.pClipRects);
-+	 rmesa->radeon.state.scissor.pClipRects = NULL;
-       }
- 
--      /* free the Mesa context */
--      rmesa->glCtx->DriverCtx = NULL;
--      _mesa_destroy_context( rmesa->glCtx );
--
--      /* free the option cache */
--      driDestroyOptionCache (&rmesa->optionCache);
-+      radeonCleanupContext(&rmesa->radeon);
- 
-       FREE( rmesa );
-    }
- }
- 
--
--
--
--void
--radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
--{
--
--   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--      radeonContextPtr rmesa;
--      GLcontext *ctx;
--      rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--      ctx = rmesa->glCtx;
--      if (ctx->Visual.doubleBufferMode) {
--         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
--
--         if ( rmesa->doPageFlip ) {
--            radeonPageFlip( dPriv );
--         }
--         else {
--	     radeonCopyBuffer( dPriv, NULL );
--         }
--      }
--   }
--   else {
--      /* XXX this shouldn't be an error but we can't handle it for now */
--      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
--   }
--}
--
--void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--			 int x, int y, int w, int h )
--{
--    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
--	radeonContextPtr radeon;
--	GLcontext *ctx;
--
--	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--	ctx = radeon->glCtx;
--
--	if (ctx->Visual.doubleBufferMode) {
--	    drm_clip_rect_t rect;
--	    rect.x1 = x + dPriv->x;
--	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
--	    rect.x2 = rect.x1 + w;
--	    rect.y2 = rect.y1 + h;
--	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
--	    radeonCopyBuffer(dPriv, &rect);
--	}
--    } else {
--	/* XXX this shouldn't be an error but we can't handle it for now */
--	_mesa_problem(NULL, "%s: drawable has no context!",
--		      __FUNCTION__);
--    }
--}
--
--/* Make context `c' the current context and bind it to the given
-- * drawing and reading surfaces.
-- */
--GLboolean
--radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
--                   __DRIdrawablePrivate *driDrawPriv,
--                   __DRIdrawablePrivate *driReadPriv )
--{
--   if ( driContextPriv ) {
--      radeonContextPtr newCtx = 
--	 (radeonContextPtr) driContextPriv->driverPrivate;
--
--      if (RADEON_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
--
--      newCtx->dri.readable = driReadPriv;
--
--      if ( (newCtx->dri.drawable != driDrawPriv) ||
--           newCtx->lastStamp != driDrawPriv->lastStamp ) {
--	 if (driDrawPriv->swap_interval == (unsigned)-1) {
--	    driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
--	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
--	       : VBLANK_FLAG_NO_IRQ;
--
--	    driDrawableInitVBlank( driDrawPriv );
--	 }
--
--	 newCtx->dri.drawable = driDrawPriv;
--
--	 radeonSetCliprects(newCtx);
--	 radeonUpdateViewportOffset( newCtx->glCtx );
--      }
--
--      _mesa_make_current( newCtx->glCtx,
--			  (GLframebuffer *) driDrawPriv->driverPrivate,
--			  (GLframebuffer *) driReadPriv->driverPrivate );
--
--      _mesa_update_state( newCtx->glCtx );
--   } else {
--      if (RADEON_DEBUG & DEBUG_DRI)
--	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--      _mesa_make_current( NULL, NULL, NULL );
--   }
--
--   if (RADEON_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "End %s\n", __FUNCTION__);
--   return GL_TRUE;
--}
--
--/* Force the context `c' to be unbound from its buffer.
-- */
--GLboolean
--radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
--{
--   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
--
--   if (RADEON_DEBUG & DEBUG_DRI)
--      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
--
--   return GL_TRUE;
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
-index 53df766..2efabd1 100644
---- a/src/mesa/drivers/dri/radeon/radeon_context.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
-@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "radeon_drm.h"
- #include "texmem.h"
--
- #include "main/macros.h"
- #include "main/mtypes.h"
- #include "main/colormac.h"
--
--struct radeon_context;
--typedef struct radeon_context radeonContextRec;
--typedef struct radeon_context *radeonContextPtr;
--
--/* This union is used to avoid warnings/miscompilation
--   with float to uint32_t casts due to strict-aliasing */
--typedef union {
--	GLfloat f;
--	uint32_t ui32;
--} float_ui32_type;
--
--#include "radeon_lock.h"
- #include "radeon_screen.h"
--#include "main/mm.h"
--
--#include "math/m_vector.h"
--
--#define TEX_0   0x1
--#define TEX_1   0x2
--#define TEX_2   0x4
--#define TEX_ALL 0x7
--
--/* Rasterizing fallbacks */
--/* See correponding strings in r200_swtcl.c */
--#define RADEON_FALLBACK_TEXTURE		0x0001
--#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
--#define RADEON_FALLBACK_STENCIL		0x0004
--#define RADEON_FALLBACK_RENDER_MODE	0x0008
--#define RADEON_FALLBACK_BLEND_EQ	0x0010
--#define RADEON_FALLBACK_BLEND_FUNC	0x0020
--#define RADEON_FALLBACK_DISABLE 	0x0040
--#define RADEON_FALLBACK_BORDER_MODE	0x0080
--
--/* The blit width for texture uploads
-- */
--#define BLIT_WIDTH_BYTES 1024
- 
--/* Use the templated vertex format:
-- */
--#define COLOR_IS_RGBA
--#define TAG(x) radeon##x
--#include "tnl_dd/t_dd_vertex.h"
--#undef TAG
--
--typedef void (*radeon_tri_func) (radeonContextPtr,
--				 radeonVertex *,
--				 radeonVertex *, radeonVertex *);
--
--typedef void (*radeon_line_func) (radeonContextPtr,
--				  radeonVertex *, radeonVertex *);
-+#include "radeon_common.h"
- 
--typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
--
--struct radeon_colorbuffer_state {
--	GLuint clear;
--	int roundEnable;
--};
- 
--struct radeon_depthbuffer_state {
--	GLuint clear;
--	GLfloat scale;
--};
-+struct r100_context;
-+typedef struct r100_context r100ContextRec;
-+typedef struct r100_context *r100ContextPtr;
- 
--struct radeon_scissor_state {
--	drm_clip_rect_t rect;
--	GLboolean enabled;
-+#include "radeon_lock.h"
- 
--	GLuint numClipRects;	/* Cliprects active */
--	GLuint numAllocedClipRects;	/* Cliprects available */
--	drm_clip_rect_t *pClipRects;
--};
- 
--struct radeon_stencilbuffer_state {
--	GLboolean hwBuffer;
--	GLuint clear;		/* rb3d_stencilrefmask value */
--};
- 
--struct radeon_stipple_state {
--	GLuint mask[32];
--};
-+#define R100_TEX_ALL 0x7
- 
- /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
- #define RADEON_ST_BIT(unit) \
-@@ -141,42 +73,6 @@ struct radeon_stipple_state {
- #define RADEON_Q_BIT(unit) \
- (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
- 
--typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
--
--/* Texture object in locally shared texture space.
-- */
--struct radeon_tex_obj {
--	driTextureObject base;
--
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	GLuint dirty_state;	/* Flags (1 per texunit) for
--				   whether or not this texobj
--				   has dirty hardware state
--				   (pp_*) that needs to be
--				   brought into the
--				   texunit. */
--
--	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
--
--	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
--
--	GLuint pp_txfilter;	/* hardware register values */
--	GLuint pp_txformat;
--	GLuint pp_txoffset;	/* Image location in texmem.
--				   All cube faces follow. */
--	GLuint pp_txsize;	/* npot only */
--	GLuint pp_txpitch;	/* npot only */
--	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--
--	GLboolean border_fallback;
--
--	GLuint tile_bits;	/* hw texture tile bits used on this texture */
--};
--
- struct radeon_texture_env_state {
- 	radeonTexObjPtr texobj;
- 	GLenum format;
-@@ -187,17 +83,6 @@ struct radeon_texture_state {
- 	struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
- };
- 
--struct radeon_state_atom {
--	struct radeon_state_atom *next, *prev;
--	const char *name;	/* for debug */
--	int cmd_size;		/* size in bytes */
--	GLuint is_tcl;
--	int *cmd;		/* one or more cmd's */
--	int *lastcmd;		/* one or more cmd's */
--	GLboolean dirty;	/* dirty-mark in emit_state_list */
--	 GLboolean(*check) (GLcontext *);	/* is this state active? */
--};
--
- /* Trying to keep these relatively short as the variables are becoming
-  * extravagently long.  Drop the driver name prefix off the front of
-  * everything - I think we know which driver we're in by now, and keep the
-@@ -410,10 +295,7 @@ struct radeon_state_atom {
- #define SHN_SHININESS      1
- #define SHN_STATE_SIZE     2
- 
--struct radeon_hw_state {
--	/* Head of the linked list of state atoms. */
--	struct radeon_state_atom atomlist;
--
-+struct r100_hw_state {
- 	/* Hardware state, stored as cmdbuf commands:  
- 	 *   -- Need to doublebuffer for
- 	 *           - eliding noop statechange loops? (except line stipple count)
-@@ -438,86 +320,16 @@ struct radeon_hw_state {
- 	struct radeon_state_atom glt;
- 	struct radeon_state_atom txr[3];	/* for NPOT */
- 
--	int max_state_size;	/* Number of bytes necessary for a full state emit. */
--	GLboolean is_dirty, all_dirty;
- };
- 
--struct radeon_state {
--	/* Derived state for internal purposes:
--	 */
--	struct radeon_colorbuffer_state color;
--	struct radeon_depthbuffer_state depth;
--	struct radeon_scissor_state scissor;
--	struct radeon_stencilbuffer_state stencil;
-+
-+struct r100_state {
- 	struct radeon_stipple_state stipple;
- 	struct radeon_texture_state texture;
- };
- 
--/* Need refcounting on dma buffers:
-- */
--struct radeon_dma_buffer {
--	int refcount;		/* the number of retained regions in buf */
--	drmBufPtr buf;
--};
--
--#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset +			\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct radeon_dma_region {
--	struct radeon_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--	int aos_start;
--	int aos_stride;
--	int aos_size;
--};
--
--struct radeon_dma {
--	/* Active dma region.  Allocations for vertices and retained
--	 * regions come from here.  Also used for emitting random vertices,
--	 * these may be flushed by calling flush_current();
--	 */
--	struct radeon_dma_region current;
--
--	void (*flush) (radeonContextPtr);
--
--	char *buf0_address;	/* start of buf[0], for index calcs */
--	GLuint nr_released_bufs;	/* flush after so many buffers released */
--};
--
--struct radeon_dri_mirror {
--	__DRIcontextPrivate *context;	/* DRI context */
--	__DRIscreenPrivate *screen;	/* DRI screen */
--
--   /**
--    * DRI drawable bound to this context for drawing.
--    */
--	__DRIdrawablePrivate *drawable;
--
--   /**
--    * DRI drawable bound to this context for reading.
--    */
--	__DRIdrawablePrivate *readable;
--
--	drm_context_t hwContext;
--	drm_hw_lock_t *hwLock;
--	int fd;
--	int drmMinor;
--};
--
- #define RADEON_CMD_BUF_SZ  (8*1024)
--
--struct radeon_store {
--	GLuint statenr;
--	GLuint primnr;
--	char cmd_buf[RADEON_CMD_BUF_SZ];
--	int cmd_used;
--	int elts_start;
--};
--
-+#define R200_ELT_BUF_SZ  (8*1024)
- /* radeon_tcl.c
-  */
- struct radeon_tcl_info {
-@@ -529,30 +341,23 @@ struct radeon_tcl_info {
- 	 */
- 	GLvector4f ObjClean;
- 
--	struct radeon_dma_region *aos_components[8];
-+        struct radeon_aos aos[8];
- 	GLuint nr_aos_components;
- 
- 	GLuint *Elts;
- 
--	struct radeon_dma_region indexed_verts;
--	struct radeon_dma_region obj;
--	struct radeon_dma_region rgba;
--	struct radeon_dma_region spec;
--	struct radeon_dma_region fog;
--	struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
--	struct radeon_dma_region norm;
-+	struct radeon_bo *indexed_bo;
-+
-+        int elt_cmd_offset; /** Offset into the cmdbuf */
-+	int elt_cmd_start;
-+        int elt_used;
- };
- 
- /* radeon_swtcl.c
-  */
--struct radeon_swtcl_info {
--	GLuint RenderIndex;
--	GLuint vertex_size;
-+struct r100_swtcl_info {
- 	GLuint vertex_format;
- 
--	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
--	GLuint vertex_attr_count;
--
- 	GLubyte *verts;
- 
- 	/* Fallback rasterization functions
-@@ -561,10 +366,6 @@ struct radeon_swtcl_info {
- 	radeon_line_func draw_line;
- 	radeon_tri_func draw_tri;
- 
--	GLuint hw_primitive;
--	GLenum render_primitive;
--	GLuint numverts;
--
-    /**
-     * Offset of the 4UB color data within a hardware (swtcl) vertex.
-     */
-@@ -576,22 +377,9 @@ struct radeon_swtcl_info {
- 	GLuint specoffset;
- 
- 	GLboolean needproj;
--
--	struct radeon_dma_region indexed_verts;
- };
- 
--struct radeon_ioctl {
--	GLuint vertex_offset;
--	GLuint vertex_size;
--};
- 
--#define RADEON_MAX_PRIMS 64
--
--struct radeon_prim {
--	GLuint start;
--	GLuint end;
--	GLuint prim;
--};
- 
- /* A maximum total of 20 elements per vertex:  3 floats for position, 3
-  * floats for normal, 4 floats for color, 4 bytes for secondary color,
-@@ -602,59 +390,18 @@ struct radeon_prim {
-  */
- #define RADEON_MAX_VERTEX_SIZE 20
- 
--struct radeon_context {
--	GLcontext *glCtx;	/* Mesa context */
-+struct r100_context {
-+        struct radeon_context radeon;
- 
- 	/* Driver and hardware state management
- 	 */
--	struct radeon_hw_state hw;
--	struct radeon_state state;
--
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
--	int texture_depth;
--	float initialMaxAnisotropy;
--
--	/* Rasterization and vertex state:
--	 */
--	GLuint TclFallback;
--	GLuint Fallback;
--	GLuint NewGLState;
--	 DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
-+	struct r100_hw_state hw;
-+	struct r100_state state;
- 
- 	/* Vertex buffers
- 	 */
- 	struct radeon_ioctl ioctl;
--	struct radeon_dma dma;
- 	struct radeon_store store;
--	/* A full state emit as of the first state emit in the main store, in case
--	 * the context is lost.
--	 */
--	struct radeon_store backup_store;
--
--	/* Page flipping
--	 */
--	GLuint doPageFlip;
--
--	/* Busy waiting
--	 */
--	GLuint do_usleeps;
--	GLuint do_irqs;
--	GLuint irqsEmitted;
--	drm_radeon_irq_wait_t iw;
--
--	/* Drawable, cliprect and scissor information
--	 */
--	GLuint numClipRects;	/* Cliprects for the draw buffer */
--	drm_clip_rect_t *pClipRects;
--	unsigned int lastStamp;
--	GLboolean lost_context;
--	GLboolean save_on_next_emit;
--	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
--	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
- 
- 	/* TCL stuff
- 	 */
-@@ -667,29 +414,13 @@ struct radeon_context {
- 	GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
- 	GLuint last_ReallyEnabled;
- 
--	/* VBI
--	 */
--	int64_t swap_ust;
--	int64_t swap_missed_ust;
--
--	GLuint swap_count;
--	GLuint swap_missed_count;
--
- 	/* radeon_tcl.c
- 	 */
- 	struct radeon_tcl_info tcl;
- 
- 	/* radeon_swtcl.c
- 	 */
--	struct radeon_swtcl_info swtcl;
--
--	/* Mirrors of some DRI state
--	 */
--	struct radeon_dri_mirror dri;
--
--	/* Configuration cache
--	 */
--	driOptionCache optionCache;
-+	struct r100_swtcl_info swtcl;
- 
- 	GLboolean using_hyperz;
- 	GLboolean texmicrotile;
-@@ -703,23 +434,11 @@ struct radeon_context {
- 	GLuint c_textureSwaps;
- 	GLuint c_textureBytes;
- 	GLuint c_vertexBuffers;
-+
- };
- 
--#define RADEON_CONTEXT(ctx)		((radeonContextPtr)(ctx->DriverCtx))
--
--static INLINE GLuint radeonPackColor(GLuint cpp,
--                                     GLubyte r, GLubyte g,
--                                     GLubyte b, GLubyte a)
--{
--	switch (cpp) {
--	case 2:
--		return PACK_COLOR_565(r, g, b);
--	case 4:
--		return PACK_COLOR_8888(a, r, g, b);
--	default:
--		return 0;
--	}
--}
-+#define R100_CONTEXT(ctx)		((r100ContextPtr)(ctx->DriverCtx))
-+
- 
- #define RADEON_OLD_PACKETS 1
- 
-@@ -727,37 +446,11 @@ extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
- extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
- 				     __DRIcontextPrivate * driContextPriv,
- 				     void *sharedContextPrivate);
--extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
--extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
--				int x, int y, int w, int h);
- extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- 				   __DRIdrawablePrivate * driDrawPriv,
- 				   __DRIdrawablePrivate * driReadPriv);
- extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
- 
--/* ================================================================
-- * Debugging:
-- */
--#define DO_DEBUG		1
--
--#if DO_DEBUG
--extern int RADEON_DEBUG;
--#else
--#define RADEON_DEBUG		0
--#endif
--
--#define DEBUG_TEXTURE	0x0001
--#define DEBUG_STATE	0x0002
--#define DEBUG_IOCTL	0x0004
--#define DEBUG_PRIMS	0x0008
--#define DEBUG_VERTS	0x0010
--#define DEBUG_FALLBACKS	0x0020
--#define DEBUG_VFMT	0x0040
--#define DEBUG_CODEGEN	0x0080
--#define DEBUG_VERBOSE	0x0100
--#define DEBUG_DRI       0x0200
--#define DEBUG_DMA       0x0400
--#define DEBUG_SANITY    0x0800
--#define DEBUG_SYNC      0x1000
-+
- 
- #endif				/* __RADEON_CONTEXT_H__ */
 diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
 new file mode 100644
 index 0000000..984725a
@@ -24666,2440 +4516,6 @@ index 0000000..cee3744
 +
 +void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
 +#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-index 09acf6b..fb3a236 100644
---- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
-@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- 
- #include "radeon_context.h"
-+#include "radeon_common.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_tcl.h"
-@@ -58,75 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define RADEON_IDLE_RETRY           16
- 
- 
--static void radeonWaitForIdle( radeonContextPtr rmesa );
--static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
--				    const char * caller );
--
--static void print_state_atom( struct radeon_state_atom *state )
--{
--   int i;
--
--   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
--
--   if (RADEON_DEBUG & DEBUG_VERBOSE) 
--      for (i = 0 ; i < state->cmd_size ; i++) 
--	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
--
--}
--
--static void radeonSaveHwState( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--   char * dest = rmesa->backup_store.cmd_buf;
--
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--   
--   rmesa->backup_store.cmd_used = 0;
--
--   foreach( atom, &rmesa->hw.atomlist ) {
--      if ( atom->check( rmesa->glCtx ) ) {
--	 int size = atom->cmd_size * 4;
--	 memcpy( dest, atom->cmd, size);
--	 dest += size;
--	 rmesa->backup_store.cmd_used += size;
--	 if (RADEON_DEBUG & DEBUG_STATE)
--	    print_state_atom( atom );
--      }
--   }
--
--   assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Returning to radeonEmitState\n");
--}
--
--/* At this point we were in FlushCmdBufLocked but we had lost our context, so
-- * we need to unwire our current cmdbuf, hook the one with the saved state in
-- * it, flush it, and then put the current one back.  This is so commands at the
-- * start of a cmdbuf can rely on the state being kept from the previous one.
-- */
--static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
--{
--   GLuint nr_released_bufs;
--   struct radeon_store saved_store;
--
--   if (rmesa->backup_store.cmd_used == 0)
--      return;
--
--   if (RADEON_DEBUG & DEBUG_STATE)
--      fprintf(stderr, "Emitting backup state on lost context\n");
--
--   rmesa->lost_context = GL_FALSE;
--
--   nr_released_bufs = rmesa->dma.nr_released_bufs;
--   saved_store = rmesa->store;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->store = rmesa->backup_store;
--   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
--   rmesa->dma.nr_released_bufs = nr_released_bufs;
--   rmesa->store = saved_store;
--}
--
- /* =============================================================
-  * Kernel command buffer handling
-  */
-@@ -134,893 +66,308 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
- /* The state atoms will be emitted in the order they appear in the atom list,
-  * so this step is important.
-  */
--void radeonSetUpAtomList( radeonContextPtr rmesa )
-+void radeonSetUpAtomList( r100ContextPtr rmesa )
- {
--   int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
--
--   make_empty_list(&rmesa->hw.atomlist);
--   rmesa->hw.atomlist.name = "atom-list";
--
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
-+   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
-+
-+   make_empty_list(&rmesa->radeon.hw.atomlist);
-+   rmesa->radeon.hw.atomlist.name = "atom-list";
-+
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
-    for (i = 0; i < mtu; ++i) {
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
--       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
-+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
-    }
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
-    for (i = 0; i < 3 + mtu; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
-    for (i = 0; i < 8; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
-    for (i = 0; i < 6; ++i)
--      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
--   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
--}
--
--void radeonEmitState( radeonContextPtr rmesa )
--{
--   struct radeon_state_atom *atom;
--   char *dest;
--
--   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (rmesa->save_on_next_emit) {
--      radeonSaveHwState(rmesa);
--      rmesa->save_on_next_emit = GL_FALSE;
--   }
--
--   /* this code used to return here but now it emits zbs */
--
--   /* To avoid going across the entire set of states multiple times, just check
--    * for enough space for the case of emitting all state, and inline the
--    * radeonAllocCmdBuf code here without all the checks.
--    */
--   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
--   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--
--   /* We always always emit zbs, this is due to a bug found by keithw in
--      the hardware and rediscovered after Erics changes by me.
--      if you ever touch this code make sure you emit zbs otherwise
--      you get tcl lockups on at least M7/7500 class of chips - airlied */
--   rmesa->hw.zbs.dirty=1;
--
--   if (RADEON_DEBUG & DEBUG_STATE) {
--      foreach(atom, &rmesa->hw.atomlist) {
--	 if (atom->dirty || rmesa->hw.all_dirty) {
--	    if (atom->check(rmesa->glCtx))
--	       print_state_atom(atom);
--	    else
--	       fprintf(stderr, "skip state %s\n", atom->name);
--	 }
--      }
--   }
--
--   foreach(atom, &rmesa->hw.atomlist) {
--      if (rmesa->hw.all_dirty)
--	 atom->dirty = GL_TRUE;
--      if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
--	   atom->is_tcl)
--	 atom->dirty = GL_FALSE;
--      if (atom->dirty) {
--	 if (atom->check(rmesa->glCtx)) {
--	    int size = atom->cmd_size * 4;
--	    memcpy(dest, atom->cmd, size);
--	    dest += size;
--	    rmesa->store.cmd_used += size;
--	    atom->dirty = GL_FALSE;
--	 }
--      }
--   }
--
--   assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
-- 
--   rmesa->hw.is_dirty = GL_FALSE;
--   rmesa->hw.all_dirty = GL_FALSE;
-+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
-+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
- }
- 
- /* Fire a section of the retained (indexed_verts) buffer as a regular
-  * primtive.  
-  */
--extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
-+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
- 				GLuint vertex_format,
- 				GLuint primitive,
- 				GLuint vertex_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
--
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-    
--   radeonEmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
- 
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
--	      rmesa->store.cmd_used/4);
--   
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
--						       __FUNCTION__ );
- #if RADEON_OLD_PACKETS
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
--   cmd[2].i = rmesa->ioctl.vertex_offset;
--   cmd[3].i = vertex_nr;
--   cmd[4].i = vertex_format;
--   cmd[5].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
--	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
--
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
--#else
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
--   cmd[2].i = vertex_format;
--   cmd[3].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
--	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
--
--
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, cmd[2].i, cmd[3].i);
-+   BEGIN_BATCH(8);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
-+   OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   OUT_BATCH(vertex_nr);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-+   END_BATCH();
-+   
-+#else   
-+   BEGIN_BATCH(4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive |
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-+   END_BATCH();
- #endif
- }
- 
--
--void radeonFlushElts( radeonContextPtr rmesa )
-+void radeonFlushElts( GLcontext *ctx )
- {
--   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
--   int dwords;
--#if RADEON_OLD_PACKETS
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
--#else
--   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
--#endif
--
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   int nr;
-+   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
-+   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
-+   
-    if (RADEON_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
--   assert( rmesa->dma.flush == radeonFlushElts );
--   rmesa->dma.flush = NULL;
-+   assert( rmesa->radeon.dma.flush == radeonFlushElts );
-+   rmesa->radeon.dma.flush = NULL;
- 
--   /* Cope with odd number of elts:
--    */
--   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
--   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
-+   nr = rmesa->tcl.elt_used;
-+
-+   rmesa->radeon.cmdbuf.cs->cdw += dwords;
- 
- #if RADEON_OLD_PACKETS
--   cmd[1] |= (dwords - 3) << 16;
-+   cmd[1] |= (dwords + 3) << 16;
-    cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
- #else
--   cmd[1] |= (dwords - 3) << 16;
-+   cmd[1] |= (dwords + 2) << 16;
-    cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
- #endif
- 
-+   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
-+   END_BATCH();
-+
-    if (RADEON_DEBUG & DEBUG_SYNC) {
-       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--      radeonFinish( rmesa->glCtx );
-+      radeonFinish( rmesa->radeon.glCtx );
-    }
--}
- 
-+}
- 
--GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
-+GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
- 				    GLuint vertex_format,
- 				    GLuint primitive,
- 				    GLuint min_nr )
- {
--   drm_radeon_cmd_header_t *cmd;
-    GLushort *retval;
-+   int align_min_nr;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
-+      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
- 
-    assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-    
--   radeonEmitState( rmesa );
-+   radeonEmitState(&rmesa->radeon);
-    
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
--						       ELTS_BUFSZ(min_nr),
--						       __FUNCTION__ );
-+   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
-+
-+   /* round up min_nr to align the state */
-+   align_min_nr = (min_nr + 1) & ~1;
-+
- #if RADEON_OLD_PACKETS
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
--   cmd[2].i = rmesa->ioctl.vertex_offset;
--   cmd[3].i = 0xffff;
--   cmd[4].i = vertex_format;
--   cmd[5].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
--
--   retval = (GLushort *)(cmd+6);
--#else   
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
--   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
--   cmd[2].i = vertex_format;
--   cmd[3].i = (primitive | 
--	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
--	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
--	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
--	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
--
--   retval = (GLushort *)(cmd+4);
-+   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
-+   OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   OUT_BATCH(0xffff);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive | 
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-+
-+#else
-+   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
-+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
-+   OUT_BATCH(vertex_format);
-+   OUT_BATCH(primitive | 
-+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
-+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
- #endif
- 
--   if (RADEON_DEBUG & DEBUG_PRIMS)
--      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
--	      __FUNCTION__,
--	      cmd[1].i, vertex_format, primitive);
- 
--   assert(!rmesa->dma.flush);
--   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--   rmesa->dma.flush = radeonFlushElts;
-+   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
-+   rmesa->tcl.elt_used = min_nr;
- 
--   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
-+   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
-+   
-+   if (RADEON_DEBUG & DEBUG_PRIMS)
-+      fprintf(stderr, "%s: header prim %x \n",
-+	      __FUNCTION__, primitive);
-+
-+   assert(!rmesa->radeon.dma.flush);
-+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+   rmesa->radeon.dma.flush = radeonFlushElts;
- 
-    return retval;
- }
- 
--
--
--void radeonEmitVertexAOS( radeonContextPtr rmesa,
-+void radeonEmitVertexAOS( r100ContextPtr rmesa,
- 			  GLuint vertex_size,
-+			  struct radeon_bo *bo,
- 			  GLuint offset )
- {
- #if RADEON_OLD_PACKETS
--   rmesa->ioctl.vertex_size = vertex_size;
-    rmesa->ioctl.vertex_offset = offset;
-+   rmesa->ioctl.bo = bo;
- #else
--   drm_radeon_cmd_header_t *cmd;
-+   BATCH_LOCALS(&rmesa->radeon);
- 
-    if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
-       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
- 	      __FUNCTION__, vertex_size, offset);
- 
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
--						  __FUNCTION__ );
-+   BEGIN_BATCH(7);
-+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
-+   OUT_BATCH(1);
-+   OUT_BATCH(vertex_size | (vertex_size << 8));
-+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+   END_BATCH();
- 
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
--   cmd[2].i = 1;
--   cmd[3].i = vertex_size | (vertex_size << 8);
--   cmd[4].i = offset;
- #endif
- }
- 		       
- 
--void radeonEmitAOS( radeonContextPtr rmesa,
--		    struct radeon_dma_region **component,
-+void radeonEmitAOS( r100ContextPtr rmesa,
- 		    GLuint nr,
- 		    GLuint offset )
- {
- #if RADEON_OLD_PACKETS
-    assert( nr == 1 );
--   assert( component[0]->aos_size == component[0]->aos_stride );
--   rmesa->ioctl.vertex_size = component[0]->aos_size;
-+   rmesa->ioctl.bo = rmesa->tcl.aos[0].bo;
-    rmesa->ioctl.vertex_offset = 
--      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
-+     (rmesa->tcl.aos[0].offset + offset * rmesa->tcl.aos[0].stride * 4);
- #else
--   drm_radeon_cmd_header_t *cmd;
--   int sz = AOS_BUFSZ(nr);
-+   BATCH_LOCALS(&rmesa->radeon);
-+   uint32_t voffset;
-+   //   int sz = AOS_BUFSZ(nr);
-+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
-    int i;
--   int *tmp;
- 
-    if (RADEON_DEBUG & DEBUG_IOCTL)
-       fprintf(stderr, "%s\n", __FUNCTION__);
- 
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
--						  __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
--   cmd[2].i = nr;
--   tmp = &cmd[0].i;
--   cmd += 3;
--
--   for (i = 0 ; i < nr ; i++) {
--      if (i & 1) {
--	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
--		      (component[i]->aos_size << 16));
--	 cmd[2].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
--	 cmd += 3;
--      }
--      else {
--	 cmd[0].i = ((component[i]->aos_stride << 8) | 
--		     (component[i]->aos_size << 0));
--	 cmd[1].i = (component[i]->aos_start + 
--		     offset * component[i]->aos_stride * 4);
--      }
--   }
--
--   if (RADEON_DEBUG & DEBUG_VERTS) {
--      fprintf(stderr, "%s:\n", __FUNCTION__);
--      for (i = 0 ; i < sz ; i++)
--	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
--   }
--#endif
--}
--
--/* using already shifted color_fmt! */
--void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
--		   GLuint color_fmt,
--		   GLuint src_pitch,
--		   GLuint src_offset,
--		   GLuint dst_pitch,
--		   GLuint dst_offset,
--		   GLint srcx, GLint srcy,
--		   GLint dstx, GLint dsty,
--		   GLuint w, GLuint h )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--	      __FUNCTION__, 
--	      src_pitch, src_offset, srcx, srcy,
--	      dst_pitch, dst_offset, dstx, dsty,
--	      w, h);
--
--   assert( (src_pitch & 63) == 0 );
--   assert( (dst_pitch & 63) == 0 );
--   assert( (src_offset & 1023) == 0 ); 
--   assert( (dst_offset & 1023) == 0 ); 
--   assert( w < (1<<16) );
--   assert( h < (1<<16) );
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
--						  __FUNCTION__ );
--
--
--   cmd[0].i = 0;
--   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
--   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
--   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--	       RADEON_GMC_BRUSH_NONE |
--	       color_fmt |
--	       RADEON_GMC_SRC_DATATYPE_COLOR |
--	       RADEON_ROP3_S |
--	       RADEON_DP_SRC_SOURCE_MEMORY |
--	       RADEON_GMC_CLR_CMP_CNTL_DIS |
--	       RADEON_GMC_WR_MSK_DIS );
--
--   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
--   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
--   cmd[5].i = (srcx << 16) | srcy;
--   cmd[6].i = (dstx << 16) | dsty; /* dst */
--   cmd[7].i = (w << 16) | h;
--}
--
--
--void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
--{
--   drm_radeon_cmd_header_t *cmd;
--
--   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
--
--   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
--					   __FUNCTION__ );
--   cmd[0].i = 0;
--   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
--   cmd[0].wait.flags = flags;
--}
--
--
--static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
--				    const char * caller )
--{
--   int ret, i;
--   drm_radeon_cmd_buffer_t cmd;
--
--   if (rmesa->lost_context)
--      radeonBackUpAndEmitLostStateLocked(rmesa);
--
--   if (RADEON_DEBUG & DEBUG_IOCTL) {
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--
--      if (RADEON_DEBUG & DEBUG_VERBOSE) 
--	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
--	    fprintf(stderr, "%d: %x\n", i/4, 
--		    *(int *)(&rmesa->store.cmd_buf[i]));
--   }
--
--   if (RADEON_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
--	      rmesa->dma.nr_released_bufs);
--
--
--   if (RADEON_DEBUG & DEBUG_SANITY) {
--      if (rmesa->state.scissor.enabled) 
--	 ret = radeonSanityCmdBuffer( rmesa, 
--				      rmesa->state.scissor.numClipRects,
--				      rmesa->state.scissor.pClipRects);
--      else
--	 ret = radeonSanityCmdBuffer( rmesa, 
--				      rmesa->numClipRects,
--				      rmesa->pClipRects);
--      if (ret) {
--	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
--	 goto out;
-+   BEGIN_BATCH(sz+2+(nr * 2));
-+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+   OUT_BATCH(nr);
-+
-+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
-+		   (rmesa->tcl.aos[i].stride << 8) |
-+		   (rmesa->tcl.aos[i + 1].components << 16) |
-+		   (rmesa->tcl.aos[i + 1].stride << 24));
-+			
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[i].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[i+1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--   }
--
--
--   cmd.bufsz = rmesa->store.cmd_used;
--   cmd.buf = rmesa->store.cmd_buf;
--
--   if (rmesa->state.scissor.enabled) {
--      cmd.nbox = rmesa->state.scissor.numClipRects;
--      cmd.boxes = rmesa->state.scissor.pClipRects;
--   } else {
--      cmd.nbox = rmesa->numClipRects;
--      cmd.boxes = rmesa->pClipRects;
--   }
--
--   ret = drmCommandWrite( rmesa->dri.fd,
--			  DRM_RADEON_CMDBUF,
--			  &cmd, sizeof(cmd) );
--
--   if (ret)
--      fprintf(stderr, "drmCommandWrite: %d\n", ret);
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
--      radeonWaitForIdleLocked( rmesa );
--   }
--
-- out:
--   rmesa->store.primnr = 0;
--   rmesa->store.statenr = 0;
--   rmesa->store.cmd_used = 0;
--   rmesa->dma.nr_released_bufs = 0;
--   rmesa->save_on_next_emit = 1;
--
--   return ret;
--}
--
--
--/* Note: does not emit any commands to avoid recursion on
-- * radeonAllocCmdBuf.
-- */
--void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
--{
--   int ret;
--
--	      
--   LOCK_HARDWARE( rmesa );
--
--   ret = radeonFlushCmdBufLocked( rmesa, caller );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if (ret) {
--      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
--      exit(ret);
--   }
--}
--
--/* =============================================================
-- * Hardware vertex buffer handling
-- */
--
--
--void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
--{
--   struct radeon_dma_buffer *dmabuf;
--   int fd = rmesa->dri.fd;
--   int index = 0;
--   int size = 0;
--   drmDMAReq dma;
--   int ret;
--
--   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--      fprintf(stderr, "%s\n", __FUNCTION__);  
--
--   if (rmesa->dma.flush) {
--      rmesa->dma.flush( rmesa );
--   }
--
--   if (rmesa->dma.current.buf)
--      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
--
--   if (rmesa->dma.nr_released_bufs > 4)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--
--   dma.context = rmesa->dri.hwContext;
--   dma.send_count = 0;
--   dma.send_list = NULL;
--   dma.send_sizes = NULL;
--   dma.flags = 0;
--   dma.request_count = 1;
--   dma.request_size = RADEON_BUFFER_SIZE;
--   dma.request_list = &index;
--   dma.request_sizes = &size;
--   dma.granted_count = 0;
--
--   LOCK_HARDWARE(rmesa);	/* no need to validate */
--
--   ret = drmDMA( fd, &dma );
-       
--   if (ret != 0) {
--      /* Free some up this way?
--       */
--      if (rmesa->dma.nr_released_bufs) {
--	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 OUT_BATCH_RELOC(voffset,
-+			 rmesa->tcl.aos[nr - 1].bo,
-+			 voffset,
-+			 RADEON_GEM_DOMAIN_GTT,
-+			 0, 0);
-       }
--      
--      if (RADEON_DEBUG & DEBUG_DMA)
--	 fprintf(stderr, "Waiting for buffers\n");
--
--      radeonWaitForIdleLocked( rmesa );
--      ret = drmDMA( fd, &dma );
--
--      if ( ret != 0 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
--	 exit( -1 );
-+   } else {
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
-+		   (rmesa->tcl.aos[i].stride << 8) |
-+		   (rmesa->tcl.aos[i + 1].components << 16) |
-+		   (rmesa->tcl.aos[i + 1].stride << 24));
-+	 
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 OUT_BATCH(voffset);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 OUT_BATCH(voffset);
-       }
--   }
--
--   UNLOCK_HARDWARE(rmesa);
--
--   if (RADEON_DEBUG & DEBUG_DMA)
--      fprintf(stderr, "Allocated buffer %d\n", index);
--
--   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
--   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
--   dmabuf->refcount = 1;
--
--   rmesa->dma.current.buf = dmabuf;
--   rmesa->dma.current.address = dmabuf->buf->address;
--   rmesa->dma.current.end = dmabuf->buf->total;
--   rmesa->dma.current.start = 0;
--   rmesa->dma.current.ptr = 0;
--
--   rmesa->c_vertexBuffers++;
--}
--
--void radeonReleaseDmaRegion( radeonContextPtr rmesa,
--			     struct radeon_dma_region *region,
--			     const char *caller )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
--   
--   if (!region->buf)
--      return;
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (--region->buf->refcount == 0) {
--      drm_radeon_cmd_header_t *cmd;
--
--      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
--	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
--		 region->buf->buf->idx);  
-       
--      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
--						     __FUNCTION__ );
--      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
--      cmd->dma.buf_idx = region->buf->buf->idx;
--      FREE(region->buf);
--      rmesa->dma.nr_released_bufs++;
--   }
--
--   region->buf = NULL;
--   region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void radeonAllocDmaRegion( radeonContextPtr rmesa, 
--			   struct radeon_dma_region *region,
--			   int bytes,
--			   int alignment )
--{
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   if (region->buf)
--      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
--
--   alignment--;
--   rmesa->dma.current.start = rmesa->dma.current.ptr = 
--      (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      radeonRefillCurrentDmaRegion( rmesa );
--
--   region->start = rmesa->dma.current.start;
--   region->ptr = rmesa->dma.current.start;
--   region->end = rmesa->dma.current.start + bytes;
--   region->address = rmesa->dma.current.address;
--   region->buf = rmesa->dma.current.buf;
--   region->buf->refcount++;
--
--   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
--   rmesa->dma.current.start = 
--      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
--}
--
--/* ================================================================
-- * SwapBuffers with client-side throttling
-- */
--
--static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) 
--{
--   drm_radeon_getparam_t gp;
--   int ret;
--   uint32_t frame;
--
--   gp.param = RADEON_PARAM_LAST_FRAME;
--   gp.value = (int *)&frame;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
--			      &gp, sizeof(gp) );
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--
--   return frame;
--}
--
--static void radeonEmitIrqLocked( radeonContextPtr rmesa )
--{
--   drm_radeon_irq_emit_t ie;
--   int ret;
--
--   ie.irq_seq = &rmesa->iw.irq_seq;
--   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
--			      &ie, sizeof(ie) );
--   if ( ret ) {
--      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void radeonWaitIrq( radeonContextPtr rmesa )
--{
--   int ret;
--
--   do {
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
--			     &rmesa->iw, sizeof(rmesa->iw) );
--   } while (ret && (errno == EINTR || errno == EBUSY));
--
--   if ( ret ) {
--      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
--      exit(1);
--   }
--}
--
--
--static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
--{
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
--
--   if (rmesa->do_irqs) {
--      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
--	 if (!rmesa->irqsEmitted) {
--	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
--	       ;
--	 }
--	 else {
--	    UNLOCK_HARDWARE( rmesa ); 
--	    radeonWaitIrq( rmesa );	
--	    LOCK_HARDWARE( rmesa ); 
--	 }
--	 rmesa->irqsEmitted = 10;
-+      if (nr & 1) {
-+	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
-+		   (rmesa->tcl.aos[nr - 1].stride << 8));
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 OUT_BATCH(voffset);
-       }
--
--      if (rmesa->irqsEmitted) {
--	 radeonEmitIrqLocked( rmesa );
--	 rmesa->irqsEmitted--;
-+      for (i = 0; i + 1 < nr; i += 2) {
-+	 voffset =  rmesa->tcl.aos[i + 0].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[i+0].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-+	 voffset =  rmesa->tcl.aos[i + 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[i+1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
--   } 
--   else {
--      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
--	 UNLOCK_HARDWARE( rmesa ); 
--	 if (rmesa->do_usleeps) 
--	    DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa ); 
-+      if (nr & 1) {
-+	 voffset =  rmesa->tcl.aos[nr - 1].offset +
-+	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
-+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-+			       rmesa->tcl.aos[nr-1].bo,
-+			       RADEON_GEM_DOMAIN_GTT,
-+			       0, 0);
-       }
-    }
--}
--
--/* Copy the back color buffer to the front color buffer.
-- */
--void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
--		       const drm_clip_rect_t	  *rect)
--{
--   radeonContextPtr rmesa;
--   GLint nbox, i, ret;
--   GLboolean   missed_target;
--   int64_t ust;
--   __DRIscreenPrivate *psp;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
--   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
--      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
--   }
--
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   /* Throttle the frame rate -- only allow one pending swap buffers
--    * request at a time.
--    */
--   radeonWaitForFrameCompletion( rmesa );
--   if (!rect)
--   {
--       UNLOCK_HARDWARE( rmesa );
--       driWaitForVBlank( dPriv, & missed_target );
--       LOCK_HARDWARE( rmesa );
--   }
-+   END_BATCH();
- 
--   nbox = dPriv->numClipRects; /* must be in locked region */
--
--   for ( i = 0 ; i < nbox ; ) {
--      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      GLint n = 0;
--
--      for ( ; i < nr ; i++ ) {
--
--	  *b = box[i];
--
--	  if (rect)
--	  {
--	      if (rect->x1 > b->x1)
--		  b->x1 = rect->x1;
--	      if (rect->y1 > b->y1)
--		  b->y1 = rect->y1;
--	      if (rect->x2 < b->x2)
--		  b->x2 = rect->x2;
--	      if (rect->y2 < b->y2)
--		  b->y2 = rect->y2;
--
--	      if (b->x1 >= b->x2 || b->y1 >= b->y2)
--		  continue;
--	  }
--
--	  b++;
--	  n++;
--      }
--      rmesa->sarea->nbox = n;
--
--      if (!n)
--	 continue;
--
--      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
--
--      if ( ret ) {
--	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
--	 UNLOCK_HARDWARE( rmesa );
--	 exit( 1 );
--      }
--   }
--
--   UNLOCK_HARDWARE( rmesa );
--   if (!rect)
--   {
--       psp = dPriv->driScreenPriv;
--       rmesa->swap_count++;
--       (*psp->systemTime->getUST)( & ust );
--       if ( missed_target ) {
--	   rmesa->swap_missed_count++;
--	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
--       }
--
--       rmesa->swap_ust = ust;
--       rmesa->hw.all_dirty = GL_TRUE;
--   }
--}
--
--void radeonPageFlip( __DRIdrawablePrivate *dPriv )
--{
--   radeonContextPtr rmesa;
--   GLint ret;
--   GLboolean   missed_target;
--   __DRIscreenPrivate *psp;
--
--   assert(dPriv);
--   assert(dPriv->driContextPriv);
--   assert(dPriv->driContextPriv->driverPrivate);
--
--   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--   psp = dPriv->driScreenPriv;
--
--   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
--      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
--	      rmesa->sarea->pfCurrentPage);
--   }
--
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
--
--   /* Need to do this for the perf box placement:
--    */
--   if (dPriv->numClipRects)
--   {
--      drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
--      b[0] = box[0];
--      rmesa->sarea->nbox = 1;
--   }
--
--   /* Throttle the frame rate -- only allow a few pending swap buffers
--    * request at a time.
--    */
--   radeonWaitForFrameCompletion( rmesa );
--   UNLOCK_HARDWARE( rmesa );
--   driWaitForVBlank( dPriv, & missed_target );
--   if ( missed_target ) {
--      rmesa->swap_missed_count++;
--      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
--   }
--   LOCK_HARDWARE( rmesa );
--
--   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
--      exit( 1 );
--   }
--
--   rmesa->swap_count++;
--   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
--
--   /* Get ready for drawing next frame.  Update the renderbuffers'
--    * flippedOffset/Pitch fields so we draw into the right place.
--    */
--   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--                        rmesa->sarea->pfCurrentPage);
--
--   radeonUpdateDrawBuffer(rmesa->glCtx);
-+#endif
- }
- 
--
- /* ================================================================
-  * Buffer clear
-  */
-@@ -1028,9 +375,9 @@ void radeonPageFlip( __DRIdrawablePrivate *dPriv )
- 
- static void radeonClear( GLcontext *ctx, GLbitfield mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   drm_radeon_sarea_t *sarea = rmesa->sarea;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
-    uint32_t clear;
-    GLuint flags = 0;
-    GLuint color_mask = 0;
-@@ -1042,8 +389,8 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
-    }
- 
-    {
--      LOCK_HARDWARE( rmesa );
--      UNLOCK_HARDWARE( rmesa );
-+      LOCK_HARDWARE( &rmesa->radeon );
-+      UNLOCK_HARDWARE( &rmesa->radeon );
-       if ( dPriv->numClipRects == 0 ) 
- 	 return;
-    }
-@@ -1067,7 +414,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
-       mask &= ~BUFFER_BIT_DEPTH;
-    }
- 
--   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
-+   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
-       flags |= RADEON_STENCIL;
-       mask &= ~BUFFER_BIT_STENCIL;
-    }
-@@ -1083,16 +430,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 
-    if (rmesa->using_hyperz) {
-       flags |= RADEON_USE_COMP_ZBUF;
--/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
-+/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) 
-          flags |= RADEON_USE_HIERZ; */
--      if (!(rmesa->state.stencil.hwBuffer) ||
-+      if (!(rmesa->radeon.state.stencil.hwBuffer) ||
- 	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
--	    ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
-+	    ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
- 	  flags |= RADEON_CLEAR_FASTZ;
-       }
-    }
- 
--   LOCK_HARDWARE( rmesa );
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* compute region after locking: */
-    cx = ctx->DrawBuffer->_Xmin;
-@@ -1112,7 +459,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 
-       gp.param = RADEON_PARAM_LAST_CLEAR;
-       gp.value = (int *)&clear;
--      ret = drmCommandWriteRead( rmesa->dri.fd,
-+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
- 				 DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
- 
-       if ( ret ) {
-@@ -1124,20 +471,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 	 break;
-       }
- 
--      if ( rmesa->do_usleeps ) {
--	 UNLOCK_HARDWARE( rmesa );
-+      if ( rmesa->radeon.do_usleeps ) {
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 DO_USLEEP( 1 );
--	 LOCK_HARDWARE( rmesa );
-+	 LOCK_HARDWARE( &rmesa->radeon );
-       }
-    }
- 
-    /* Send current state to the hardware */
--   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
- 
-    for ( i = 0 ; i < dPriv->numClipRects ; ) {
-       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
-       drm_clip_rect_t *box = dPriv->pClipRects;
--      drm_clip_rect_t *b = rmesa->sarea->boxes;
-+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
-       drm_radeon_clear_t clear;
-       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
-       GLint n = 0;
-@@ -1172,106 +519,40 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
- 	 }
-       }
- 
--      rmesa->sarea->nbox = n;
-+      rmesa->radeon.sarea->nbox = n;
- 
-       clear.flags       = flags;
--      clear.clear_color = rmesa->state.color.clear;
--      clear.clear_depth = rmesa->state.depth.clear;
-+      clear.clear_color = rmesa->radeon.state.color.clear;
-+      clear.clear_depth = rmesa->radeon.state.depth.clear;
-       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
--      clear.depth_mask  = rmesa->state.stencil.clear;
-+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
-       clear.depth_boxes = depth_boxes;
- 
-       n--;
--      b = rmesa->sarea->boxes;
-+      b = rmesa->radeon.sarea->boxes;
-       for ( ; n >= 0 ; n-- ) {
- 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
- 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
- 	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
- 	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
- 	 depth_boxes[n].f[CLEAR_DEPTH] = 
--	    (float)rmesa->state.depth.clear;
-+	    (float)rmesa->radeon.state.depth.clear;
-       }
- 
--      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
-+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
- 			     &clear, sizeof(drm_radeon_clear_t));
- 
-       if ( ret ) {
--	 UNLOCK_HARDWARE( rmesa );
-+	 UNLOCK_HARDWARE( &rmesa->radeon );
- 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
- 	 exit( 1 );
-       }
-    }
- 
--   UNLOCK_HARDWARE( rmesa );
--   rmesa->hw.all_dirty = GL_TRUE;
-+   UNLOCK_HARDWARE( &rmesa->radeon );
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
- }
- 
--
--void radeonWaitForIdleLocked( radeonContextPtr rmesa )
--{
--    int fd = rmesa->dri.fd;
--    int to = 0;
--    int ret, i = 0;
--
--    rmesa->c_drawWaits++;
--
--    do {
--        do {
--            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
--        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
--    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
--
--    if ( ret < 0 ) {
--	UNLOCK_HARDWARE( rmesa );
--	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
--	exit( -1 );
--    }
--}
--
--
--static void radeonWaitForIdle( radeonContextPtr rmesa )
--{
--   LOCK_HARDWARE(rmesa);
--   radeonWaitForIdleLocked( rmesa );
--   UNLOCK_HARDWARE(rmesa);
--}
--
--
--void radeonFlush( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
--
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
--
--   radeonEmitState( rmesa );
--   
--   if (rmesa->store.cmd_used)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--}
--
--/* Make sure all commands have been sent to the hardware and have
-- * completed processing.
-- */
--void radeonFinish( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   radeonFlush( ctx );
--
--   if (rmesa->do_irqs) {
--      LOCK_HARDWARE( rmesa );
--      radeonEmitIrqLocked( rmesa );
--      UNLOCK_HARDWARE( rmesa );
--      radeonWaitIrq( rmesa );
--   }
--   else
--      radeonWaitForIdle( rmesa );
--}
--
--
- void radeonInitIoctlFuncs( GLcontext *ctx )
- {
-     ctx->Driver.Clear = radeonClear;
-diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-index 4e3a44d..18805d4 100644
---- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
-@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/simple_list.h"
- #include "radeon_lock.h"
-+#include "radeon_bocs_wrapper.h"
- 
--
--extern void radeonEmitState( radeonContextPtr rmesa );
--extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
-+extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
- 				 GLuint vertex_size,
-+				 struct radeon_bo *bo,
- 				 GLuint offset );
- 
--extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
-+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
- 				GLuint vertex_format,
- 				GLuint primitive,
- 				GLuint vertex_nr );
- 
--extern void radeonFlushElts( radeonContextPtr rmesa );
-+extern void radeonFlushElts( GLcontext *ctx );
-+			    
- 
--extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
-+extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
- 					   GLuint vertex_format,
- 					   GLuint primitive,
- 					   GLuint min_nr );
- 
--extern void radeonEmitAOS( radeonContextPtr rmesa,
--			   struct radeon_dma_region **regions,
-+
-+extern void radeonEmitAOS( r100ContextPtr rmesa,
- 			   GLuint n,
- 			   GLuint offset );
- 
--extern void radeonEmitBlit( radeonContextPtr rmesa,
-+extern void radeonEmitBlit( r100ContextPtr rmesa,
- 			    GLuint color_fmt,
- 			    GLuint src_pitch,
- 			    GLuint src_offset,
-@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
- 			    GLint dstx, GLint dsty,
- 			    GLuint w, GLuint h );
- 
--extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
--
--extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
--extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
-+extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
- 
--extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
--				  struct radeon_dma_region *region,
--				  int bytes, 
--				  int alignment );
-+extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
- 
--extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
--				    struct radeon_dma_region *region,
--				    const char *caller );
--
--extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
--			      const drm_clip_rect_t	 *rect);
--extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
- extern void radeonFlush( GLcontext *ctx );
- extern void radeonFinish( GLcontext *ctx );
--extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
--extern void radeonWaitForVBlank( radeonContextPtr rmesa );
- extern void radeonInitIoctlFuncs( GLcontext *ctx );
--extern void radeonGetAllParams( radeonContextPtr rmesa );
--extern void radeonSetUpAtomList( radeonContextPtr rmesa );
-+extern void radeonGetAllParams( r100ContextPtr rmesa );
-+extern void radeonSetUpAtomList( r100ContextPtr rmesa );
- 
- /* ================================================================
-  * Helper macros:
-@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
-  */
- #define RADEON_NEWPRIM( rmesa )			\
- do {						\
--   if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );	\
-+   if ( rmesa->radeon.dma.flush )			\
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
- } while (0)
- 
- /* Can accomodate several state changes and primitive changes without
-  * actually firing the buffer.
-  */
-+
- #define RADEON_STATECHANGE( rmesa, ATOM )			\
- do {								\
-    RADEON_NEWPRIM( rmesa );					\
-    rmesa->hw.ATOM.dirty = GL_TRUE;				\
--   rmesa->hw.is_dirty = GL_TRUE;				\
-+   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
- } while (0)
- 
--#define RADEON_DB_STATE( ATOM )			        \
-+#define RADEON_DB_STATE( ATOM )				\
-    memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
- 	   rmesa->hw.ATOM.cmd_size * 4)
- 
--static INLINE int RADEON_DB_STATECHANGE( 
--   radeonContextPtr rmesa,
--   struct radeon_state_atom *atom )
-+static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
-+					struct radeon_state_atom *atom )
- {
-    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
--      int *tmp;
-+      GLuint *tmp;
-       RADEON_NEWPRIM( rmesa );
-       atom->dirty = GL_TRUE;
--      rmesa->hw.is_dirty = GL_TRUE;
-+      rmesa->radeon.hw.is_dirty = GL_TRUE;
-       tmp = atom->cmd; 
-       atom->cmd = atom->lastcmd;
-       atom->lastcmd = tmp;
-@@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE(
-       return 0;
- }
- 
--
--/* Fire the buffered vertices no matter what.
-- */
--#define RADEON_FIREVERTICES( rmesa )			\
--do {							\
--   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
--      radeonFlush( rmesa->glCtx );			\
--   }							\
--} while (0)
--
- /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
-  * are available, you will also be adding an rmesa->state.max_state_size because
-  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
-@@ -167,36 +143,37 @@ do {							\
- #define VBUF_BUFSZ	(4 * sizeof(int))
- #endif
- 
--/* Ensure that a minimum amount of space is available in the command buffer.
-- * This is used to ensure atomicity of state updates with the rendering requests
-- * that rely on them.
-- *
-- * An alternative would be to implement a "soft lock" such that when the buffer
-- * wraps at an inopportune time, we grab the lock, flush the current buffer,
-- * and hang on to the lock until the critical section is finished and we flush
-- * the buffer again and unlock.
-- */
--static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
--					      int bytes )
--{
--   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
--   assert( bytes <= RADEON_CMD_BUF_SZ );
--}
- 
--/* Alloc space in the command buffer
-- */
--static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
--					 int bytes, const char *where )
-+static inline uint32_t cmdpacket3(int cmd_type)
- {
--   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
--      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-+  drm_radeon_cmd_header_t cmd;
-+
-+  cmd.i = 0;
-+  cmd.header.cmd_type = cmd_type;
-+
-+  return (uint32_t)cmd.i;
- 
--   {
--      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
--      rmesa->store.cmd_used += bytes;
--      return head;
--   }
- }
- 
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
-+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
-+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    } else {						      \
-+      OUT_BATCH(CP_PACKET2);				      \
-+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
-+    }							      \
-+  } while(0)
-+
-+
- #endif /* __RADEON_IOCTL_H__ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
-index 64bb3ca..9a7e76b 100644
---- a/src/mesa/drivers/dri/radeon/radeon_lock.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
-@@ -41,12 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "main/glheader.h"
- #include "main/mtypes.h"
--#include "radeon_context.h"
-+#include "main/colormac.h"
-+#include "dri_util.h"
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
- #include "radeon_lock.h"
--#include "radeon_tex.h"
--#include "radeon_state.h"
--#include "radeon_ioctl.h"
--
- #include "drirenderbuffer.h"
- 
- #if DEBUG_LOCKING
-@@ -56,13 +55,28 @@ int prevLockLine = 0;
- 
- /* Turn on/off page flipping according to the flags in the sarea:
-  */
--static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
-+void radeonUpdatePageFlipping(radeonContextPtr rmesa)
- {
-+	int use_back;
-+	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
-+	GLframebuffer *fb = drawable->driverPrivate;
-+
- 	rmesa->doPageFlip = rmesa->sarea->pfState;
- 	if (rmesa->glCtx->WinSysDrawBuffer) {
--		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--				     rmesa->sarea->pfCurrentPage);
-+		rmesa->vtbl.update_draw_buffer(rmesa->glCtx);
- 	}
-+
-+	use_back = rmesa->glCtx->DrawBuffer ?
-+	    (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
-+	     BUFFER_BACK_LEFT) : 1;
-+	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
-+
-+	if (use_back)
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	else
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
-+	rmesa->state.depth.rrb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- }
- 
- /* Update the hardware state.  This is called if another context has
-@@ -80,6 +94,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
- 	drm_radeon_sarea_t *sarea = rmesa->sarea;
- 
-+	assert(drawable != NULL);
-+
- 	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
- 
- 	/* The window might have moved, so we might need to get new clip
-@@ -98,27 +114,11 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	if (rmesa->lastStamp != drawable->lastStamp) {
- 		radeonUpdatePageFlipping(rmesa);
- 		radeonSetCliprects(rmesa);
--		radeonUpdateViewportOffset(rmesa->glCtx);
-+		rmesa->vtbl.update_viewport_offset(rmesa->glCtx);
- 		driUpdateFramebufferSize(rmesa->glCtx, drawable);
- 	}
- 
--	RADEON_STATECHANGE(rmesa, ctx);
--	if (rmesa->sarea->tiling_enabled) {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
--		    RADEON_COLOR_TILE_ENABLE;
--	} else {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
--		    ~RADEON_COLOR_TILE_ENABLE;
--	}
--
--	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--		sarea->ctx_owner = rmesa->dri.hwContext;
--
--		for (i = 0; i < rmesa->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
--		}
--	}
-+	rmesa->vtbl.get_lock(rmesa);
- 
- 	rmesa->lost_context = GL_TRUE;
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
-index 86e96aa..f5ebb8d 100644
---- a/src/mesa/drivers/dri/radeon/radeon_lock.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
-@@ -39,8 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  *   Kevin E. Martin <martin@valinux.com>
-  */
- 
--#ifndef __RADEON_LOCK_H__
--#define __RADEON_LOCK_H__
-+#ifndef COMMON_LOCK_H
-+#define COMMON_LOCK_H
-+
-+#include "main/colormac.h"
-+#include "radeon_screen.h"
-+#include "radeon_common.h"
- 
- extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
- 
-@@ -94,19 +98,23 @@ extern int prevLockLine;
-    do {								\
-       char __ret = 0;						\
-       DEBUG_CHECK_LOCK();					\
--      DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
--	       (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
--      if ( __ret )						\
--	 radeonGetLock( (rmesa), 0 );				\
--      DEBUG_LOCK();						\
-+      if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) {		\
-+	DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
-+		 (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
-+	if ( __ret )							\
-+	  radeonGetLock( (rmesa), 0 );					\
-+      }									\
-+      DEBUG_LOCK();							\
-    } while (0)
- 
- #define UNLOCK_HARDWARE( rmesa )					\
-    do {									\
--      DRM_UNLOCK( (rmesa)->dri.fd,					\
--		  (rmesa)->dri.hwLock,					\
--		  (rmesa)->dri.hwContext );				\
--      DEBUG_RESET();							\
-+     if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) {		\
-+       DRM_UNLOCK( (rmesa)->dri.fd,					\
-+		   (rmesa)->dri.hwLock,					\
-+		   (rmesa)->dri.hwContext );				\
-+       DEBUG_RESET();							\
-+     }									\
-    } while (0)
- 
--#endif				/* __RADEON_LOCK_H__ */
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-index 31eea13..7f5da16 100644
---- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
-@@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_maos.h"
- #include "radeon_tcl.h"
- 
--#if 0
--/* Usage:
-- *   - from radeon_tcl_render
-- *   - call radeonEmitArrays to ensure uptodate arrays in dma
-- *   - emit primitives (new type?) which reference the data
-- *       -- need to use elts for lineloop, quads, quadstrip/flat
-- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
-- *
-- */
--static void emit_ubyte_rgba3( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
-+static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
-+			GLvoid *data, int stride, int count)
- {
-    int i;
--   radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p\n",
--	      __FUNCTION__, count, stride, (void *)out);
--
--   for (i = 0; i < count; i++) {
--      out->red   = *data;
--      out->green = *(data+1);
--      out->blue  = *(data+2);
--      out->alpha = 0xFF;
--      out++;
--      data += stride;
--   }
--}
--
--static void emit_ubyte_rgba4( GLcontext *ctx,
--			      struct radeon_dma_region *rvb,
--			      char *data,
--			      int stride,
--			      int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
-+   uint32_t *out;
-+   int size = 1;
-+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
- 	      __FUNCTION__, count, stride);
- 
--   if (stride == 4)
--       COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 *out++ = LE32_TO_CPU(*(int *)data);
--	 data += stride;
--      }
--}
--
--
--static void emit_ubyte_rgba( GLcontext *ctx,
--			     struct radeon_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
--
--   assert (!rvb->buf);
--
-    if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
-+      radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
-       count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
-+      aos->stride = 0;
-    }
-    else {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
-+      aos->stride = size;
-    }
- 
--   /* Emit the data
--    */
--   switch (size) {
--   case 3:
--      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--}
--#endif
--
--#if defined(USE_X86_ASM)
--#define COPY_DWORDS( dst, src, nr )					\
--do {									\
--	int __tmp;							\
--	__asm__ __volatile__( "rep ; movsl"				\
--			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
--			      : "0" (nr),				\
--			        "D" ((long)dst),			\
--			        "S" ((long)src) );			\
--} while (0)
--#else
--#define COPY_DWORDS( dst, src, nr )		\
--do {						\
--   int j;					\
--   for ( j = 0 ; j < nr ; j++ )			\
--      dst[j] = ((int *)src)[j];			\
--   dst += nr;					\
--} while (0)
--#endif
--
--static void emit_vecfog( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
--{
--   int i;
--   GLfloat *out;
--
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   aos->components = size;
-+   aos->count = count;
- 
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = 1;
--   }
--   else {
--      radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 1;
--      rvb->aos_size = 1;
--   }
- 
-    /* Emit the data
-     */
--   out = (GLfloat *)(rvb->address + rvb->start);
-+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-    for (i = 0; i < count; i++) {
-       out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
-       out++;
-@@ -209,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx,
-    }
- }
- 
--static void emit_vec4( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 4)
--      COPY_DWORDS( out, data, count );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out++;
--	 data += stride;
--      }
--}
--
--
--static void emit_vec8( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 8)
--      COPY_DWORDS( out, data, count*2 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out += 2;
--	 data += stride;
--      }
--}
--
--static void emit_vec12( GLcontext *ctx,
--		       struct radeon_dma_region *rvb,
--		       char *data,
--		       int stride,
--		       int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
--	      __FUNCTION__, count, stride, (void *)out, (void *)data);
--
--   if (stride == 12)
--      COPY_DWORDS( out, data, count*3 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out += 3;
--	 data += stride;
--      }
--}
--
--static void emit_vec16( GLcontext *ctx,
--			struct radeon_dma_region *rvb,
--			char *data,
--			int stride,
--			int count )
--{
--   int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d stride %d\n",
--	      __FUNCTION__, count, stride);
--
--   if (stride == 16)
--      COPY_DWORDS( out, data, count*4 );
--   else
--      for (i = 0; i < count; i++) {
--	 out[0] = *(int *)data;
--	 out[1] = *(int *)(data+4);
--	 out[2] = *(int *)(data+8);
--	 out[3] = *(int *)(data+12);
--	 out += 4;
--	 data += stride;
--      }
--}
--
--
--static void emit_vector( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int size,
--			 int stride,
--			 int count )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (RADEON_DEBUG & DEBUG_VERTS)
--      fprintf(stderr, "%s count %d size %d stride %d\n",
--	      __FUNCTION__, count, size, stride);
--
--   assert (!rvb->buf);
--
--   if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
--      count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = size;
--   }
--   else {
--      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = size;
--      rvb->aos_size = size;
--   }
--
--   /* Emit the data
--    */
--   switch (size) {
--   case 1:
--      emit_vec4( ctx, rvb, data, stride, count );
--      break;
--   case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
--      break;
--   case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
--      break;
--   case 4:
--      emit_vec16( ctx, rvb, data, stride, count );
--      break;
--   default:
--      assert(0);
--      exit(1);
--      break;
--   }
--
--}
--
--
--
--static void emit_s0_vec( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
- {
-    int i;
--   int *out = (int *)(rvb->address + rvb->start);
--
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
- 	      __FUNCTION__, count, stride);
-@@ -384,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx,
-    }
- }
- 
--static void emit_stq_vec( GLcontext *ctx,
--			 struct radeon_dma_region *rvb,
--			 char *data,
--			 int stride,
--			 int count )
-+static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
- {
-    int i;
--   int *out = (int *)(rvb->address + rvb->start);
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s count %d stride %d\n",
-@@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
- 
- 
- 
--static void emit_tex_vector( GLcontext *ctx,
--			     struct radeon_dma_region *rvb,
--			     char *data,
--			     int size,
--			     int stride,
--			     int count )
-+static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
-+			    GLvoid *data, int size, int stride, int count)
- {
-    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-    int emitsize;
-+   uint32_t *out;
- 
-    if (RADEON_DEBUG & DEBUG_VERTS)
-       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
- 
--   assert (!rvb->buf);
--
-    switch (size) {
-    case 4: emitsize = 3; break;
-    case 3: emitsize = 3; break;
-@@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
- 
- 
-    if (stride == 0) {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
-       count = 1;
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = 0;
--      rvb->aos_size = emitsize;
-+      aos->stride = 0;
-    }
-    else {
--      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
--      rvb->aos_start = GET_START(rvb);
--      rvb->aos_stride = emitsize;
--      rvb->aos_size = emitsize;
-+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
-+      aos->stride = emitsize;
-    }
- 
-+   aos->components = emitsize;
-+   aos->count = count;
- 
-    /* Emit the data
-     */
-+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
-    switch (size) {
-    case 1:
--      emit_s0_vec( ctx, rvb, data, stride, count ); 
-+      emit_s0_vec( out, data, stride, count );
-       break;
-    case 2:
--      emit_vec8( ctx, rvb, data, stride, count );
-+      radeonEmitVec8( out, data, stride, count );
-       break;
-    case 3:
--      emit_vec12( ctx, rvb, data, stride, count );
-+      radeonEmitVec12( out, data, stride, count );
-       break;
-    case 4:
--      emit_stq_vec( ctx, rvb, data, stride, count );
-+      emit_stq_vec( out, data, stride, count );
-       break;
-    default:
-       assert(0);
-@@ -476,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx,
-  */
- void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
--   struct radeon_dma_region **component = rmesa->tcl.aos_components;
-    GLuint nr = 0;
-    GLuint vfmt = 0;
-    GLuint count = VB->Count;
-@@ -491,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
-    if (1) {
-       if (!rmesa->tcl.obj.buf) 
--	 emit_vector( ctx, 
--		      &rmesa->tcl.obj, 
--		      (char *)VB->ObjPtr->data,
--		      VB->ObjPtr->size,
--		      VB->ObjPtr->stride,
--		      count);
-+	rcommon_emit_vector( ctx, 
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->ObjPtr->data,
-+			     VB->ObjPtr->size,
-+			     VB->ObjPtr->stride,
-+			     count);
- 
-       switch( VB->ObjPtr->size ) {
-       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
-@@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       default:
-          break;
-       }
--      component[nr++] = &rmesa->tcl.obj;
-+      nr++;
-    }
-    
- 
-    if (inputs & VERT_BIT_NORMAL) {
-       if (!rmesa->tcl.norm.buf)
--	 emit_vector( ctx, 
--		      &(rmesa->tcl.norm), 
--		      (char *)VB->NormalPtr->data,
--		      3,
--		      VB->NormalPtr->stride,
--		      count);
-+	 rcommon_emit_vector( ctx, 
-+			      &(rmesa->tcl.aos[nr]),
-+			      (char *)VB->NormalPtr->data,
-+			      3,
-+			      VB->NormalPtr->stride,
-+			      count);
- 
-       vfmt |= RADEON_CP_VC_FRMT_N0;
--      component[nr++] = &rmesa->tcl.norm;
-+      nr++;
-    }
- 
-    if (inputs & VERT_BIT_COLOR0) {
-@@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       }
- 
-       if (!rmesa->tcl.rgba.buf)
--	 emit_vector( ctx,
--		      &(rmesa->tcl.rgba),
--		      (char *)VB->ColorPtr[0]->data,
--		      emitsize,
--		      VB->ColorPtr[0]->stride,
--		      count);
--
--
--      component[nr++] = &rmesa->tcl.rgba;
-+	rcommon_emit_vector( ctx,
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->ColorPtr[0]->data,
-+			     emitsize,
-+			     VB->ColorPtr[0]->stride,
-+			     count);
-+
-+      nr++;
-    }
- 
- 
-    if (inputs & VERT_BIT_COLOR1) {
-       if (!rmesa->tcl.spec.buf) {
- 
--	 emit_vector( ctx,
--		      &rmesa->tcl.spec,
--		      (char *)VB->SecondaryColorPtr[0]->data,
--		      3,
--		      VB->SecondaryColorPtr[0]->stride,
--		      count);
-+	rcommon_emit_vector( ctx,
-+			     &(rmesa->tcl.aos[nr]),
-+			     (char *)VB->SecondaryColorPtr[0]->data,
-+			     3,
-+			     VB->SecondaryColorPtr[0]->stride,
-+			     count);
-       }
- 
-       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
--      component[nr++] = &rmesa->tcl.spec;
-+      nr++;
-    }
- 
- /* FIXME: not sure if this is correct. May need to stitch this together with
-@@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-    if (inputs & VERT_BIT_FOG) {
-       if (!rmesa->tcl.fog.buf)
- 	 emit_vecfog( ctx,
--		      &(rmesa->tcl.fog),
-+		      &(rmesa->tcl.aos[nr]),
- 		      (char *)VB->FogCoordPtr->data,
- 		      VB->FogCoordPtr->stride,
- 		      count);
- 
-       vfmt |= RADEON_CP_VC_FRMT_FPFOG;
--      component[nr++] = &rmesa->tcl.fog;
-+      nr++;
-    }
- 
- 
-@@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
-       if (inputs & VERT_BIT_TEX(unit)) {
- 	 if (!rmesa->tcl.tex[unit].buf)
- 	    emit_tex_vector( ctx,
--			     &(rmesa->tcl.tex[unit]),
-+			     &(rmesa->tcl.aos[nr]),
- 			     (char *)VB->TexCoordPtr[unit]->data,
- 			     VB->TexCoordPtr[unit]->size,
- 			     VB->TexCoordPtr[unit]->stride,
- 			     count );
-+	 nr++;
- 
- 	 vfmt |= RADEON_ST_BIT(unit);
-          /* assume we need the 3rd coord if texgen is active for r/q OR at least
-@@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
- 	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
- 	 }
--	 component[nr++] = &rmesa->tcl.tex[unit];
-       }
-    }
- 
-@@ -625,31 +327,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
- void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
--   GLuint unit;
--
--#if 0
--   if (RADEON_DEBUG & DEBUG_VERTS) 
--      _tnl_print_vert_flags( __FUNCTION__, newinputs );
--#endif
--
--   if (newinputs & VERT_BIT_POS) 
--     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_NORMAL) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_COLOR0) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
--
--   if (newinputs & VERT_BIT_COLOR1) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
--      
--   if (newinputs & VERT_BIT_FOG)
--      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-+   int i;
- 
--   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
--      if (newinputs & VERT_BIT_TEX(unit))
--         radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
-+   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
-+     if (rmesa->tcl.aos[i].bo) {
-+       radeon_bo_unref(rmesa->tcl.aos[i].bo);
-+       rmesa->tcl.aos[i].bo = NULL;
-+     }
-    }
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-index 126d072..d468a97 100644
---- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
-@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
- 
- void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
-    GLuint req = 0;
-    GLuint unit;
-@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 	 break;
- 
-    if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
--       rmesa->tcl.indexed_verts.buf)
-+       rmesa->tcl.aos[0].bo)
-       return;
- 
--   if (rmesa->tcl.indexed_verts.buf)
-+   if (rmesa->tcl.aos[0].bo)
-       radeonReleaseArrays( ctx, ~0 );
- 
--   radeonAllocDmaRegion( rmesa,
--			 &rmesa->tcl.indexed_verts, 
-+   radeonAllocDmaRegion( &rmesa->radeon,
-+			 &rmesa->tcl.aos[0].bo,
-+			 &rmesa->tcl.aos[0].offset,
- 			 VB->Count * setup_tab[i].vertex_size * 4, 
- 			 4);
- 
-@@ -421,15 +422,11 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
- 
-    setup_tab[i].emit( ctx, 0, VB->Count, 
--		      rmesa->tcl.indexed_verts.address + 
--		      rmesa->tcl.indexed_verts.start );
-+		      rmesa->tcl.aos[0].bo->ptr + rmesa->tcl.aos[0].offset);
- 
-+   //   rmesa->tcl.aos[0].size = setup_tab[i].vertex_size;
-+   rmesa->tcl.aos[0].stride = setup_tab[i].vertex_size;
-    rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
--   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
--   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
--   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
--
--   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
-    rmesa->tcl.nr_aos_components = 1;
- }
- 
-@@ -437,13 +434,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
- 
- void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
--
--#if 0
--   if (RADEON_DEBUG & DEBUG_VERTS) 
--      _tnl_print_vert_flags( __FUNCTION__, newinputs );
--#endif
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-+   int i;
- 
--   if (newinputs) 
--     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
-+   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
-+      if (rmesa->tcl.aos[i].bo) {
-+         radeon_bo_unref(rmesa->tcl.aos[i].bo);
-+         rmesa->tcl.aos[i].bo = NULL;
-+      }
-+   }
- }
 diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
 new file mode 100644
 index 0000000..3203ee1
@@ -27569,5710 +4985,6 @@ index 0000000..43dfa48
 +
 +
 +#endif /* __RADEON_MIPMAP_TREE_H_ */
-diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
-index 6613757..bbed838 100644
---- a/src/mesa/drivers/dri/radeon/radeon_sanity.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
-@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
- }
- 
- 
--int radeonSanityCmdBuffer( radeonContextPtr rmesa,
-+int radeonSanityCmdBuffer( r100ContextPtr rmesa,
- 			   int nbox,
- 			   drm_clip_rect_t *boxes )
- {
-diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
-index 1ec06bc..f30eb1c 100644
---- a/src/mesa/drivers/dri/radeon/radeon_sanity.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
-@@ -1,7 +1,7 @@
- #ifndef RADEON_SANITY_H
- #define RADEON_SANITY_H
- 
--extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
-+extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
- 				  int nbox,
- 				  drm_clip_rect_t *boxes );
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
-index 56c22fa..086a268 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
-@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * \author  Gareth Hughes <gareth@valinux.com>
-  */
- 
-+#include <errno.h>
- #include "main/glheader.h"
- #include "main/imports.h"
- #include "main/mtypes.h"
-@@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_chipset.h"
- #include "radeon_macros.h"
- #include "radeon_screen.h"
-+#include "radeon_common.h"
-+#include "radeon_span.h"
- #if !RADEON_COMMON
- #include "radeon_context.h"
--#include "radeon_span.h"
- #include "radeon_tex.h"
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- #include "r200_context.h"
- #include "r200_ioctl.h"
--#include "r200_span.h"
- #include "r200_tex.h"
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
- #include "r300_context.h"
- #include "r300_fragprog.h"
- #include "r300_tex.h"
--#include "radeon_span.h"
- #endif
- 
- #include "utils.h"
- #include "vblank.h"
- #include "drirenderbuffer.h"
- 
-+#include "radeon_bocs_wrapper.h"
-+
- #include "GL/internal/dri_interface.h"
- 
- /* Radeon configuration
-  */
- #include "xmlpool.h"
- 
-+#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
-+DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
-+        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
-+        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
-+DRI_CONF_OPT_END
-+
- #if !RADEON_COMMON	/* R100 */
- PUBLIC const char __driConfigOptions[] =
- DRI_CONF_BEGIN
-@@ -80,6 +88,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-         DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
-         DRI_CONF_HYPERZ(false)
-+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
-     DRI_CONF_SECTION_END
-     DRI_CONF_SECTION_QUALITY
-         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -95,7 +104,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_NO_RAST(false)
-     DRI_CONF_SECTION_END
- DRI_CONF_END;
--static const GLuint __driNConfigOptions = 14;
-+static const GLuint __driNConfigOptions = 15;
- 
- #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
- 
-@@ -107,6 +116,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-         DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
-         DRI_CONF_HYPERZ(false)
-+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
-     DRI_CONF_SECTION_END
-     DRI_CONF_SECTION_QUALITY
-         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -126,7 +136,7 @@ DRI_CONF_BEGIN
-         DRI_CONF_NV_VERTEX_PROGRAM(false)
-     DRI_CONF_SECTION_END
- DRI_CONF_END;
--static const GLuint __driNConfigOptions = 16;
-+static const GLuint __driNConfigOptions = 17;
- 
- extern const struct dri_extension blend_extensions[];
- extern const struct dri_extension ARB_vp_extension[];
-@@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
-         DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
- DRI_CONF_OPT_END
- 
--#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
--DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
--        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
--        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
--DRI_CONF_OPT_END
-+
- 
- #define DRI_CONF_DISABLE_S3TC(def) \
- DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
-@@ -209,7 +215,6 @@ static const GLuint __driNConfigOptions = 17;
- extern const struct dri_extension gl_20_extension[];
- 
- #ifndef RADEON_DEBUG
--int RADEON_DEBUG = 0;
- 
- static const struct dri_debug_control debug_control[] = {
- 	{"fall", DEBUG_FALLBACKS},
-@@ -351,137 +356,17 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
-     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-    r300SetTexOffset,
- };
--#endif
- 
--/* Create the device specific screen private data struct.
-- */
--static radeonScreenPtr
--radeonCreateScreen( __DRIscreenPrivate *sPriv )
--{
--   radeonScreenPtr screen;
--   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
--   unsigned char *RADEONMMIO;
--   int i;
--   int ret;
--   uint32_t temp;
--
--   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
--      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
--      return GL_FALSE;
--   }
--
--   /* Allocate the private area */
--   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
--   if ( !screen ) {
--      __driUtilMessage("%s: Could not allocate memory for screen structure",
--		       __FUNCTION__);
--      return NULL;
--   }
--
--#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
--	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+static const __DRItexBufferExtension r300TexBufferExtension = {
-+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-+   r300SetTexBuffer,
-+};
- #endif
- 
--   /* parse information in __driConfigOptions */
--   driParseOptionInfo (&screen->optionCache,
--		       __driConfigOptions, __driNConfigOptions);
--
--   /* This is first since which regions we map depends on whether or
--    * not we are using a PCI card.
--    */
--   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
--   {
--      int ret;
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
--			    &screen->gart_buffer_offset);
--
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
--	 return NULL;
--      }
--
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
--			    &screen->gart_base);
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
--	 return NULL;
--      }
--
--      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
--			    &screen->irq);
--      if (ret) {
--	 FREE( screen );
--	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
--	 return NULL;
--      }
--      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
--      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
--      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
--      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
--      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
--      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
--      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
--   }
--
--   screen->mmio.handle = dri_priv->registerHandle;
--   screen->mmio.size   = dri_priv->registerSize;
--   if ( drmMap( sPriv->fd,
--		screen->mmio.handle,
--		screen->mmio.size,
--		&screen->mmio.map ) ) {
--      FREE( screen );
--      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   RADEONMMIO = screen->mmio.map;
--
--   screen->status.handle = dri_priv->statusHandle;
--   screen->status.size   = dri_priv->statusSize;
--   if ( drmMap( sPriv->fd,
--		screen->status.handle,
--		screen->status.size,
--		&screen->status.map ) ) {
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
--      return NULL;
--   }
--   screen->scratch = (__volatile__ uint32_t *)
--      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
--
--   screen->buffers = drmMapBufs( sPriv->fd );
--   if ( !screen->buffers ) {
--      drmUnmap( screen->status.map, screen->status.size );
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
--      screen->gartTextures.handle = dri_priv->gartTexHandle;
--      screen->gartTextures.size   = dri_priv->gartTexMapSize;
--      if ( drmMap( sPriv->fd,
--		   screen->gartTextures.handle,
--		   screen->gartTextures.size,
--		   (drmAddressPtr)&screen->gartTextures.map ) ) {
--	 drmUnmapBufs( screen->buffers );
--	 drmUnmap( screen->status.map, screen->status.size );
--	 drmUnmap( screen->mmio.map, screen->mmio.size );
--	 FREE( screen );
--	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
--	 return NULL;
--      }
--
--      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
--   }
--
-+static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
-+{
-    screen->chip_flags = 0;
--   /* XXX: add more chipsets */
--   switch ( dri_priv->deviceID ) {
-+   switch ( device_id ) {
-    case PCI_CHIP_RADEON_LY:
-    case PCI_CHIP_RADEON_LZ:
-    case PCI_CHIP_RADEON_QY:
-@@ -813,9 +698,162 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
-    default:
-       fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
--	      dri_priv->deviceID);
-+	      device_id);
-+      return -1;
-+   }
-+
-+   return 0;
-+}
-+
-+
-+/* Create the device specific screen private data struct.
-+ */
-+static radeonScreenPtr
-+radeonCreateScreen( __DRIscreenPrivate *sPriv )
-+{
-+   radeonScreenPtr screen;
-+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
-+   unsigned char *RADEONMMIO = NULL;
-+   int i;
-+   int ret;
-+   uint32_t temp;
-+
-+   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
-+      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
-+      return GL_FALSE;
-+   }
-+
-+   /* Allocate the private area */
-+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-+   if ( !screen ) {
-+      __driUtilMessage("%s: Could not allocate memory for screen structure",
-+		       __FUNCTION__);
-       return NULL;
-    }
-+
-+#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+#endif
-+
-+   /* parse information in __driConfigOptions */
-+   driParseOptionInfo (&screen->optionCache,
-+		       __driConfigOptions, __driNConfigOptions);
-+
-+   /* This is first since which regions we map depends on whether or
-+    * not we are using a PCI card.
-+    */
-+   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
-+   {
-+      int ret;
-+
-+#ifdef RADEON_PARAM_KERNEL_MM
-+     ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
-+                            &screen->kernel_mm);
-+
-+      if (ret && ret != -EINVAL) {
-+         FREE( screen );
-+         fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret);
-+         return NULL;
-+      }
-+
-+      if (ret == -EINVAL)
-+          screen->kernel_mm = 0;
-+#endif
-+
-+      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
-+			    &screen->gart_buffer_offset);
-+
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
-+	 return NULL;
-+      }
-+
-+      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
-+			    &screen->gart_base);
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
-+	 return NULL;
-+      }
-+
-+      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
-+			    &screen->irq);
-+      if (ret) {
-+	 FREE( screen );
-+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
-+	 return NULL;
-+      }
-+      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
-+      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
-+      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
-+      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
-+      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
-+      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
-+      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
-+   }
-+
-+   if (!screen->kernel_mm) {
-+     screen->mmio.handle = dri_priv->registerHandle;
-+     screen->mmio.size   = dri_priv->registerSize;
-+     if ( drmMap( sPriv->fd,
-+		  screen->mmio.handle,
-+		  screen->mmio.size,
-+		  &screen->mmio.map ) ) {
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+
-+     RADEONMMIO = screen->mmio.map;
-+
-+     screen->status.handle = dri_priv->statusHandle;
-+     screen->status.size   = dri_priv->statusSize;
-+     if ( drmMap( sPriv->fd,
-+		  screen->status.handle,
-+		  screen->status.size,
-+		  &screen->status.map ) ) {
-+       drmUnmap( screen->mmio.map, screen->mmio.size );
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+     screen->scratch = (__volatile__ uint32_t *)
-+       ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-+
-+     screen->buffers = drmMapBufs( sPriv->fd );
-+     if ( !screen->buffers ) {
-+       drmUnmap( screen->status.map, screen->status.size );
-+       drmUnmap( screen->mmio.map, screen->mmio.size );
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+     
-+     if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-+       screen->gartTextures.handle = dri_priv->gartTexHandle;
-+       screen->gartTextures.size   = dri_priv->gartTexMapSize;
-+       if ( drmMap( sPriv->fd,
-+		    screen->gartTextures.handle,
-+		    screen->gartTextures.size,
-+		    (drmAddressPtr)&screen->gartTextures.map ) ) {
-+	 drmUnmapBufs( screen->buffers );
-+	 drmUnmap( screen->status.map, screen->status.size );
-+	 drmUnmap( screen->mmio.map, screen->mmio.size );
-+	 FREE( screen );
-+	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-+	 return NULL;
-+       }
-+       
-+       screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-+     }
-+   }
-+
-+
-+   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
-+   if (ret == -1)
-+     return NULL;
-+
-    if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
-        sPriv->ddx_version.minor < 2) {
-       fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
-@@ -843,7 +881,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
-                          &temp);
-    if (ret) {
--       if (screen->chip_family < CHIP_FAMILY_RS690)
-+       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
- 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
-        else {
-            FREE( screen );
-@@ -957,7 +995,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- #endif
- 
- #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
--   screen->extensions[i++] = &r300texOffsetExtension.base;
-+   //screen->extensions[i++] = &r300texOffsetExtension.base;
- #endif
- 
-    screen->extensions[i++] = NULL;
-@@ -965,6 +1003,106 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
-    screen->driScreen = sPriv;
-    screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
-+   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
-+					       screen->sarea_priv_offset);
-+
-+   if (screen->kernel_mm)
-+     screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
-+   else
-+     screen->bom = radeon_bo_manager_legacy_ctor(screen);
-+   if (screen->bom == NULL) {
-+     free(screen);
-+     return NULL;
-+   }
-+
-+   return screen;
-+}
-+
-+static radeonScreenPtr
-+radeonCreateScreen2(__DRIscreenPrivate *sPriv)
-+{
-+   radeonScreenPtr screen;
-+   int i;
-+   int ret;
-+   uint32_t device_id;
-+
-+   /* Allocate the private area */
-+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-+   if ( !screen ) {
-+      __driUtilMessage("%s: Could not allocate memory for screen structure",
-+		       __FUNCTION__);
-+      fprintf(stderr, "leaving here\n");
-+      return NULL;
-+   }
-+
-+#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+#endif
-+
-+   /* parse information in __driConfigOptions */
-+   driParseOptionInfo (&screen->optionCache,
-+		       __driConfigOptions, __driNConfigOptions);
-+
-+   screen->kernel_mm = 1;
-+   screen->chip_flags = 0;
-+
-+   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
-+			 &screen->irq);
-+
-+   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_DEVICE_ID,
-+			 &device_id);
-+   if (ret) {
-+     FREE( screen );
-+     fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
-+     return NULL;
-+   }
-+
-+   ret = radeon_set_screen_flags(screen, device_id);
-+   if (ret == -1)
-+     return NULL;
-+
-+   if (screen->chip_family <= CHIP_FAMILY_RS200)
-+      screen->chip_flags |= RADEON_CLASS_R100;
-+   else if (screen->chip_family <= CHIP_FAMILY_RV280)
-+      screen->chip_flags |= RADEON_CLASS_R200;
-+   else
-+      screen->chip_flags |= RADEON_CLASS_R300;
-+
-+   i = 0;
-+   screen->extensions[i++] = &driCopySubBufferExtension.base;
-+   screen->extensions[i++] = &driFrameTrackingExtension.base;
-+   screen->extensions[i++] = &driReadDrawableExtension;
-+
-+   if ( screen->irq != 0 ) {
-+       screen->extensions[i++] = &driSwapControlExtension.base;
-+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
-+   }
-+
-+#if !RADEON_COMMON
-+   screen->extensions[i++] = &radeonTexOffsetExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   if (IS_R200_CLASS(screen))
-+       screen->extensions[i++] = &r200AllocateExtension.base;
-+
-+   screen->extensions[i++] = &r200texOffsetExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+   screen->extensions[i++] = &r300texOffsetExtension.base;
-+   screen->extensions[i++] = &r300TexBufferExtension.base;
-+#endif
-+
-+   screen->extensions[i++] = NULL;
-+   sPriv->extensions = screen->extensions;
-+
-+   screen->driScreen = sPriv;
-+   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
-+   if (screen->bom == NULL) {
-+       free(screen);
-+       return NULL;
-+   }
-    return screen;
- }
- 
-@@ -973,23 +1111,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- static void
- radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- {
--   radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
-+    radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
- 
--   if (!screen)
--      return;
-+    if (!screen)
-+        return;
- 
--   if ( screen->gartTextures.map ) {
--      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
--   }
--   drmUnmapBufs( screen->buffers );
--   drmUnmap( screen->status.map, screen->status.size );
--   drmUnmap( screen->mmio.map, screen->mmio.size );
-+    if (screen->kernel_mm) {
-+#ifdef RADEON_BO_TRACK
-+        radeon_tracker_print(&screen->bom->tracker, stderr);
-+#endif
-+        radeon_bo_manager_gem_dtor(screen->bom);
-+    } else {
-+        radeon_bo_manager_legacy_dtor(screen->bom);
-+
-+        if ( screen->gartTextures.map ) {
-+            drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-+        }
-+        drmUnmapBufs( screen->buffers );
-+        drmUnmap( screen->status.map, screen->status.size );
-+        drmUnmap( screen->mmio.map, screen->mmio.size );
-+    }
- 
--   /* free all option information */
--   driDestroyOptionInfo (&screen->optionCache);
-+    /* free all option information */
-+    driDestroyOptionInfo (&screen->optionCache);
- 
--   FREE( screen );
--   sPriv->private = NULL;
-+    FREE( screen );
-+    sPriv->private = NULL;
- }
- 
- 
-@@ -998,16 +1145,103 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- static GLboolean
- radeonInitDriver( __DRIscreenPrivate *sPriv )
- {
--   sPriv->private = (void *) radeonCreateScreen( sPriv );
--   if ( !sPriv->private ) {
--      radeonDestroyScreen( sPriv );
--      return GL_FALSE;
--   }
-+    if (sPriv->dri2.enabled) {
-+        sPriv->private = (void *) radeonCreateScreen2( sPriv );
-+    } else {
-+        sPriv->private = (void *) radeonCreateScreen( sPriv );
-+    }
-+    if ( !sPriv->private ) {
-+        radeonDestroyScreen( sPriv );
-+        return GL_FALSE;
-+    }
-+
-+    return GL_TRUE;
-+}
- 
--   return GL_TRUE;
-+static GLboolean
-+radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
-+			    GLenum intFormat, GLuint w, GLuint h)
-+{
-+    rb->Width = w;
-+    rb->Height = h;
-+    rb->_ActualFormat = intFormat;
-+
-+    return GL_TRUE;
- }
- 
- 
-+static struct radeon_renderbuffer *
-+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
-+{
-+    struct radeon_renderbuffer *ret;
-+
-+    ret = CALLOC_STRUCT(radeon_renderbuffer);
-+    if (!ret)
-+	return NULL;
-+
-+    _mesa_init_renderbuffer(&ret->base, 0);
-+
-+    /* XXX format junk */
-+    switch (format) {
-+	case GL_RGB5:
-+	    ret->base._ActualFormat = GL_RGB5;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 5;
-+	    ret->base.GreenBits = 6;
-+	    ret->base.BlueBits = 5;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_RGBA8:
-+	    ret->base._ActualFormat = GL_RGBA8;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 8;
-+	    ret->base.GreenBits = 8;
-+	    ret->base.BlueBits = 8;
-+	    ret->base.AlphaBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_STENCIL_INDEX8_EXT:
-+	    ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
-+	    ret->base._BaseFormat = GL_STENCIL_INDEX;
-+	    ret->base.StencilBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_DEPTH_COMPONENT16:
-+	    ret->base._ActualFormat = GL_DEPTH_COMPONENT16;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 16;
-+	    ret->base.DataType = GL_UNSIGNED_SHORT;
-+	    break;
-+	case GL_DEPTH_COMPONENT24:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.DataType = GL_UNSIGNED_INT;
-+	    break;
-+	case GL_DEPTH24_STENCIL8_EXT:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.StencilBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-+	    break;
-+	default:
-+	    fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
-+	    _mesa_delete_renderbuffer(&ret->base);
-+	    return NULL;
-+    }
-+
-+    ret->dPriv = driDrawPriv;
-+    ret->base.InternalFormat = format;
-+
-+    ret->base.AllocStorage = radeon_alloc_window_storage;
-+
-+    radeonSetSpanFunctions(ret);
-+
-+    ret->bo = NULL;
-+    return ret;
-+}
-+
- /**
-  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-  *
-@@ -1022,95 +1256,86 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
- {
-    radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
- 
--   if (isPixmap) {
--      return GL_FALSE; /* not implemented */
--   }
--   else {
--      const GLboolean swDepth = GL_FALSE;
--      const GLboolean swAlpha = GL_FALSE;
--      const GLboolean swAccum = mesaVis->accumRedBits > 0;
--      const GLboolean swStencil = mesaVis->stencilBits > 0 &&
--         mesaVis->depthBits != 24;
--      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
--
--      /* front color renderbuffer */
--      {
--         driRenderbuffer *frontRb
--            = driNewRenderbuffer(GL_RGBA,
--                                 driScrnPriv->pFB + screen->frontOffset,
--                                 screen->cpp,
--                                 screen->frontOffset, screen->frontPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(frontRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
--      }
-+    const GLboolean swDepth = GL_FALSE;
-+    const GLboolean swAlpha = GL_FALSE;
-+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
-+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
-+	mesaVis->depthBits != 24;
-+    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
-+    GLenum depthFormat = GL_NONE;
-+    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-+
-+    if (mesaVis->depthBits == 16)
-+	depthFormat = GL_DEPTH_COMPONENT16;
-+    else if (mesaVis->depthBits == 24)
-+	depthFormat = GL_DEPTH_COMPONENT24;
-+
-+    /* front color renderbuffer */
-+    {
-+	struct radeon_renderbuffer *front =
-+	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
-+	front->has_surface = 1;
-+    }
- 
--      /* back color renderbuffer */
--      if (mesaVis->doubleBufferMode) {
--         driRenderbuffer *backRb
--            = driNewRenderbuffer(GL_RGBA,
--                                 driScrnPriv->pFB + screen->backOffset,
--                                 screen->cpp,
--                                 screen->backOffset, screen->backPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(backRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
--      }
-+    /* back color renderbuffer */
-+    if (mesaVis->doubleBufferMode) {
-+	struct radeon_renderbuffer *back =
-+	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
-+	back->has_surface = 1;
-+    }
- 
--      /* depth renderbuffer */
--      if (mesaVis->depthBits == 16) {
--         driRenderbuffer *depthRb
--            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(depthRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
--      }
--      else if (mesaVis->depthBits == 24) {
--         driRenderbuffer *depthRb
--            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(depthRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
--      }
-+    /* depth renderbuffer */
-+    if (depthFormat != GL_NONE) {
-+	struct radeon_renderbuffer *depth =
-+	    radeon_create_renderbuffer(depthFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
-+	depth->has_surface = screen->depthHasSurface;
-+    }
- 
--      /* stencil renderbuffer */
--      if (mesaVis->stencilBits > 0 && !swStencil) {
--         driRenderbuffer *stencilRb
--            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
--                                 driScrnPriv->pFB + screen->depthOffset,
--                                 screen->cpp,
--                                 screen->depthOffset, screen->depthPitch,
--                                 driDrawPriv);
--         radeonSetSpanFunctions(stencilRb, mesaVis);
--         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
--	 stencilRb->depthHasSurface = screen->depthHasSurface;
--      }
-+    /* stencil renderbuffer */
-+    if (mesaVis->stencilBits > 0 && !swStencil) {
-+	struct radeon_renderbuffer *stencil =
-+	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
-+	stencil->has_surface = screen->depthHasSurface;
-+    }
- 
--      _mesa_add_soft_renderbuffers(fb,
--                                   GL_FALSE, /* color */
--                                   swDepth,
--                                   swStencil,
--                                   swAccum,
--                                   swAlpha,
--                                   GL_FALSE /* aux */);
--      driDrawPriv->driverPrivate = (void *) fb;
-+    _mesa_add_soft_renderbuffers(fb,
-+	    GL_FALSE, /* color */
-+	    swDepth,
-+	    swStencil,
-+	    swAccum,
-+	    swAlpha,
-+	    GL_FALSE /* aux */);
-+    driDrawPriv->driverPrivate = (void *) fb;
- 
--      return (driDrawPriv->driverPrivate != NULL);
--   }
-+    return (driDrawPriv->driverPrivate != NULL);
- }
- 
--
- static void
- radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
- {
-+	struct radeon_renderbuffer *rb;
-+	GLframebuffer *fb;
-+    
-+    fb = (void*)driDrawPriv->driverPrivate;
-+    rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-    _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
- }
- 
-@@ -1205,13 +1430,48 @@ radeonInitScreen(__DRIscreenPrivate *psp)
-    if (!radeonInitDriver(psp))
-        return NULL;
- 
-+   /* for now fill in all modes */
-    return radeonFillInModes( psp,
- 			     dri_priv->bpp,
- 			     (dri_priv->bpp == 16) ? 16 : 24,
--			     (dri_priv->bpp == 16) ? 0  : 8,
--			     (dri_priv->backOffset != dri_priv->depthOffset) );
-+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
- }
- 
-+/**
-+ * This is the driver specific part of the createNewScreen entry point.
-+ * Called when using DRI2 (installed below as the .InitScreen2 hook).
-+ *
-+ * \return the __DRIconfig list from radeonFillInModes(), or NULL if radeonInitDriver() fails
-+ */
-+static const
-+__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
-+{
-+   /* Calling driInitExtensions here, with a NULL context pointer,
-+    * does not actually enable the extensions.  It just makes sure
-+    * that all the dispatch offsets for all the extensions that
-+    * *might* be enabled are known.  This is needed because the
-+    * dispatch offsets need to be known when _mesa_context_create
-+    * is called, but we can't enable the extensions until we have a
-+    * context pointer.
-+    *
-+    * Hello chicken.  Hello egg.  How are you two today?
-+    */
-+   driInitExtensions( NULL, card_extensions, GL_FALSE );
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   driInitExtensions( NULL, blend_extensions, GL_FALSE );
-+   driInitSingleExtension( NULL, ARB_vp_extension );
-+   driInitSingleExtension( NULL, NV_vp_extension );
-+   driInitSingleExtension( NULL, ATI_fs_extension );
-+   driInitExtensions( NULL, point_extensions, GL_FALSE );
-+#endif
-+
-+   if (!radeonInitDriver(psp)) {
-+       return NULL;
-+    }
-+
-+   /* for now fill in all modes (presumably 24bpp colour, 24-bit depth, 8-bit stencil -- confirm against radeonFillInModes) */
-+   return radeonFillInModes( psp, 24, 24, 8, 1);
-+}
- 
- /**
-  * Get information about previous buffer swaps.
-@@ -1219,11 +1479,7 @@ radeonInitScreen(__DRIscreenPrivate *psp)
- static int
- getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
- {
--#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
-    radeonContextPtr  rmesa;
--#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
--   r200ContextPtr  rmesa;
--#endif
- 
-    if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
- 	|| (dPriv->driContextPriv->driverPrivate == NULL)
-@@ -1260,6 +1516,8 @@ const struct __DriverAPIRec driDriverAPI = {
-    .WaitForSBC      = NULL,
-    .SwapBuffersMSC  = NULL,
-    .CopySubBuffer   = radeonCopySubBuffer,
-+    /* DRI2 */
-+   .InitScreen2     = radeonInitScreen2,
- };
- #else
- const struct __DriverAPIRec driDriverAPI = {
-@@ -1269,14 +1527,15 @@ const struct __DriverAPIRec driDriverAPI = {
-    .DestroyContext  = r200DestroyContext,
-    .CreateBuffer    = radeonCreateBuffer,
-    .DestroyBuffer   = radeonDestroyBuffer,
--   .SwapBuffers     = r200SwapBuffers,
--   .MakeCurrent     = r200MakeCurrent,
--   .UnbindContext   = r200UnbindContext,
-+   .SwapBuffers     = radeonSwapBuffers,
-+   .MakeCurrent     = radeonMakeCurrent,
-+   .UnbindContext   = radeonUnbindContext,
-    .GetSwapInfo     = getSwapInfo,
-    .GetDrawableMSC  = driDrawableGetMSC32,
-    .WaitForMSC      = driWaitForMSC32,
-    .WaitForSBC      = NULL,
-    .SwapBuffersMSC  = NULL,
--   .CopySubBuffer   = r200CopySubBuffer,
-+   .CopySubBuffer   = radeonCopySubBuffer,
- };
- #endif
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
-index b84c70b..1c0f5bb 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
-@@ -54,7 +54,7 @@ typedef struct {
-    drmAddress map;			/* Mapping of the DRM region */
- } radeonRegionRec, *radeonRegionPtr;
- 
--typedef struct {
-+typedef struct radeon_screen {
-    int chip_family;
-    int chip_flags;
-    int cpp;
-@@ -103,9 +103,12 @@ typedef struct {
-    /* Configuration cache with default values for all contexts */
-    driOptionCache optionCache;
- 
--   const __DRIextension *extensions[8];
-+   const __DRIextension *extensions[16];
- 
-    int num_gb_pipes;
-+   int kernel_mm;
-+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
-+   struct radeon_bo_manager *bom;
- } radeonScreenRec, *radeonScreenPtr;
- 
- #define IS_R100_CLASS(screen) \
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
-index 12051ff..49ec2c3 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
-@@ -43,37 +43,168 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/glheader.h"
- #include "swrast/swrast.h"
- 
--#include "radeon_context.h"
--#include "radeon_ioctl.h"
--#include "radeon_state.h"
-+#include "radeon_common.h"
-+#include "radeon_lock.h"
- #include "radeon_span.h"
--#include "radeon_tex.h"
--
--#include "drirenderbuffer.h"
- 
- #define DBG 0
- 
-+static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
-+			     GLint x, GLint y)
-+{ /* Returns the address inside rrb->bo->ptr of pixel (x, y), using 4 bytes per pixel in the tiled paths. */
-+    GLubyte *ptr = rrb->bo->ptr;
-+    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset; /* byte offset from ptr */
-+    GLint nmacroblkpl; /* presumably "number of macro blocks per line" */
-+    GLint nmicroblkpl; /* presumably "number of micro blocks per line" */
-+
-+    x += dPriv->x; /* add the drawable's origin to the caller's coordinates */
-+    y += dPriv->y;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch; /* plain linear addressing */
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 5; /* macro + micro tiled: 32-pixel-wide, 16-row blocks */
-+                offset += ((y >> 4) * nmacroblkpl) << 11; /* 2048 bytes per macro block */
-+                offset += ((y & 15) >> 1) << 8; /* pair of rows inside the macro block */
-+                offset += (y & 1) << 4; /* odd/even row inside the micro block */
-+                offset += (x >> 5) << 11; /* macro block column */
-+                offset += ((x & 31) >> 2) << 5; /* 32-byte micro block inside the row pair */
-+                offset += (x & 3) << 2; /* pixel inside the micro block row, 4 bytes each */
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 6; /* macro tiled only: 64-pixel-wide, 8-row blocks */
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8; /* 256 bytes per row inside the macro block */
-+                offset += (x >> 6) << 11;
-+                offset += ((x & 63) >> 3) << 5; /* 32-byte group of 8 pixels */
-+                offset += (x & 7) << 2;
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* micro tiled only: pitch rounded up to 32, counted in 32-unit blocks */
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 3) << 5; /* 32-byte group of 8 pixels */
-+            offset += (x & 7) << 2;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
-+			     GLint x, GLint y)
-+{ /* Returns the address inside rrb->bo->ptr of pixel (x, y), using 2 bytes per pixel in the tiled paths. */
-+    GLubyte *ptr = rrb->bo->ptr;
-+    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset; /* byte offset from ptr */
-+    GLint nmacroblkpl; /* presumably "number of macro blocks per line" */
-+    GLint nmicroblkpl; /* presumably "number of micro blocks per line" */
-+
-+    x += dPriv->x; /* add the drawable's origin to the caller's coordinates */
-+    y += dPriv->y;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch; /* plain linear addressing */
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 6; /* macro + micro tiled: 64-pixel-wide, 16-row blocks */
-+                offset += ((y >> 4) * nmacroblkpl) << 11; /* 2048 bytes per macro block */
-+                offset += ((y & 15) >> 1) << 8; /* pair of rows inside the macro block */
-+                offset += (y & 1) << 4; /* odd/even row inside the micro block */
-+                offset += (x >> 6) << 11; /* macro block column */
-+                offset += ((x & 63) >> 3) << 5; /* 32-byte micro block inside the row pair */
-+                offset += (x & 7) << 1; /* pixel inside the micro block row, 2 bytes each */
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 7; /* macro tiled only: 128-pixel-wide, 8-row blocks */
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8; /* 256 bytes per row inside the macro block */
-+                offset += (x >> 7) << 11;
-+                offset += ((x & 127) >> 4) << 5; /* 32-byte group of 16 pixels */
-+                offset += (x & 15) << 1; /* 2 bytes per pixel: "<< 2" overran the 32-byte group (x=8 aliased x=16) */
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* micro tiled only: pitch rounded up to 32, counted in 32-unit blocks */
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 4) << 5; /* 32-byte group of 16 pixels */
-+            offset += (x & 15) << 1; /* 2 bytes per pixel, matching (x & (microblkxs - 1)) * cpp in radeon_ptr() */
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
-+			   GLint x, GLint y)
-+{ /* Returns the address inside rrb->bo->ptr of pixel (x, y); block sizes are derived from rrb->cpp instead of being hard-coded. */
-+    GLubyte *ptr = rrb->bo->ptr;
-+    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset; /* byte offset from ptr */
-+    GLint microblkxs; /* micro block width in pixels */
-+    GLint macroblkxs; /* macro block width in pixels */
-+    GLint nmacroblkpl; /* presumably "number of macro blocks per line" */
-+    GLint nmicroblkpl; /* presumably "number of micro blocks per line" */
-+
-+    x += dPriv->x; /* add the drawable's origin to the caller's coordinates */
-+    y += dPriv->y;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch; /* plain linear addressing */
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                microblkxs = 16 / rrb->cpp; /* macro + micro tiled: a micro block row holds 16 bytes */
-+                macroblkxs = 128 / rrb->cpp; /* a macro block row holds 128 bytes */
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 4) * nmacroblkpl) << 11; /* 16 rows and 2048 bytes per macro block */
-+                offset += ((y & 15) >> 1) << 8; /* pair of rows inside the macro block */
-+                offset += (y & 1) << 4; /* odd/even row inside the micro block */
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5; /* the mask only works if macroblkxs is a power of two */
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            } else {
-+                microblkxs = 32 / rrb->cpp; /* macro tiled only: 32-byte pixel groups */
-+                macroblkxs = 256 / rrb->cpp; /* a macro block row holds 256 bytes */
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 3) * nmacroblkpl) << 11; /* 8 rows and 2048 bytes per macro block */
-+                offset += (y & 7) << 8;
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            }
-+        } else {
-+            microblkxs = 32 / rrb->cpp; /* micro tiled only: 32-byte pixel groups */
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* pitch rounded up to 32, counted in 32-unit blocks */
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x / microblkxs) << 5;
-+            offset += (x & (microblkxs - 1)) * rrb->cpp;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+
- /*
-  * Note that all information needed to access pixels in a renderbuffer
-  * should be obtained through the gl_renderbuffer parameter, not per-context
-  * information.
-  */
- #define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
-+   struct radeon_renderbuffer *rrb = (void *) rb;		\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
-    const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
-+   GLuint p;						\
-+   (void)p;
- 
- #define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
--   const GLuint bottom = dPriv->h - 1;			\
--   GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
-+   struct radeon_renderbuffer *rrb = (void *) rb;	\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
-+   const GLuint bottom = dPriv->h - 1;
- 
- #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
- 
-@@ -94,7 +225,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_RGB565
- #define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
- #include "spantmp2.h"
- 
- /* 32 bit, ARGB8888 color spanline and pixel functions
-@@ -104,7 +235,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_ARGB8888
- #define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
- #include "spantmp2.h"
- 
- /* ================================================================
-@@ -121,65 +252,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * too...
-  */
- 
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 4 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0..1] = 0           */
--
--#ifdef COMPILE_R300
--		ba = (y / 8) * (pitch / 8) + (x / 8);
--#else
--		ba = (y / 16) * (pitch / 16) + (x / 16);
--#endif
--
--		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
--		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
--		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 2 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0]    = 0           */
--
--		ba = (y / 16) * (pitch / 32) + (x / 32);
--
--		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
--		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
--		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
- /* 16-bit depth buffer functions
-  */
- #define VALUE_TYPE GLushort
- 
- #define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
-+   *(GLushort *)radeon_ptr(rrb, _x, _y) = d
- 
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
-+   d = *(GLushort *)radeon_ptr(rrb, _x, _y)
- 
- #define TAG(x) radeon##x##_z16
- #include "depthtmp.h"
-@@ -194,35 +275,36 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
- #ifdef COMPILE_R300
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x000000ff;							\
-    tmp |= ((d << 8) & 0xffffff00);					\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0xff000000;							\
-    tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_DEPTH( d, _x, _y )						\
-   do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
--					 _y + yo )) & 0xffffff00) >> 8; \
-+    d = (*(GLuint*)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \
-   }while(0)
- #else
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
-+   d = *(GLuint*)(radeon_ptr32(rrb, _x,	_y )) & 0x00ffffff;
- #endif
--
-+/*
-+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
-+   d = *(GLuint*)(radeon_ptr(rrb, _x,	_y )) & 0x00ffffff;
-+*/
- #define TAG(x) radeon##x##_z24_s8
- #include "depthtmp.h"
- 
-@@ -235,35 +317,35 @@ do {									\
- #ifdef COMPILE_R300
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0xffffff00;							\
-    tmp |= (d) & 0xff;							\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x00ffffff;							\
-    tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
-+   GLuint tmp = *_ptr;				\
-    d = tmp & 0x000000ff;						\
- } while (0)
- #else
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
-+   GLuint tmp = *_ptr;				\
-    d = (tmp & 0xff000000) >> 24;					\
- } while (0)
- #endif
-@@ -271,20 +353,60 @@ do {									\
- #define TAG(x) radeon##x##_z24_s8
- #include "stenciltmp.h"
- 
--/* Move locking out to get reasonable span performance (10x better
-- * than doing this in HW_LOCK above).  WaitForIdle() is the main
-- * culprit.
-- */
-+
-+static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
-+{ /* Maps the buffer object behind rb, if it has one; write is forwarded to radeon_bo_map(). */
-+	struct radeon_renderbuffer *rrb = (void*)rb;
-+	int r; /* radeon_bo_map() status; non-zero is reported as an error */
-+	
-+	if (rrb->bo) {
-+		r = radeon_bo_map(rrb->bo, write);
-+		if (r) {
-+			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
-+				__FUNCTION__, r); /* failure is only logged; the caller is not told */
-+		}
-+	}
-+}
-+
-+static void unmap_buffer(struct gl_renderbuffer *rb)
-+{ /* Counterpart of map_buffer(): unmaps the buffer object behind rb, if it has one. */
-+	struct radeon_renderbuffer *rrb = (void*)rb;
-+
-+	if (rrb->bo) {
-+		radeon_bo_unmap(rrb->bo);
-+	}
-+}
- 
- static void radeonSpanRenderStart(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--#ifdef COMPILE_R300
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
--	R300_FIREVERTICES(r300);
--#else
--	RADEON_FIREVERTICES(rmesa);
--#endif
-+	int i;
-+
-+	radeon_firevertices(rmesa); /* single call replacing the per-chip FIREVERTICES macros used before */
-+
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { /* map the current texture of every enabled unit */
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
-+	}
-+
-+	/* color draw buffers */
-+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-+		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE); /* mapped for writing */
-+	}
-+
-+	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE); /* read buffer is mapped without write access */
-+
-+	if (ctx->DrawBuffer->_DepthBuffer) {
-+		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE); /* map the renderbuffer behind the depth wrapper */
-+	}
-+	if (ctx->DrawBuffer->_StencilBuffer)
-+		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE); /* likewise for the stencil wrapper */
-+
-+	/* The locking and wait for idle should really only be needed in classic mode.
-+	 * In a future memory manager based implementation, this should become
-+	 * unnecessary due to the fact that mapping our buffers, textures, etc.
-+	 * should implicitly wait for any previous rendering commands that must
-+	 * be waited on. */
- 	LOCK_HARDWARE(rmesa);
- 	radeonWaitForIdleLocked(rmesa);
- }
-@@ -292,8 +414,25 @@ static void radeonSpanRenderStart(GLcontext * ctx)
- static void radeonSpanRenderFinish(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	int i;
- 	_swrast_flush(ctx);
- 	UNLOCK_HARDWARE(rmesa);
-+
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { /* undo the texture mappings made in radeonSpanRenderStart() */
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
-+	}
-+
-+	/* color draw buffers */
-+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
-+		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
-+
-+	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer); /* colour read buffer */
-+
-+	if (ctx->DrawBuffer->_DepthBuffer)
-+		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); /* renderbuffer behind the depth wrapper */
-+	if (ctx->DrawBuffer->_StencilBuffer)
-+		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); /* renderbuffer behind the stencil wrapper */
- }
- 
- void radeonInitSpanFuncs(GLcontext * ctx)
-@@ -307,20 +446,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
- /**
-  * Plug in the Get/Put routines for the given driRenderbuffer.
-  */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
-+void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
- {
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
-+	if (rrb->base.InternalFormat == GL_RGB5) { /* selection now keys on InternalFormat alone; the GLvisual argument is gone */
-+		radeonInitPointers_RGB565(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_RGBA8) { /* NOTE(review): any format not listed in this chain installs no span functions */
-+		radeonInitPointers_ARGB8888(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
-+		radeonInitDepthPointers_z16(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
-+		radeonInitDepthPointers_z24_s8(&rrb->base); /* 24-bit depth uses the z24_s8 accessors */
-+	} else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-+		radeonInitStencilPointers_z24_s8(&rrb->base); /* stencil also uses the z24_s8 accessors */
- 	}
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
-index 9abe086..dd44ab5 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
-@@ -42,9 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #ifndef __RADEON_SPAN_H__
- #define __RADEON_SPAN_H__
- 
--#include "drirenderbuffer.h"
--
- extern void radeonInitSpanFuncs(GLcontext * ctx);
--extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
- 
-+extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
- #endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
-index 32bcff3..86d8720 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
-@@ -62,7 +62,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
- 
- static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
-    GLubyte refByte;
- 
-@@ -106,7 +106,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
- static void radeonBlendEquationSeparate( GLcontext *ctx,
- 					 GLenum modeRGB, GLenum modeA )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
-    GLboolean fallback = GL_FALSE;
- 
-@@ -147,7 +147,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
- 				     GLenum sfactorRGB, GLenum dfactorRGB,
- 				     GLenum sfactorA, GLenum dfactorA )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
-       ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
-    GLboolean fallback = GL_FALSE;
-@@ -257,7 +257,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
- 
- static void radeonDepthFunc( GLcontext *ctx, GLenum func )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, ctx );
-    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
-@@ -293,7 +293,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
- 
- static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    RADEON_STATECHANGE( rmesa, ctx );
- 
-    if ( ctx->Depth.Mask ) {
-@@ -305,16 +305,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
- 
- static void radeonClearDepth( GLcontext *ctx, GLclampd d )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
- 		    RADEON_DEPTH_FORMAT_MASK);
- 
-    switch ( format ) {
-    case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x0000ffff;
-+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
-       break;
-    case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
--      rmesa->state.depth.clear = d * 0x00ffffff;
-+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
-       break;
-    }
- }
-@@ -327,7 +327,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
- 
- static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    union { int i; float f; } c, d;
-    GLchan col[4];
- 
-@@ -406,109 +406,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
-    }
- }
- 
--
--/* =============================================================
-- * Scissoring
-- */
--
--
--static GLboolean intersect_rect( drm_clip_rect_t *out,
--				 drm_clip_rect_t *a,
--				 drm_clip_rect_t *b )
--{
--   *out = *a;
--   if ( b->x1 > out->x1 ) out->x1 = b->x1;
--   if ( b->y1 > out->y1 ) out->y1 = b->y1;
--   if ( b->x2 < out->x2 ) out->x2 = b->x2;
--   if ( b->y2 < out->y2 ) out->y2 = b->y2;
--   if ( out->x1 >= out->x2 ) return GL_FALSE;
--   if ( out->y1 >= out->y2 ) return GL_FALSE;
--   return GL_TRUE;
--}
--
--
--void radeonRecalcScissorRects( radeonContextPtr rmesa )
--{
--   drm_clip_rect_t *out;
--   int i;
--
--   /* Grow cliprect store?
--    */
--   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
--	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
--	 rmesa->state.scissor.numAllocedClipRects *= 2;
--      }
--
--      if (rmesa->state.scissor.pClipRects)
--	 FREE(rmesa->state.scissor.pClipRects);
--
--      rmesa->state.scissor.pClipRects = 
--	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
--		 sizeof(drm_clip_rect_t) );
--
--      if ( rmesa->state.scissor.pClipRects == NULL ) {
--	 rmesa->state.scissor.numAllocedClipRects = 0;
--	 return;
--      }
--   }
--   
--   out = rmesa->state.scissor.pClipRects;
--   rmesa->state.scissor.numClipRects = 0;
--
--   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
--      if ( intersect_rect( out, 
--			   &rmesa->pClipRects[i], 
--			   &rmesa->state.scissor.rect ) ) {
--	 rmesa->state.scissor.numClipRects++;
--	 out++;
--      }
--   }
--}
--
--
--static void radeonUpdateScissor( GLcontext *ctx )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if ( rmesa->dri.drawable ) {
--      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--
--      int x = ctx->Scissor.X;
--      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
--      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
--      int h = dPriv->h - ctx->Scissor.Y - 1;
--
--      rmesa->state.scissor.rect.x1 = x + dPriv->x;
--      rmesa->state.scissor.rect.y1 = y + dPriv->y;
--      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
--      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
--
--      radeonRecalcScissorRects( rmesa );
--   }
--}
--
--
--static void radeonScissor( GLcontext *ctx,
--			   GLint x, GLint y, GLsizei w, GLsizei h )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if ( ctx->Scissor.Enabled ) {
--      RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
--      radeonUpdateScissor( ctx );
--   }
--
--}
--
--
- /* =============================================================
-  * Culling
-  */
- 
- static void radeonCullFace( GLcontext *ctx, GLenum unused )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
-    GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
- 
-@@ -545,7 +449,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
- 
- static void radeonFrontFace( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, set );
-    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
-@@ -570,7 +474,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
-  */
- static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, lin );
-    RADEON_STATECHANGE( rmesa, set );
-@@ -587,7 +491,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
- 
- static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, lin );
-    rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
-@@ -602,8 +506,8 @@ static void radeonColorMask( GLcontext *ctx,
- 			     GLboolean r, GLboolean g,
- 			     GLboolean b, GLboolean a )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 				  ctx->Color.ColorMask[RCOMP],
- 				  ctx->Color.ColorMask[GCOMP],
- 				  ctx->Color.ColorMask[BCOMP],
-@@ -623,8 +527,8 @@ static void radeonColorMask( GLcontext *ctx,
- static void radeonPolygonOffset( GLcontext *ctx,
- 				 GLfloat factor, GLfloat units )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   float_ui32_type constant =  { units * rmesa->state.depth.scale };
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
-    float_ui32_type factoru = { factor };
- 
-    RADEON_STATECHANGE( rmesa, zbs );
-@@ -634,7 +538,7 @@ static void radeonPolygonOffset( GLcontext *ctx,
- 
- static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint i;
-    drm_radeon_stipple_t stipple;
- 
-@@ -646,27 +550,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
- 
-    /* TODO: push this into cmd mechanism
-     */
--   RADEON_FIREVERTICES( rmesa );
--   LOCK_HARDWARE( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-+   LOCK_HARDWARE( &rmesa->radeon );
- 
-    /* FIXME: Use window x,y offsets into stipple RAM.
-     */
-    stipple.mask = rmesa->state.stipple.mask;
--   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
-                     &stipple, sizeof(drm_radeon_stipple_t) );
--   UNLOCK_HARDWARE( rmesa );
-+   UNLOCK_HARDWARE( &rmesa->radeon );
- }
- 
- static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
- 
-    /* Can't generally do unfilled via tcl, but some good special
-     * cases work. 
-     */
-    TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
--   if (rmesa->TclFallback) {
-+   if (rmesa->radeon.TclFallback) {
-       radeonChooseRenderState( ctx );
-       radeonChooseVertexState( ctx );
-    }
-@@ -686,7 +590,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
-  */
- static void radeonUpdateSpecular( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
-    GLuint flag = 0;
- 
-@@ -757,7 +661,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
- 
-    /* Update vertex/render formats
-     */
--   if (rmesa->TclFallback) { 
-+   if (rmesa->radeon.TclFallback) { 
-       radeonChooseRenderState( ctx );
-       radeonChooseVertexState( ctx );
-    }
-@@ -774,7 +678,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
-  */
- static void update_global_ambient( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    float *fcmd = (float *)RADEON_DB_STATE( glt );
- 
-    /* Need to do more if both emmissive & ambient are PREMULT:
-@@ -809,7 +713,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
- /*     fprintf(stderr, "%s\n", __FUNCTION__); */
- 
-    if (l->Enabled) {
--      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
-       float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
- 
-       COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
-@@ -849,7 +753,7 @@ static void check_twoside_fallback( GLcontext *ctx )
- 
- static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
- {
--      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
-       GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
- 
-       light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
-@@ -913,7 +817,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
- 
- void radeonUpdateMaterial( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
-    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
-    GLuint mask = ~0;
-@@ -978,7 +882,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
-  */
- static void update_light( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    /* Have to check these, or have an automatic shortcircuit mechanism
-     * to remove noop statechanges. (Or just do a better job on the
-@@ -1043,7 +947,7 @@ static void update_light( GLcontext *ctx )
- static void radeonLightfv( GLcontext *ctx, GLenum light,
- 			   GLenum pname, const GLfloat *params )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLint p = light - GL_LIGHT0;
-    struct gl_light *l = &ctx->Light.Light[p];
-    GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
-@@ -1164,7 +1068,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
- static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 				const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    switch (pname) {
-       case GL_LIGHT_MODEL_AMBIENT: 
-@@ -1188,7 +1092,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 
- 	 check_twoside_fallback( ctx );
- 
--	 if (rmesa->TclFallback) {
-+	 if (rmesa->radeon.TclFallback) {
- 	    radeonChooseRenderState( ctx );
- 	    radeonChooseVertexState( ctx );
- 	 }
-@@ -1205,7 +1109,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
- 
- static void radeonShadeModel( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
- 
-    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
-@@ -1244,7 +1148,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
- static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
- {
-    GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
- 
-    RADEON_STATECHANGE( rmesa, ucp[p] );
-@@ -1256,7 +1160,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
- 
- static void radeonUpdateClipPlanes( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint p;
- 
-    for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
-@@ -1281,7 +1185,7 @@ static void
- radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
-                            GLint ref, GLuint mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
- 		     ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
- 
-@@ -1325,7 +1229,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
- static void
- radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    RADEON_STATECHANGE( rmesa, msk );
-    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
-@@ -1336,7 +1240,7 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
- static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
-                                      GLenum zfail, GLenum zpass )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
-       and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
-@@ -1349,7 +1253,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
-    GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
-    GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
-    
--   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
-+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
-       tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
-       tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
-       tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
-@@ -1455,9 +1359,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
- 
- static void radeonClearStencil( GLcontext *ctx, GLint s )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   rmesa->state.stencil.clear = 
-+   rmesa->radeon.state.stencil.clear = 
-       ((GLuint) (ctx->Stencil.Clear & 0xff) |
-        (0xff << RADEON_STENCIL_MASK_SHIFT) |
-        ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
-@@ -1481,20 +1385,20 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
-  */
- void radeonUpdateWindow( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
--   GLfloat xoffset = (GLfloat)dPriv->x;
--   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
-+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
- 
-    float_ui32_type sx = { v[MAT_SX] };
-    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
-    float_ui32_type sy = { - v[MAT_SY] };
-    float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
--   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
--   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
-+   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
-+   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
- 
--   RADEON_FIREVERTICES( rmesa );
-+   radeon_firevertices(&rmesa->radeon);
-    RADEON_STATECHANGE( rmesa, vpt );
- 
-    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
-@@ -1524,8 +1428,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
- 
- void radeonUpdateViewportOffset( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
-    GLfloat xoffset = (GLfloat)dPriv->x;
-    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
-    const GLfloat *v = ctx->Viewport._WindowMap.m;
-@@ -1555,8 +1459,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
-                 RADEON_STIPPLE_Y_OFFSET_MASK);
- 
-          /* add magic offsets, then invert */
--         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
--         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
-+         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
-+         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
-                      & RADEON_STIPPLE_COORD_MASK);
- 
-          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
-@@ -1580,20 +1484,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
- 
- static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLubyte c[4];
-    CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
-    CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
-    CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
-    CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
--   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
-+   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
- 					       c[0], c[1], c[2], c[3] );
- }
- 
- 
- static void radeonRenderMode( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
- }
- 
-@@ -1619,7 +1523,7 @@ static GLuint radeon_rop_tab[] = {
- 
- static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint rop = (GLuint)opcode - GL_CLEAR;
- 
-    ASSERT( rop < 16 );
-@@ -1630,66 +1534,17 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
- 
- 
- /**
-- * Set up the cliprects for either front or back-buffer drawing.
-- */
--void radeonSetCliprects( radeonContextPtr rmesa )
--{
--   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
--   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
--   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
--   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
--
--   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* Can't ignore 2d windows if we are page flipping.
--       */
--      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
--	 rmesa->numClipRects = drawable->numClipRects;
--	 rmesa->pClipRects = drawable->pClipRects;
--      }
--      else {
--	 rmesa->numClipRects = drawable->numBackClipRects;
--	 rmesa->pClipRects = drawable->pBackClipRects;
--      }
--   }
--   else {
--      /* front buffer (or none, or multiple buffers */
--      rmesa->numClipRects = drawable->numClipRects;
--      rmesa->pClipRects = drawable->pClipRects;
--   }
--
--   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
--      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
--			       drawable->w, drawable->h);
--      draw_fb->Initialized = GL_TRUE;
--   }
--
--   if (drawable != readable) {
--      if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
--	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
--				  readable->w, readable->h);
--	 read_fb->Initialized = GL_TRUE;
--      }
--   }
--
--   if (rmesa->state.scissor.enabled)
--      radeonRecalcScissorRects( rmesa );
--
--   rmesa->lastStamp = drawable->lastStamp;
--}
--
--
--/**
-  * Called via glDrawBuffer.
-  */
- static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
-    if (RADEON_DEBUG & DEBUG_DRI)
-       fprintf(stderr, "%s %s\n", __FUNCTION__,
- 	      _mesa_lookup_enum_by_nr( mode ));
- 
--   RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
-+   radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
- 
-    if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
-       /* 0 (GL_NONE) buffers or multiple color drawing buffers */
-@@ -1707,8 +1562,8 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
-       return;
-    }
- 
--   radeonSetCliprects( rmesa );
--
-+   radeonSetCliprects( &rmesa->radeon );
-+   radeonUpdatePageFlipping(&rmesa->radeon);
-    /* We'll set the drawing engine's offset/pitch parameters later
-     * when we update other state.
-     */
-@@ -1726,7 +1581,7 @@ static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
- 
- static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint p, flag;
- 
-    if ( RADEON_DEBUG & DEBUG_STATE )
-@@ -1821,10 +1676,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
-       RADEON_STATECHANGE(rmesa, ctx );
-       if ( state ) {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
-       } else {
- 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
--	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
-+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
-       }
-       break;
- 
-@@ -1971,13 +1826,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
-    }
- 
-    case GL_SCISSOR_TEST:
--      RADEON_FIREVERTICES( rmesa );
--      rmesa->state.scissor.enabled = state;
-+      radeon_firevertices(&rmesa->radeon);
-+      rmesa->radeon.state.scissor.enabled = state;
-       radeonUpdateScissor( ctx );
-       break;
- 
-    case GL_STENCIL_TEST:
--      if ( rmesa->state.stencil.hwBuffer ) {
-+      if ( rmesa->radeon.state.stencil.hwBuffer ) {
- 	 RADEON_STATECHANGE( rmesa, ctx );
- 	 if ( state ) {
- 	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
-@@ -2010,7 +1865,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
- 
- static void radeonLightingSpaceChange( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean tmp;
-    RADEON_STATECHANGE( rmesa, tcl );
- 
-@@ -2039,7 +1894,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
-  */
- 
- 
--void radeonUploadTexMatrix( radeonContextPtr rmesa,
-+void radeonUploadTexMatrix( r100ContextPtr rmesa,
- 			    int unit, GLboolean swapcols )
- {
- /* Here's how this works: on r100, only 3 tex coords can be submitted, so the
-@@ -2065,7 +1920,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
-    int idx = TEXMAT_0 + unit;
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
-    int i;
--   struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
-+   struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
-    GLfloat *src = rmesa->tmpmat[unit].m;
- 
-    rmesa->TexMatColSwap &= ~(1 << unit);
-@@ -2119,7 +1974,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
- }
- 
- 
--static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
-+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
- {
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
-    int i;
-@@ -2135,7 +1990,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
-    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
- }
- 
--static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
-+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
- {
-    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
-    memcpy(dest, src, 16*sizeof(float));
-@@ -2145,7 +2000,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
- 
- static void update_texturematrix( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
-    GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
-    int unit;
-@@ -2217,43 +2072,32 @@ static void update_texturematrix( GLcontext *ctx )
- void
- radeonUpdateDrawBuffer(GLcontext *ctx)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct gl_framebuffer *fb = ctx->DrawBuffer;
--   driRenderbuffer *drb;
-+   struct radeon_renderbuffer *rrb;
- 
-    if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
--      /* draw to front */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
--   }
--   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
--      /* draw to back */
--      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
--   }
--   else {
--      /* drawing to multiple buffers, or none */
--      return;
-+     /* draw to front */
-+     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
-+     /* draw to back */
-+     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   } else {
-+     /* drawing to multiple buffers, or none */
-+     return;
-    }
- 
--   assert(drb);
--   assert(drb->flippedPitch);
-+   assert(rrb);
-+   assert(rrb->pitch);
- 
-    RADEON_STATECHANGE( rmesa, ctx );
--
--   /* Note: we used the (possibly) page-flipped values */
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--     = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
--	& RADEON_COLOROFFSET_MASK);
--   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--   if (rmesa->sarea->tiling_enabled) {
--      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
--   }
- }
- 
- 
- void radeonValidateState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint new_state = rmesa->NewGLState;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint new_state = rmesa->radeon.NewGLState;
- 
-    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
-      radeonUpdateDrawBuffer(ctx);
-@@ -2261,7 +2105,7 @@ void radeonValidateState( GLcontext *ctx )
- 
-    if (new_state & _NEW_TEXTURE) {
-       radeonUpdateTextureState( ctx );
--      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
-+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
-    }
- 
-    /* Need an event driven matrix update?
-@@ -2295,7 +2139,7 @@ void radeonValidateState( GLcontext *ctx )
-    }
- 
- 
--   rmesa->NewGLState = 0;
-+   rmesa->radeon.NewGLState = 0;
- }
- 
- 
-@@ -2306,7 +2150,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
-    _vbo_InvalidateState( ctx, new_state );
-    _tnl_InvalidateState( ctx, new_state );
-    _ae_invalidate_state( ctx, new_state );
--   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
-+   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
- }
- 
- 
-@@ -2330,15 +2174,15 @@ static GLboolean check_material( GLcontext *ctx )
- 
- static void radeonWrapRunPipeline( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean has_material;
- 
-    if (0)
--      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
-+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
- 
-    /* Validate state:
-     */
--   if (rmesa->NewGLState)
-+   if (rmesa->radeon.NewGLState)
-       radeonValidateState( ctx );
- 
-    has_material = (ctx->Light.Enabled && check_material( ctx ));
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
-index 2171879..17c2b11 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
-@@ -39,22 +39,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "radeon_context.h"
- 
--extern void radeonInitState( radeonContextPtr rmesa );
-+extern void radeonInitState( r100ContextPtr rmesa );
- extern void radeonInitStateFuncs( GLcontext *ctx );
- 
- extern void radeonUpdateMaterial( GLcontext *ctx );
- 
--extern void radeonSetCliprects( radeonContextPtr rmesa );
--extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
- extern void radeonUpdateViewportOffset( GLcontext *ctx );
- extern void radeonUpdateWindow( GLcontext *ctx );
- extern void radeonUpdateDrawBuffer( GLcontext *ctx );
--extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
-+extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
- 				   int unit, GLboolean swapcols );
- 
- extern void radeonValidateState( GLcontext *ctx );
- 
--extern void radeonPrintDirty( radeonContextPtr rmesa,
-+extern void radeonPrintDirty( r100ContextPtr rmesa,
- 			      const char *msg );
- 
- 
-@@ -62,7 +60,7 @@ extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
- #define FALLBACK( rmesa, bit, mode ) do {				\
-    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
- 		     __FUNCTION__, bit, mode );				\
--   radeonFallback( rmesa->glCtx, bit, mode );				\
-+   radeonFallback( rmesa->radeon.glCtx, bit, mode );				\
- } while (0)
- 
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
-index 57dc380..617e731 100644
---- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
-@@ -38,39 +38,156 @@
- #include "swrast_setup/swrast_setup.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
- #include "radeon_tcl.h"
- #include "radeon_tex.h"
- #include "radeon_swtcl.h"
- 
-+#include "../r200/r200_reg.h"
-+
- #include "xmlpool.h"
- 
-+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
-+ * 1.3 cmdbuffers allow all previous state to be updated as well as
-+ * the tcl scalar and vector areas.
-+ */
-+static struct {
-+	int start;
-+	int len;
-+	const char *name;
-+} packet[RADEON_MAX_STATE_PACKETS] = {
-+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
-+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
-+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
-+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
-+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
-+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
-+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
-+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
-+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
-+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
-+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
-+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
-+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
-+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
-+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
-+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
-+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
-+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
-+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
-+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
-+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
-+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
-+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
-+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
-+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
-+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
-+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
-+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
-+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
-+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
-+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
-+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
-+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
-+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
-+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
-+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
-+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
-+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
-+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
-+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
-+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
-+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
-+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
-+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
-+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
-+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
-+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
-+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
-+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
-+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
-+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
-+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
-+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
-+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
-+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
-+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
-+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
-+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
-+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
-+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
-+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
-+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
-+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
-+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
-+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
-+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
-+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
-+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
-+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
-+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
-+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
-+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
-+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
-+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
-+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
-+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
-+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
-+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
-+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
-+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
-+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
-+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
-+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
-+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
-+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
-+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
-+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
-+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
-+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
-+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
-+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
-+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
-+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
-+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
-+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
-+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
-+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
-+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
-+};
-+
- /* =============================================================
-  * State initialization
-  */
- 
--void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
-+void radeonPrintDirty( r100ContextPtr rmesa, const char *msg )
- {
-    struct radeon_state_atom *l;
- 
-    fprintf(stderr, msg);
-    fprintf(stderr, ": ");
- 
--   foreach(l, &rmesa->hw.atomlist) {
--      if (l->dirty || rmesa->hw.all_dirty)
-+   foreach(l, &rmesa->radeon.hw.atomlist) {
-+      if (l->dirty || rmesa->radeon.hw.all_dirty)
- 	 fprintf(stderr, "%s, ", l->name);
-    }
- 
-    fprintf(stderr, "\n");
- }
- 
--static int cmdpkt( int id ) 
-+static int cmdpkt( r100ContextPtr rmesa, int id ) 
- {
-    drm_radeon_cmd_header_t h;
--   h.i = 0;
--   h.packet.cmd_type = RADEON_CMD_PACKET;
--   h.packet.packet_id = id;
-+
-+   if (rmesa->radeon.radeonScreen->kernel_mm) {
-+     return CP_PACKET0(packet[id].start, packet[id].len - 1);
-+   } else {
-+     h.i = 0;
-+     h.packet.cmd_type = RADEON_CMD_PACKET;
-+     h.packet.packet_id = id;
-+   }
-    return h.i;
- }
- 
-@@ -96,17 +213,17 @@ static int cmdscl( int offset, int stride, int count )
-    return h.i;
- }
- 
--#define CHECK( NM, FLAG )			\
--static GLboolean check_##NM( GLcontext *ctx )	\
--{						\
--   return FLAG;					\
-+#define CHECK( NM, FLAG )				\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
-+{							\
-+   return FLAG ? atom->cmd_size : 0;			\
- }
- 
- #define TCL_CHECK( NM, FLAG )				\
--static GLboolean check_##NM( GLcontext *ctx )		\
-+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
- {							\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
--   return !rmesa->TclFallback && (FLAG);		\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);	\
-+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
- }
- 
- 
-@@ -146,17 +263,244 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
- CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
- CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
- 
-+#define OUT_VEC(hdr, data) do {			\
-+    drm_radeon_cmd_header_t h;					\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
-+    OUT_BATCH(0);							\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
-+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.vectors.count);				\
-+  } while(0)
-+
-+#define OUT_SCL(hdr, data) do {					\
-+    drm_radeon_cmd_header_t h;						\
-+    h.i = hdr;								\
-+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
-+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
-+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
-+    OUT_BATCH_TABLE((data), h.scalars.count);				\
-+  } while(0)
-+
-+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_SCL(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+
-+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_VEC(atom->cmd[0], atom->cmd+1);
-+   END_BATCH();
-+}
-+
-+/*
-+ * Emit the context (CTX) state atom on the non-CS path; radeonInitState
-+ * installs this as hw.ctx.emit when radeonScreen->kernel_mm is not set.
-+ *
-+ * The atom's command table is replayed in order, except that the depth and
-+ * colour buffer offset/pitch slots are replaced by values derived from the
-+ * current renderbuffers when those are available.
-+ */
-+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   struct radeon_renderbuffer *rrb;
-+   uint32_t cbpitch;
-+   uint32_t zbpitch;
-+   uint32_t dwords = atom->cmd_size;
-+   GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate;
-+
-+   /* Ask for four dwords beyond the atom size (presumably two per possible
-+    * relocation, as with the +2 in tex_emit), then emit the first five
-+    * entries of the command table unchanged. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + 4);
-+   OUT_BATCH_TABLE(atom->cmd, 5);
-+
-+   /* Depth offset and pitch: zeros when there is no depth renderbuffer. */
-+   rrb = r100->radeon.state.depth.rrb;
-+   if (!rrb) {
-+     OUT_BATCH(0);
-+     OUT_BATCH(0);
-+   } else {
-+     zbpitch = (rrb->pitch / rrb->cpp);
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+     OUT_BATCH(zbpitch);
-+   }
-+
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+   /* Colour buffer: under DRI2 the back-left attachment of the drawable's
-+    * framebuffer overrides the renderbuffer recorded in the state. */
-+   rrb = r100->radeon.state.color.rrb;
-+   if (r100->radeon.radeonScreen->driScreen->dri2.enabled) {
-+      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   }
-+   /* Without a buffer object fall back to the values stored in the atom. */
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
-+   } else {
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+   }
- 
-+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
-+
-+   if (!rrb || !rrb->bo) {
-+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
-+   } else {
-+     /* Pitch is emitted in pixels (bytes per row / bytes per pixel).
-+      * NOTE(review): radeonInitState ORs RADEON_COLOR_TILE_ENABLE into the
-+      * same CTX_RB3D_COLORPITCH value; confirm the R200_ name used here is
-+      * the same bit. */
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (r100->radeon.sarea->tiling_enabled)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   END_BATCH();
-+}
-+
-+/*
-+ * Emit the context (CTX) state atom on the command-stream path;
-+ * radeonInitState installs this as hw.ctx.emit when
-+ * radeonScreen->kernel_mm is set.
-+ *
-+ * Unlike ctx_emit, the atom is split up: every register group is written
-+ * with its own CP_PACKET0 header, and the depth/colour offset and pitch
-+ * packets are only emitted when the corresponding renderbuffer exists.
-+ */
-+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   struct radeon_renderbuffer *rrb, *drb;
-+   uint32_t cbpitch = 0;
-+   uint32_t zbpitch = 0;
-+   uint32_t dwords = atom->cmd_size;
-+   GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate;
-+
-+   /* Colour buffer: under DRI2 the back-left attachment of the drawable's
-+    * framebuffer overrides the renderbuffer recorded in the state. */
-+   rrb = r100->radeon.state.color.rrb;
-+   if (r100->radeon.radeonScreen->driScreen->dri2.enabled) {
-+      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+   }
-+   if (rrb) {
-+     assert(rrb->bo != NULL);
-+     /* Pitch in pixels (bytes per row / bytes per pixel). */
-+     cbpitch = (rrb->pitch / rrb->cpp);
-+     if (r100->radeon.sarea->tiling_enabled)
-+       cbpitch |= R200_COLOR_TILE_ENABLE;
-+   }
-+
-+   drb = r100->radeon.state.depth.rrb;
-+   if (drb)
-+     zbpitch = (drb->pitch / drb->cpp);
-+
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+
-+   /* In the CS case we need to split this up: first the four entries that
-+    * follow the atom's leading header, under a fresh packet header. */
-+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
-+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
-+
-+   if (drb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
-+     /* The depth offset must relocate against the depth buffer's own bo.
-+      * This used to pass rrb->bo, i.e. the colour buffer, which is the
-+      * wrong buffer and a NULL dereference when only depth is bound. */
-+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
-+     OUT_BATCH(zbpitch);
-+   }
-+
-+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
-+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
-+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
-+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
-+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
-+
-+   if (rrb) {
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
-+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+
-+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
-+     OUT_BATCH(cbpitch);
-+   }
-+
-+   /* NOTE(review): the four entries at atom->cmd + 14 used when
-+    * atom->cmd_size == CTX_STATE_SIZE_NEWDRM are not emitted here; the code
-+    * for them was already commented out. */
-+
-+   END_BATCH();
-+}
-+/*
-+ * Emit the cube-map state atom for texture unit atom->idx.
-+ *
-+ * Writes the first three entries of the atom's command table followed by
-+ * one relocation per face for faces[0..4] of mipmap level 0.  faces[5] is
-+ * not written here; tex_emit emits it for cube textures.
-+ */
-+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx, j;
-+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+   /* Emit nothing unless the unit has a cube map enabled and the texture object has a miptree. */
-+   if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
-+	return;
-+
-+   if (!t)
-+	return;
-+
-+   if (!t->mt)
-+	return;
-+   /* +10: presumably two extra dwords for each of the five relocations below -- TODO confirm. */
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords + 10);
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+   lvl = &t->mt->levels[0];
-+   for (j = 0; j < 5; j++) {
-+	OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
-+			RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+   }
-+   END_BATCH();
-+}
-+
-+/*
-+ * Emit the texture state atom for texture unit atom->idx.
-+ *
-+ * The packet is: the first three entries of the atom's command table, one
-+ * texture-offset slot, then five entries starting at atom->cmd+4.  The
-+ * offset slot is filled from, in order of preference:
-+ *   - a relocation against the miptree's bo (faces[5] of level 0 for an
-+ *     enabled cube map, otherwise offset 0 with the tile bits as data);
-+ *   - the local texture heap offset when no texture object is bound;
-+ *   - the texture object's override_offset otherwise.
-+ */
-+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
-+{
-+   r100ContextPtr r100 = R100_CONTEXT(ctx);
-+   BATCH_LOCALS(&r100->radeon);
-+   uint32_t dwords = atom->cmd_size;
-+   int i = atom->idx;
-+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
-+   radeon_mipmap_level *lvl;
-+   const int use_reloc = (t && t->mt && !t->image_override);
-+
-+   /* The relocation path needs two more dwords than a plain offset. */
-+   if (use_reloc)
-+     dwords += 2;
-+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+   OUT_BATCH_TABLE(atom->cmd, 3);
-+   if (use_reloc) {
-+     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
-+        lvl = &t->mt->levels[0];
-+        OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
-+                        RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     } else {
-+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+                        RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+     }
-+   } else if (!t) {
-+     /* workaround for old CS mechanism */
-+     OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
-+   } else {
-+     /* t is non-NULL here.  Besides image_override this also covers a
-+      * texture object that has no miptree yet: that case used to emit
-+      * nothing at all, leaving the offset slot out of the packet.
-+      * NOTE(review): override_offset is assumed to be a harmless value
-+      * (e.g. 0) when image_override is not set -- confirm. */
-+     OUT_BATCH(t->override_offset);
-+   }
-+
-+   OUT_BATCH_TABLE((atom->cmd+4), 5);
-+   END_BATCH();
-+}
- 
- /* Initialize the context's hardware state.
-  */
--void radeonInitState( radeonContextPtr rmesa )
-+void radeonInitState( r100ContextPtr rmesa )
- {
--   GLcontext *ctx = rmesa->glCtx;
-+   GLcontext *ctx = rmesa->radeon.glCtx;
-    GLuint color_fmt, depth_fmt, i;
-    GLint drawPitch, drawOffset;
- 
--   switch ( rmesa->radeonScreen->cpp ) {
-+   switch ( rmesa->radeon.radeonScreen->cpp ) {
-    case 2:
-       color_fmt = RADEON_COLOR_FORMAT_RGB565;
-       break;
-@@ -168,20 +512,20 @@ void radeonInitState( radeonContextPtr rmesa )
-       exit( -1 );
-    }
- 
--   rmesa->state.color.clear = 0x00000000;
-+   rmesa->radeon.state.color.clear = 0x00000000;
- 
-    switch ( ctx->Visual.depthBits ) {
-    case 16:
--      rmesa->state.depth.clear = 0x0000ffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
-+      rmesa->radeon.state.depth.clear = 0x0000ffff;
-+      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
-       depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
--      rmesa->state.stencil.clear = 0x00000000;
-+      rmesa->radeon.state.stencil.clear = 0x00000000;
-       break;
-    case 24:
--      rmesa->state.depth.clear = 0x00ffffff;
--      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
-+      rmesa->radeon.state.depth.clear = 0x00ffffff;
-+      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
-       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
--      rmesa->state.stencil.clear = 0xffff0000;
-+      rmesa->radeon.state.stencil.clear = 0xffff0000;
-       break;
-    default:
-       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-@@ -190,37 +534,44 @@ void radeonInitState( radeonContextPtr rmesa )
-    }
- 
-    /* Only have hw stencil when depth buffer is 24 bits deep */
--   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-+   rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
- 				     ctx->Visual.depthBits == 24 );
- 
--   rmesa->Fallback = 0;
-+   rmesa->radeon.Fallback = 0;
- 
--   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
--      drawOffset = rmesa->radeonScreen->backOffset;
--      drawPitch  = rmesa->radeonScreen->backPitch;
-+   if ( ctx->Visual.doubleBufferMode && rmesa->radeon.sarea->pfCurrentPage == 0 ) {
-+      drawOffset = rmesa->radeon.radeonScreen->backOffset;
-+      drawPitch  = rmesa->radeon.radeonScreen->backPitch;
-    } else {
--      drawOffset = rmesa->radeonScreen->frontOffset;
--      drawPitch  = rmesa->radeonScreen->frontPitch;
-+      drawOffset = rmesa->radeon.radeonScreen->frontOffset;
-+      drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
-    }
- 
--   rmesa->hw.max_state_size = 0;
-+   rmesa->radeon.hw.max_state_size = 0;
- 
--#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
-+#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX )		\
-    do {								\
-       rmesa->hw.ATOM.cmd_size = SZ;				\
--      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
--      rmesa->hw.ATOM.name = NM;					\
-+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
-+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
-+      rmesa->hw.ATOM.name = NM;						\
-       rmesa->hw.ATOM.is_tcl = FLAG;					\
-       rmesa->hw.ATOM.check = check_##CHK;				\
--      rmesa->hw.ATOM.dirty = GL_TRUE;				\
--      rmesa->hw.max_state_size += SZ * sizeof(int);		\
-+      rmesa->hw.ATOM.dirty = GL_TRUE;					\
-+      rmesa->hw.ATOM.idx = IDX;					\
-+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
-    } while (0)
--      
--      
-+
-+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )		\
-+   ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
-+
-    /* Allocate state buffers:
-     */
-    ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
-+   if (rmesa->radeon.radeonScreen->kernel_mm)
-+     rmesa->hw.ctx.emit = ctx_emit_cs;
-+   else
-+     rmesa->hw.ctx.emit = ctx_emit;
-    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
-    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
-    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
-@@ -233,20 +584,25 @@ void radeonInitState( radeonContextPtr rmesa )
-    ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
-    ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
-    ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
--   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
--   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
--   ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
--   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
-+   ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
-+   ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
-+   ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2 );
-+
-+   for (i = 0; i < 3; i++)
-+     rmesa->hw.tex[i].emit = tex_emit;
-+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
-    {
--      ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
--      ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
--      ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
-+      ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
-+      ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
-+      ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
-+      for (i = 0; i < 3; i++)
-+         rmesa->hw.cube[i].emit = cube_emit;
-    }
-    else
-    {
--      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
--      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
--      ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
-+      ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
-+      ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
-+      ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
-    }
-    ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
-    ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
-@@ -268,43 +624,43 @@ void radeonInitState( radeonContextPtr rmesa )
-    ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
-    ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
-    ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
--   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
--   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
--   ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
-+   ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
-+   ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
-+   ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
- 
-    radeonSetUpAtomList( rmesa );
- 
-    /* Fill in the packet headers:
-     */
--   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
--   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
--   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
--   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
--   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
--   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
--   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
--   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
--   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
--   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
--   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
--   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
--   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
--   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
--   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
--   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
--   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
--   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
--   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
--   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
--   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
-+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
-+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
-+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
-+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
-+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
-+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
-+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
-+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
-+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
-+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
-+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
-+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
-+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
-+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
-+   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
-+   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
-+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
-+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
-+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
-+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
-+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
-    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
--      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
--   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
--   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
--   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
-+      cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
-+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
-+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
-+   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
-    rmesa->hw.grd.cmd[GRD_CMD_0] = 
-       cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
-    rmesa->hw.fog.cmd[FOG_CMD_0] = 
-@@ -353,10 +709,10 @@ void radeonInitState( radeonContextPtr rmesa )
- 					    RADEON_DST_BLEND_GL_ZERO );
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
--      rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
-+      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
--      ((rmesa->radeonScreen->depthPitch &
-+      ((rmesa->radeon.radeonScreen->depthPitch &
- 	RADEON_DEPTHPITCH_MASK) |
-        RADEON_DEPTH_ENDIAN_NO_SWAP);
-        
-@@ -374,7 +730,7 @@ void radeonInitState( radeonContextPtr rmesa )
-    if (rmesa->using_hyperz) {
-        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
- 						   RADEON_Z_DECOMPRESSION_ENABLE;
--      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
- 	 /* works for q3, but slight rendering errors with glxgears ? */
- /*	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
- 	 /* need this otherwise get lots of lockups with q3 ??? */
-@@ -389,7 +745,7 @@ void radeonInitState( radeonContextPtr rmesa )
- 				       color_fmt |
- 				       RADEON_ZBLOCK16);
- 
--   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
-+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
-    case DRI_CONF_DITHER_XERRORDIFFRESET:
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
-       break;
-@@ -397,19 +753,19 @@ void radeonInitState( radeonContextPtr rmesa )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
-       break;
-    }
--   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
-+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
- 	DRI_CONF_ROUND_ROUND )
--      rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
-+      rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
-    else
--      rmesa->state.color.roundEnable = 0;
--   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
-+      rmesa->radeon.state.color.roundEnable = 0;
-+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
- 	DRI_CONF_COLOR_REDUCTION_DITHER )
-       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
-    else
--      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
-+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
--					       rmesa->radeonScreen->fbLocation)
-+					       rmesa->radeon.radeonScreen->fbLocation)
- 					      & RADEON_COLOROFFSET_MASK);
- 
-    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
-@@ -418,7 +774,7 @@ void radeonInitState( radeonContextPtr rmesa )
- 
- 
-    /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
--   if (rmesa->sarea->tiling_enabled) {
-+   if (rmesa->radeon.sarea->tiling_enabled) {
-       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
-    }
- 
-@@ -444,7 +800,7 @@ void radeonInitState( radeonContextPtr rmesa )
-   					    RADEON_VC_NO_SWAP;
- #endif
- 
--   if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-      rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
-    }
- 
-@@ -491,8 +847,8 @@ void radeonInitState( radeonContextPtr rmesa )
- 	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
- 
-       /* Initialize the texture offset to the start of the card texture heap */
--      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+      //      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
-+      //	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
- 
-       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
-       rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
-@@ -513,15 +869,15 @@ void radeonInitState( radeonContextPtr rmesa )
- 
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
--	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
-    }
- 
-    /* Can only add ST1 at the time of doing some multitex but can keep
-@@ -613,5 +969,7 @@ void radeonInitState( radeonContextPtr rmesa )
-    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
-    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
-    
--   rmesa->hw.all_dirty = GL_TRUE;
-+   rmesa->radeon.hw.all_dirty = GL_TRUE;
-+
-+   rcommonInitCmdBuf(&rmesa->radeon);
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-index ebea1fe..af933a3 100644
---- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
-@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_tcl.h"
- 
- 
--static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
--
- /* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
- /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
- #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
-@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
- 
- #define EMIT_ATTR( ATTR, STYLE, F0 )					\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
-    fmt_0 |= F0;								\
- } while (0)
- 
- #define EMIT_PAD( N )							\
- do {									\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
--   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
--   rmesa->swtcl.vertex_attr_count++;					\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
-+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
-+   rmesa->radeon.swtcl.vertex_attr_count++;					\
- } while (0)
- 
- static GLuint radeon_cp_vc_frmts[3][2] =
-@@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
- 
- static void radeonSetVertexFormat( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    DECLARE_RENDERINPUTS(index_bitset);
-@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
-    }
- 
-    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
--   rmesa->swtcl.vertex_attr_count = 0;
-+   rmesa->radeon.swtcl.vertex_attr_count = 0;
- 
-    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
-     * build up a hardware vertex.
-@@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx )
-       }
-    }
- 
--   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
-+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
- 	fmt_0 != rmesa->swtcl.vertex_format) {
-       RADEON_NEWPRIM(rmesa);
-       rmesa->swtcl.vertex_format = fmt_0;
--      rmesa->swtcl.vertex_size =
-+      rmesa->radeon.swtcl.vertex_size =
- 	  _tnl_install_attrs( ctx,
--			      rmesa->swtcl.vertex_attrs, 
--			      rmesa->swtcl.vertex_attr_count,
-+			      rmesa->radeon.swtcl.vertex_attrs, 
-+			      rmesa->radeon.swtcl.vertex_attr_count,
- 			      NULL, 0 );
--      rmesa->swtcl.vertex_size /= 4;
--      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-+      rmesa->radeon.swtcl.vertex_size /= 4;
-+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
-       if (RADEON_DEBUG & DEBUG_VERTS)
- 	 fprintf( stderr, "%s: vertex_size= %d floats\n",
--		  __FUNCTION__, rmesa->swtcl.vertex_size);
-+		  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
-    }
- }
- 
- 
- static void radeonRenderStart( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
- 
-    radeonSetVertexFormat( ctx );
-    
--   if (rmesa->dma.flush != 0 && 
--       rmesa->dma.flush != flush_last_swtcl_prim)
--      rmesa->dma.flush( rmesa );
-+   if (rmesa->radeon.dma.flush != 0 && 
-+       rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
-+      rmesa->radeon.dma.flush( ctx );
- }
- 
- 
-@@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx )
-  */
- void radeonChooseVertexState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
- 
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
-@@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx )
-     * rasterization fallback.  As this function will be called again when we
-     * leave a rasterization fallback, we can just skip it for now.
-     */
--   if (rmesa->Fallback != 0)
-+   if (rmesa->radeon.Fallback != 0)
-       return;
- 
-    /* HW perspective divide is a win, but tiny vertex formats are a
-@@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx )
-    }
- }
- 
--
--/* Flush vertices in the current dma region.
-- */
--static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
-+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
- {
--   if (RADEON_DEBUG & DEBUG_IOCTL)
--      fprintf(stderr, "%s\n", __FUNCTION__);
--
--   rmesa->dma.flush = NULL;
--
--   if (rmesa->dma.current.buf) {
--      struct radeon_dma_region *current = &rmesa->dma.current;
--      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
--			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
--			       current->start);
--
--      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--      assert (current->start + 
--	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	      current->ptr);
-+   rcommonEnsureCmdBufSpace(&rmesa->radeon,
-+			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-+			    __FUNCTION__);
- 
--      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
--			          rmesa->hw.max_state_size + VBUF_BUFSZ );
- 
--	 radeonEmitVertexAOS( rmesa,
--			      rmesa->swtcl.vertex_size,
--			      current_offset);
-+   radeonEmitState(&rmesa->radeon);
-+   radeonEmitVertexAOS( rmesa,
-+			rmesa->radeon.swtcl.vertex_size,
-+			rmesa->radeon.dma.current,
-+			current_offset);
- 
--	 radeonEmitVbufPrim( rmesa,
--			     rmesa->swtcl.vertex_format,
--			     rmesa->swtcl.hw_primitive,
--			     rmesa->swtcl.numverts);
--      }
-+		      
-+   radeonEmitVbufPrim( rmesa,
-+		       rmesa->swtcl.vertex_format,
-+		       rmesa->radeon.swtcl.hw_primitive,
-+		       rmesa->radeon.swtcl.numverts);
- 
--      rmesa->swtcl.numverts = 0;
--      current->start = current->ptr;
--   }
- }
- 
--
--/* Alloc space in the current dma region.
-- */
--static INLINE void *
--radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
--{
--   GLuint bytes = vsize * nverts;
--
--   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--      radeonRefillCurrentDmaRegion( rmesa );
--
--   if (!rmesa->dma.flush) {
--      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--      rmesa->dma.flush = flush_last_swtcl_prim;
--   }
--
--   assert( vsize == rmesa->swtcl.vertex_size * 4 );
--   assert( rmesa->dma.flush == flush_last_swtcl_prim );
--   assert (rmesa->dma.current.start + 
--	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--	   rmesa->dma.current.ptr);
--
--
--   {
--      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
--      rmesa->dma.current.ptr += bytes;
--      rmesa->swtcl.numverts += nverts;
--      return head;
--   }
--
--}
--
--
- /*
-  * Render unclipped vertex buffers by emitting vertices directly to
-  * dma buffers.  Use strip/fan hardware primitives where possible.
-@@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
- };
- 
- static INLINE void
--radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
-+radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
- {
-    RADEON_NEWPRIM( rmesa );
--   rmesa->swtcl.hw_primitive = hw_prim[prim];
--   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
-+   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
-+   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
- }
- 
--#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
- #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
- #define FLUSH()  RADEON_NEWPRIM( rmesa )
--#define GET_CURRENT_VB_MAX_VERTS() \
--  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
-+/* FIXME: hard-coded to 10; the computation from radeon.dma.current.end/ptr and radeon.swtcl.vertex_size*4 is disabled. */
-+#define GET_CURRENT_VB_MAX_VERTS() 10
- #define GET_SUBSEQUENT_VB_MAX_VERTS() \
--  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
-+  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
- #define ALLOC_VERTS( nr ) \
--  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
-+  rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
- #define EMIT_VERTS( ctx, j, nr, buf ) \
-   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
- 
-@@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
- static GLboolean radeon_run_render( GLcontext *ctx,
- 				    struct tnl_pipeline_stage *stage )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    tnl_render_func *tab = TAG(render_tab_verts);
-    GLuint i;
- 
--   if (rmesa->swtcl.indexed_verts.buf) 
--      RELEASE_ELT_VERTS();
--   	
--   if (rmesa->swtcl.RenderIndex != 0 ||   
-+   if (rmesa->radeon.swtcl.RenderIndex != 0 ||   
-        !radeon_dma_validate_render( ctx, VB ))
-       return GL_TRUE;		
- 
-@@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
- 
- #undef LOCAL_VARS
- #undef ALLOC_VERTS
--#define CTX_ARG radeonContextPtr rmesa
--#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
--#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
-+#define CTX_ARG r100ContextPtr rmesa
-+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
--   const char *radeonverts = (char *)rmesa->swtcl.verts;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
- #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
- #define VERTEX radeonVertex 
- #undef TAG
-@@ -560,7 +504,7 @@ static struct {
- #define VERT_Y(_v) _v->v.y
- #define VERT_Z(_v) _v->v.z
- #define AREA_IS_CCW( a ) (a < 0)
--#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
-+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
- 
- #define VERT_SET_RGBA( v, c )  					\
- do {								\
-@@ -606,7 +550,7 @@ do {							\
- #undef INIT
- 
- #define LOCAL_VARS(n)							\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);			\
-    GLuint color[n], spec[n];						\
-    GLuint coloroffset = rmesa->swtcl.coloroffset;	\
-    GLuint specoffset = rmesa->swtcl.specoffset;			\
-@@ -617,7 +561,7 @@ do {							\
-  ***********************************************************************/
- 
- #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
--#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
-+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
- #undef TAG
- #define TAG(x) x
- #include "tnl_dd/t_dd_unfilled.h"
-@@ -673,9 +617,9 @@ static void init_rast_tab( void )
- } while (0)
- #undef LOCAL_VARS
- #define LOCAL_VARS						\
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
--   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
--   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
-+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
-+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
-    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
-    const GLboolean stipple = ctx->Line.StippleFlag;		\
-    (void) elt; (void) stipple;
-@@ -700,17 +644,17 @@ static void init_rast_tab( void )
- void radeonChooseRenderState( GLcontext *ctx )
- {
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint index = 0;
-    GLuint flags = ctx->_TriangleCaps;
- 
--   if (!rmesa->TclFallback || rmesa->Fallback) 
-+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
-       return;
- 
-    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
-    if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
- 
--   if (index != rmesa->swtcl.RenderIndex) {
-+   if (index != rmesa->radeon.swtcl.RenderIndex) {
-       tnl->Driver.Render.Points = rast_tab[index].points;
-       tnl->Driver.Render.Line = rast_tab[index].line;
-       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
-@@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx )
- 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
-       }
- 
--      rmesa->swtcl.RenderIndex = index;
-+      rmesa->radeon.swtcl.RenderIndex = index;
-    }
- }
- 
-@@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx )
- 
- static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   if (rmesa->swtcl.hw_primitive != hwprim) {
-+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
-       RADEON_NEWPRIM( rmesa );
--      rmesa->swtcl.hw_primitive = hwprim;
-+      rmesa->radeon.swtcl.hw_primitive = hwprim;
-    }
- }
- 
- static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   rmesa->swtcl.render_primitive = prim;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   rmesa->radeon.swtcl.render_primitive = prim;
-    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
-       radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
- }
-@@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx )
- 
- static void radeonResetLineStipple( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    RADEON_STATECHANGE( rmesa, lin );
- }
- 
-@@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit)
- 
- void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->Fallback;
-+   GLuint oldfallback = rmesa->radeon.Fallback;
- 
-    if (mode) {
--      rmesa->Fallback |= bit;
-+      rmesa->radeon.Fallback |= bit;
-       if (oldfallback == 0) {
--	 RADEON_FIREVERTICES( rmesa );
-+	 radeon_firevertices(&rmesa->radeon);
- 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
- 	 _swsetup_Wakeup( ctx );
--	 rmesa->swtcl.RenderIndex = ~0;
-+	 rmesa->radeon.swtcl.RenderIndex = ~0;
-          if (RADEON_DEBUG & DEBUG_FALLBACKS) {
-             fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
-                     bit, getFallbackString(bit));
-@@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->Fallback &= ~bit;
-+      rmesa->radeon.Fallback &= ~bit;
-       if (oldfallback == bit) {
- 	 _swrast_flush( ctx );
- 	 tnl->Driver.Render.Start = radeonRenderStart;
-@@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- 
- 	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
- 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
--	 if (rmesa->TclFallback) {
--	    /* These are already done if rmesa->TclFallback goes to
-+	 if (rmesa->radeon.TclFallback) {
-+	    /* These are already done if rmesa->radeon.TclFallback goes to
- 	     * zero above. But not if it doesn't (RADEON_NO_TCL for
- 	     * example?)
- 	     */
- 	    _tnl_invalidate_vertex_state( ctx, ~0 );
- 	    _tnl_invalidate_vertices( ctx, ~0 );
--	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
-+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
- 	    radeonChooseVertexState( ctx );
- 	    radeonChooseRenderState( ctx );
- 	 }
-@@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- void radeonInitSwtcl( GLcontext *ctx )
- {
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    static int firsttime = 1;
- 
-    if (firsttime) {
-@@ -872,18 +816,15 @@ void radeonInitSwtcl( GLcontext *ctx )
-    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
- 		       RADEON_MAX_TNL_VERTEX_SIZE);
-    
--   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
--   rmesa->swtcl.RenderIndex = ~0;
--   rmesa->swtcl.render_primitive = GL_TRIANGLES;
--   rmesa->swtcl.hw_primitive = 0;
-+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-+   rmesa->radeon.swtcl.RenderIndex = ~0;
-+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
-+   rmesa->radeon.swtcl.hw_primitive = 0;
- }
- 
- 
- void radeonDestroySwtcl( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
- 
--   if (rmesa->swtcl.indexed_verts.buf) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-index e485052..3ada989 100644
---- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
-@@ -63,5 +63,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
- 
- extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
- 
--
-+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
- #endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
-index 779e9ae..5887ab3 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "tnl/tnl.h"
- #include "tnl/t_pipeline.h"
- 
-+#include "radeon_common.h"
- #include "radeon_context.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
-@@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = {
- };
-    
- 
--#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
- #define ELT_TYPE  GLushort
- 
- #define ELT_INIT(prim, hw_prim) \
-@@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = {
- 
- #define RESET_STIPPLE() do {			\
-    RADEON_STATECHANGE( rmesa, lin );		\
--   radeonEmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);			\
- } while (0)
- 
- #define AUTO_STIPPLE( mode )  do {		\
-@@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = {
-    else						\
-       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
- 	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
--   radeonEmitState( rmesa );			\
-+   radeonEmitState(&rmesa->radeon);		\
- } while (0)
- 
- 
- 
- #define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
- 
--static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
-+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
- {
--   if (rmesa->dma.flush)
--      rmesa->dma.flush( rmesa );
-+      if (rmesa->radeon.dma.flush)
-+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- 
--   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
-+      rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + 
-+			       AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
- 
--   radeonEmitAOS( rmesa,
--		rmesa->tcl.aos_components,
--		rmesa->tcl.nr_aos_components, 0 );
-+      radeonEmitAOS( rmesa,
-+		     rmesa->tcl.nr_aos_components, 0 );
- 
--   return radeonAllocEltsOpenEnded( rmesa,
--				    rmesa->tcl.vertex_format, 
--				    rmesa->tcl.hw_primitive, nr );
-+      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
-+				       rmesa->tcl.hw_primitive, nr );
- }
- 
--#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
-+#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
- 
- 
- 
-@@ -174,14 +173,14 @@ static void radeonEmitPrim( GLcontext *ctx,
- 		       GLuint start, 
- 		       GLuint count)	
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
-    radeonTclPrimitive( ctx, prim, hwprim );
-    
--   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
--			    rmesa->hw.max_state_size + VBUF_BUFSZ );
-+   rcommonEnsureCmdBufSpace( &rmesa->radeon,
-+			     AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-+			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
- 
-    radeonEmitAOS( rmesa,
--		  rmesa->tcl.aos_components,
- 		  rmesa->tcl.nr_aos_components,
- 		  start );
-    
-@@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx,
- 			 GLenum prim,
- 			 int hw_prim )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint se_cntl;
-    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
- 
-@@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
- static GLboolean radeon_run_tcl_render( GLcontext *ctx,
- 					struct tnl_pipeline_stage *stage )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *VB = &tnl->vb;
-    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
-@@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
- 
-    /* TODO: separate this from the swtnl pipeline 
-     */
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       return GL_TRUE;	/* fallback to software t&l */
- 
-    if (VB->Count == 0)
-@@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
- 
- static void transition_to_swtnl( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    GLuint se_cntl;
- 
-@@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx )
- 
- static void transition_to_hwtnl( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    TNLcontext *tnl = TNL_CONTEXT(ctx);
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
- 
-@@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx )
- 
-    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
- 
--   if ( rmesa->dma.flush )			
--      rmesa->dma.flush( rmesa );	
-+   if ( rmesa->radeon.dma.flush )			
-+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
- 
--   rmesa->dma.flush = NULL;
-+   rmesa->radeon.dma.flush = NULL;
-    rmesa->swtcl.vertex_format = 0;
-    
--   if (rmesa->swtcl.indexed_verts.buf) 
--      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
--			      __FUNCTION__ );
-+   //   if (rmesa->swtcl.indexed_verts.buf) 
-+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-+   //			      __FUNCTION__ );
- 
-    if (RADEON_DEBUG & DEBUG_FALLBACKS) 
-       fprintf(stderr, "Radeon end tcl fallback\n");
-@@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit)
- 
- void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   GLuint oldfallback = rmesa->TclFallback;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   GLuint oldfallback = rmesa->radeon.TclFallback;
- 
-    if (mode) {
--      rmesa->TclFallback |= bit;
-+      rmesa->radeon.TclFallback |= bit;
-       if (oldfallback == 0) {
- 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
-@@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
-       }
-    }
-    else {
--      rmesa->TclFallback &= ~bit;
-+      rmesa->radeon.TclFallback &= ~bit;
-       if (oldfallback == bit) {
- 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
- 	    fprintf(stderr, "Radeon end tcl fallback %s\n",
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
-index b0aec21..2dfb504 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tex.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/texobj.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_swtcl.h"
-@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
- {
-    GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
- 
-+   /* Force revalidation to account for switches from/to mipmapping. */
-+   t->validated = GL_FALSE;
-+
-    t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
- 
-    /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
--   if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
-+   if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
-       switch ( minf ) {
-       case GL_NEAREST:
-       case GL_NEAREST_MIPMAP_NEAREST:
-@@ -244,433 +248,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
-    t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
- }
- 
--
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
--{
--   radeonTexObjPtr t;
--
--   t = CALLOC_STRUCT( radeon_tex_obj );
--   texObj->DriverData = t;
--   if ( t != NULL ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
--      }
--
--      /* Initialize non-image-dependent parts of the state:
--       */
--      t->base.tObj = texObj;
--      t->border_fallback = GL_FALSE;
--
--      t->pp_txfilter = RADEON_BORDER_MODE_OGL;
--      t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
--			RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
--
--      make_empty_list( & t->base );
--
--      radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
--      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
--      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
--      radeonSetTexBorderColor( t, texObj->_BorderChan );
--   }
--
--   return t;
--}
--
--
--static const struct gl_texture_format *
--radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
--                           GLenum format, GLenum type )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   const GLboolean do32bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
--   const GLboolean force16bpt =
--       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
--   (void) format;
--
--   switch ( internalFormat ) {
--   case 4:
--   case GL_RGBA:
--   case GL_COMPRESSED_RGBA:
--      switch ( type ) {
--      case GL_UNSIGNED_INT_10_10_10_2:
--      case GL_UNSIGNED_INT_2_10_10_10_REV:
--	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
--      }
--
--   case 3:
--   case GL_RGB:
--   case GL_COMPRESSED_RGB:
--      switch ( type ) {
--      case GL_UNSIGNED_SHORT_4_4_4_4:
--      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
--	 return _dri_texformat_argb4444;
--      case GL_UNSIGNED_SHORT_5_5_5_1:
--      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
--	 return _dri_texformat_argb1555;
--      case GL_UNSIGNED_SHORT_5_6_5:
--      case GL_UNSIGNED_SHORT_5_6_5_REV:
--	 return _dri_texformat_rgb565;
--      default:
--         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--      }
--
--   case GL_RGBA8:
--   case GL_RGB10_A2:
--   case GL_RGBA12:
--   case GL_RGBA16:
--      return !force16bpt ?
--	  _dri_texformat_argb8888 : _dri_texformat_argb4444;
--
--   case GL_RGBA4:
--   case GL_RGBA2:
--      return _dri_texformat_argb4444;
--
--   case GL_RGB5_A1:
--      return _dri_texformat_argb1555;
--
--   case GL_RGB8:
--   case GL_RGB10:
--   case GL_RGB12:
--   case GL_RGB16:
--      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
--
--   case GL_RGB5:
--   case GL_RGB4:
--   case GL_R3_G3_B2:
--      return _dri_texformat_rgb565;
--
--   case GL_ALPHA:
--   case GL_ALPHA4:
--   case GL_ALPHA8:
--   case GL_ALPHA12:
--   case GL_ALPHA16:
--   case GL_COMPRESSED_ALPHA:
--      return _dri_texformat_a8;
--
--   case 1:
--   case GL_LUMINANCE:
--   case GL_LUMINANCE4:
--   case GL_LUMINANCE8:
--   case GL_LUMINANCE12:
--   case GL_LUMINANCE16:
--   case GL_COMPRESSED_LUMINANCE:
--      return _dri_texformat_l8;
--
--   case 2:
--   case GL_LUMINANCE_ALPHA:
--   case GL_LUMINANCE4_ALPHA4:
--   case GL_LUMINANCE6_ALPHA2:
--   case GL_LUMINANCE8_ALPHA8:
--   case GL_LUMINANCE12_ALPHA4:
--   case GL_LUMINANCE12_ALPHA12:
--   case GL_LUMINANCE16_ALPHA16:
--   case GL_COMPRESSED_LUMINANCE_ALPHA:
--      return _dri_texformat_al88;
--
--   case GL_INTENSITY:
--   case GL_INTENSITY4:
--   case GL_INTENSITY8:
--   case GL_INTENSITY12:
--   case GL_INTENSITY16:
--   case GL_COMPRESSED_INTENSITY:
--      return _dri_texformat_i8;
--
--   case GL_YCBCR_MESA:
--      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--          type == GL_UNSIGNED_BYTE)
--         return &_mesa_texformat_ycbcr;
--      else
--         return &_mesa_texformat_ycbcr_rev;
--
--   case GL_RGB_S3TC:
--   case GL_RGB4_S3TC:
--   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgb_dxt1;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
--      return &_mesa_texformat_rgba_dxt1;
--
--   case GL_RGBA_S3TC:
--   case GL_RGBA4_S3TC:
--   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
--      return &_mesa_texformat_rgba_dxt3;
--
--   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
--      return &_mesa_texformat_rgba_dxt5;
--
--   default:
--      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
--      return NULL;
--   }
--
--   return NULL; /* never get here */
--}
--
--
--static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage1d(ctx, target, level, internalFormat,
--                          width, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset,
--                                 GLsizei width,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--			     format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[0] |= (1 << level);
--}
--
--
--static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLenum format, GLenum type, const GLvoid *pixels,
--                              const struct gl_pixelstore_attrib *packing,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_teximage2d(ctx, target, level, internalFormat,
--                          width, height, border, format, type, pixels,
--                          &ctx->Unpack, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format, GLenum type,
--                                 const GLvoid *pixels,
--                                 const struct gl_pixelstore_attrib *packing,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--			     height, format, type, pixels, packing, texObj,
--			     texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
--                              GLint internalFormat,
--                              GLint width, GLint height, GLint border,
--                              GLsizei imageSize, const GLvoid *data,
--                              struct gl_texture_object *texObj,
--                              struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   if ( t != NULL ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
--         return;
--      }
--   }
--
--   /* Note, this will call ChooseTextureFormat */
--   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
--                                 height, border, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
--
--static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
--                                 GLint xoffset, GLint yoffset,
--                                 GLsizei width, GLsizei height,
--                                 GLenum format,
--                                 GLsizei imageSize, const GLvoid *data,
--                                 struct gl_texture_object *texObj,
--                                 struct gl_texture_image *texImage )
--{
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
--   GLuint face;
--
--
--   /* which cube face or ordinary 2D image */
--   switch (target) {
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--      ASSERT(face < 6);
--      break;
--   default:
--      face = 0;
--   }
--
--   assert( t ); /* this _should_ be true */
--   if ( t ) {
--      driSwapOutTextureObject( t );
--   }
--   else {
--      t = (driTextureObject *) radeonAllocTexObj( texObj );
--      if (!t) {
--         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
--         return;
--      }
--   }
--
--   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--                                 height, format, imageSize, data, texObj, texImage);
--
--   t->dirty_images[face] |= (1 << level);
--}
--
- #define SCALED_FLOAT_TO_BYTE( x, scale ) \
- 		(((GLuint)((255.0F / scale) * (x))) / 2)
- 
- static void radeonTexEnv( GLcontext *ctx, GLenum target,
- 			  GLenum pname, const GLfloat *param )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint unit = ctx->Texture.CurrentUnit;
-    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 
-@@ -701,7 +285,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
-        * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
-        * [0.0,4.0] to [0,127].
-        */
--      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
-+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
- 	  0.0 : -1.0;
-       bias = CLAMP( *param, min, 4.0 );
-       if ( bias == 0 ) {
-@@ -734,7 +318,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
- 				struct gl_texture_object *texObj,
- 				GLenum pname, const GLfloat *params )
- {
--   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
-+   radeonTexObj* t = radeon_tex_obj(texObj);
- 
-    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
-@@ -762,57 +346,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
-    case GL_TEXTURE_MAX_LEVEL:
-    case GL_TEXTURE_MIN_LOD:
-    case GL_TEXTURE_MAX_LOD:
-+
-       /* This isn't the most efficient solution but there doesn't appear to
-        * be a nice alternative.  Since there's no LOD clamping,
-        * we just have to rely on loading the right subset of mipmap levels
-        * to simulate a clamped LOD.
-        */
--      driSwapOutTextureObject( (driTextureObject *) t );
-+      if (t->mt) {
-+         radeon_miptree_unreference(t->mt);
-+	 t->mt = 0;
-+	 t->validated = GL_FALSE;
-+      }
-       break;
- 
-    default:
-       return;
-    }
--
--   /* Mark this texobj as dirty (one bit per tex unit)
--    */
--   t->dirty_state = TEX_ALL;
--}
--
--
--static void radeonBindTexture( GLcontext *ctx, GLenum target,
--			       struct gl_texture_object *texObj )
--{
--   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
--      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
--	       ctx->Texture.CurrentUnit );
--   }
--
--   assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
--            target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
--           (texObj->DriverData != NULL) );
- }
- 
--
- static void radeonDeleteTexture( GLcontext *ctx,
- 				 struct gl_texture_object *texObj )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj* t = radeon_tex_obj(texObj);
-+   int i;
- 
-    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-       fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
- 	       _mesa_lookup_enum_by_nr( texObj->Target ) );
-    }
- 
--   if ( t != NULL ) {
--      if ( rmesa ) {
--         RADEON_FIREVERTICES( rmesa );
--      }
--
--      driDestroyTextureObject( t );
-+   if ( rmesa ) {
-+     radeon_firevertices(&rmesa->radeon);
-+     for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
-+       if ( t == rmesa->state.texture.unit[i].texobj ) {
-+	 rmesa->state.texture.unit[i].texobj = NULL;
-+	 rmesa->hw.tex[i].dirty = GL_FALSE;
-+	 rmesa->hw.cube[i].dirty = GL_FALSE;
-+       }
-+     }
-    }
- 
-+   if (t->mt) {
-+      radeon_miptree_unreference(t->mt);
-+      t->mt = 0;
-+   }
-    /* Free mipmap images and the texture object itself */
-    _mesa_delete_texture_object(ctx, texObj);
- }
-@@ -832,7 +410,7 @@ static void radeonTexGen( GLcontext *ctx,
- 			  GLenum pname,
- 			  const GLfloat *params )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLuint unit = ctx->Texture.CurrentUnit;
-    rmesa->recheck_texgen[unit] = GL_TRUE;
- }
-@@ -846,17 +424,27 @@ static void radeonTexGen( GLcontext *ctx,
- static struct gl_texture_object *
- radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_object *obj;
--   obj = _mesa_new_texture_object(ctx, name, target);
--   if (!obj)
--      return NULL;
--   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
--   radeonAllocTexObj( obj );
--   return obj;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
-+
-+   _mesa_initialize_texture_object(&t->base, name, target);
-+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
-+
-+   t->border_fallback = GL_FALSE;
-+
-+   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
-+   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
-+		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-+   
-+   radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
-+   radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
-+   radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
-+   radeonSetTexBorderColor( t, t->base._BorderChan );
-+   return &t->base;
- }
- 
- 
-+
- void radeonInitTextureFuncs( struct dd_function_table *functions )
- {
-    functions->ChooseTextureFormat	= radeonChooseTextureFormat;
-@@ -864,11 +452,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
-    functions->TexImage2D		= radeonTexImage2D;
-    functions->TexSubImage1D		= radeonTexSubImage1D;
-    functions->TexSubImage2D		= radeonTexSubImage2D;
-+   functions->GetTexImage               = radeonGetTexImage;
-+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
- 
-    functions->NewTextureObject		= radeonNewTextureObject;
--   functions->BindTexture		= radeonBindTexture;
-+   //   functions->BindTexture		= radeonBindTexture;
-    functions->DeleteTexture		= radeonDeleteTexture;
--   functions->IsTextureResident		= driIsTextureResident;
- 
-    functions->TexEnv			= radeonTexEnv;
-    functions->TexParameter		= radeonTexParameter;
-@@ -877,5 +466,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
-    functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
-    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
- 
-+   functions->GenerateMipmap = radeonGenerateMipmap;
-+
-+   functions->NewTextureImage = radeonNewTextureImage;
-+   functions->FreeTexImageData = radeonFreeTexImageData;
-+   functions->MapTexture = radeonMapTexture;
-+   functions->UnmapTexture = radeonUnmapTexture;
-+
-    driInitTextureFormats();
- }
-diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
-index 8000880..8c2f9be 100644
---- a/src/mesa/drivers/dri/radeon/radeon_tex.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
-@@ -43,10 +43,10 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 
- extern void radeonUpdateTextureState( GLcontext *ctx );
- 
--extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
-+extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
- 				  GLuint face );
- 
--extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
-+extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
- 
- extern void radeonInitTextureFuncs( struct dd_function_table *functions );
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
-deleted file mode 100644
-index 5f7bbe6..0000000
---- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
-+++ /dev/null
-@@ -1,404 +0,0 @@
--/**************************************************************************
--
--Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
--                     VA Linux Systems Inc., Fremont, California.
--
--All Rights Reserved.
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation on the rights to use, copy, modify, merge, publish,
--distribute, sub license, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice (including the
--next paragraph) shall be included in all copies or substantial
--portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
--SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
--IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
--IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--SOFTWARE.
--
--**************************************************************************/
--
--/*
-- * Authors:
-- *   Kevin E. Martin <martin@valinux.com>
-- *   Gareth Hughes <gareth@valinux.com>
-- *
-- */
--#include <errno.h> 
--
--#include "main/glheader.h"
--#include "main/imports.h"
--#include "main/context.h"
--#include "main/macros.h"
--
--#include "radeon_context.h"
--#include "radeon_ioctl.h"
--#include "radeon_tex.h"
--
--#include <unistd.h>  /* for usleep() */
--
--
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void
--radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
--{
--   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
--   }
--
--   if ( rmesa != NULL ) {
--      unsigned   i;
--
--
--      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
--	 if ( t == rmesa->state.texture.unit[i].texobj ) {
--	    rmesa->state.texture.unit[i].texobj = NULL;
--	 }
--      }
--   }
--}
--
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--
--static void radeonUploadRectSubImage( radeonContextPtr rmesa,
--				      radeonTexObjPtr t, 
--				      struct gl_texture_image *texImage,
--				      GLint x, GLint y, 
--				      GLint width, GLint height )
--{
--   const struct gl_texture_format *texFormat = texImage->TexFormat;
--   int blit_format, dstPitch, done;
--
--   switch ( texFormat->TexelBytes ) {
--   case 1:
--      blit_format = RADEON_GMC_DST_8BPP_CI;
--      break;
--   case 2:
--      blit_format = RADEON_GMC_DST_16BPP;
--      break;
--   case 4:
--      blit_format = RADEON_GMC_DST_32BPP;
--      break;
--   default:
--      fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", 
--      	       texFormat->TexelBytes);
--      return;
--   }
--
--   t->image[0][0].data = texImage->Data;
--
--   /* Currently don't need to cope with small pitches.
--    */
--   width = texImage->Width;
--   height = texImage->Height;
--   dstPitch = t->pp_txpitch + 32;
--
--   {	/* FIXME: prefer GART-texturing if possible */
--      /* Data not in GART memory, or bad pitch.
--       */
--      for (done = 0; done < height ; ) {
--	 struct radeon_dma_region region;
--	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
--	 int src_pitch;
--	 char *tex;
--
--         src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--	 tex = (char *)texImage->Data + done * src_pitch;
--
--	 memset(&region, 0, sizeof(region));
--	 radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
--
--	 /* Copy texdata to dma:
--	  */
--	 if (0)
--	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
--		    __FUNCTION__, src_pitch, dstPitch);
--
--	 if (src_pitch == dstPitch) {
--	    memcpy( region.address + region.start, tex, lines * src_pitch );
--	 } 
--	 else {
--	    char *buf = region.address + region.start;
--	    int i;
--	    for (i = 0 ; i < lines ; i++) {
--	       memcpy( buf, tex, src_pitch );
--	       buf += dstPitch;
--	       tex += src_pitch;
--	    }
--	 }
--
--	 radeonEmitWait( rmesa, RADEON_WAIT_3D );
--
--	 
--
--	 /* Blit to framebuffer
--	  */
--	 radeonEmitBlit( rmesa,
--		       blit_format,
--		       dstPitch, GET_START( &region ),
--		       dstPitch, t->bufAddr,
--		       0, 0,
--		       0, done,
--		       width, lines );
--	 
--	 radeonEmitWait( rmesa, RADEON_WAIT_2D );
--
--	 radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
--	 done += lines;
--      }
--   }
--}
--
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, 
--			    GLint hwlevel,
--			    GLint x, GLint y, GLint width, GLint height,
--			    GLuint face )
--{
--   struct gl_texture_image *texImage = NULL;
--   GLuint offset;
--   GLint imageWidth, imageHeight;
--   GLint ret;
--   drm_radeon_texture_t tex;
--   drm_radeon_tex_image_t tmp;
--   const int level = hwlevel + t->base.firstLevel;
--
--   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
--      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
--	       __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
--   }
--
--   ASSERT(face < 6);
--
--   /* Ensure we have a valid texture to upload */
--   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
--      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--      return;
--   }
--
--   texImage = t->base.tObj->Image[face][level];
--
--   if ( !texImage ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
--      return;
--   }
--   if ( !texImage->Data ) {
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
--      return;
--   }
--
--
--   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--      assert(level == 0);
--      assert(hwlevel == 0);
--      if ( RADEON_DEBUG & DEBUG_TEXTURE )
--	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
--      radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
--      return;
--   }
--
--   imageWidth = texImage->Width;
--   imageHeight = texImage->Height;
--
--   offset = t->bufAddr + t->base.totalSize * face / 6;
--
--   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      GLint imageX = 0;
--      GLint imageY = 0;
--      GLint blitX = t->image[face][hwlevel].x;
--      GLint blitY = t->image[face][hwlevel].y;
--      GLint blitWidth = t->image[face][hwlevel].width;
--      GLint blitHeight = t->image[face][hwlevel].height;
--      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
--	       imageWidth, imageHeight, imageX, imageY );
--      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
--	       blitWidth, blitHeight, blitX, blitY );
--      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--	       (GLuint)offset, hwlevel, level );
--   }
--
--   t->image[face][hwlevel].data = texImage->Data;
--
--   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--    * We used to use 1, 2 and 4-byte texels and used to use the texture
--    * width to dictate the blit width - but that won't work for compressed
--    * textures. (Brian)
--    * NOTE: can't do that with texture tiling. (sroland)
--    */
--   tex.offset = offset;
--   tex.image = &tmp;
--   /* copy (x,y,width,height,data) */
--   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
--
--   if (texImage->TexFormat->TexelBytes) {
--      /* use multi-byte upload scheme */
--      tex.height = imageHeight;
--      tex.width = imageWidth;
--      tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
--      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
--      tex.offset += tmp.x & ~1023;
--      tmp.x = tmp.x % 1024;
--      if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
--	 /* need something like "tiled coordinates" ? */
--	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
--	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
--	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--      }
--      else {
--	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--      }
--      if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
--	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
--	 /* radeon switches off macro tiling for small textures/mipmaps it seems */
--	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--      }
--   }
--   else {
--      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
--         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
--         so the kernel module reads the right amount of data. */
--      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
--      tex.pitch = (BLIT_WIDTH_BYTES / 64);
--      tex.height = (imageHeight + 3) / 4;
--      tex.width = (imageWidth + 3) / 4;
--      switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
--      case RADEON_TXFORMAT_DXT1:
--         tex.width *= 8;
--         break;
--      case RADEON_TXFORMAT_DXT23:
--      case RADEON_TXFORMAT_DXT45:
--         tex.width *= 16;
--         break;
--      }
--   }
--
--   LOCK_HARDWARE( rmesa );
--   do {
--      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
--                                 &tex, sizeof(drm_radeon_texture_t) );
--   } while ( ret == -EAGAIN );
--
--   UNLOCK_HARDWARE( rmesa );
--
--   if ( ret ) {
--      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
--      fprintf( stderr, "   offset=0x%08x\n",
--	       offset );
--      fprintf( stderr, "   image width=%d height=%d\n",
--	       imageWidth, imageHeight );
--      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
--	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
--	       t->image[face][hwlevel].data );
--      exit( 1 );
--   }
--}
--
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- * 
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
--{
--   int numLevels;
--
--   if ( !t || t->base.totalSize == 0 || t->image_override )
--      return 0;
--
--   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
--      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
--	       t->base.firstLevel, t->base.lastLevel );
--   }
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      radeonFinish( rmesa->glCtx );
--   }
--
--   LOCK_HARDWARE( rmesa );
--
--   if ( t->base.memBlock == NULL ) {
--      int heap;
--
--      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
--				 (driTextureObject *) t );
--      if ( heap == -1 ) {
--	 UNLOCK_HARDWARE( rmesa );
--	 return -1;
--      }
--
--      /* Set the base offset of the texture image */
--      t->bufAddr = rmesa->radeonScreen->texOffset[heap] 
--	   + t->base.memBlock->ofs;
--      t->pp_txoffset = t->bufAddr;
--
--      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--	 /* hope it's safe to add that here... */
--	 t->pp_txoffset |= t->tile_bits;
--      }
--
--      /* Mark this texobj as dirty on all units:
--       */
--      t->dirty_state = TEX_ALL;
--   }
--
--
--   /* Let the world know we've used this memory recently.
--    */
--   driUpdateTextureLRU( (driTextureObject *) t );
--   UNLOCK_HARDWARE( rmesa );
--
--
--   /* Upload any images that are new */
--   if (t->base.dirty_images[face]) {
--      int i;
--      for ( i = 0 ; i < numLevels ; i++ ) {
--         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
--            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
--			    t->image[face][i].height, face );
--         }
--      }
--      t->base.dirty_images[face] = 0;
--   }
--
--   if (RADEON_DEBUG & DEBUG_SYNC) {
--      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
--      radeonFinish( rmesa->glCtx );
--   }
--
--   return 0;
--}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
-index 1e2f654..6a34f1e 100644
---- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
-@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/enums.h"
- 
- #include "radeon_context.h"
-+#include "radeon_mipmap_tree.h"
- #include "radeon_state.h"
- #include "radeon_ioctl.h"
- #include "radeon_swtcl.h"
-@@ -75,10 +76,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
- 			     && (tx_table[f].format != 0xffffffff) )
- 
--static const struct {
-+struct tx_table {
-    GLuint format, filter;
--}
--tx_table[] =
-+};
-+
-+static const struct tx_table tx_table[] =
- {
-    _ALPHA(RGBA8888),
-    _ALPHA_REV(RGBA8888),
-@@ -111,252 +113,6 @@ tx_table[] =
- #undef _ALPHA
- #undef _INVALID
- 
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
-- * too.
-- * 
-- * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-- */
--static void radeonSetTexImages( radeonContextPtr rmesa,
--				struct gl_texture_object *tObj )
--{
--   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
--   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
--   GLint curOffset, blitWidth;
--   GLint i, texelBytes;
--   GLint numLevels;
--   GLint log2Width, log2Height, log2Depth;
--
--   /* Set the hardware texture format
--    */
--   if ( !t->image_override ) {
--      t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
--                          RADEON_TXFORMAT_ALPHA_IN_MAP);
--      t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
--
--      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
--         t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
--         t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
--      }
--      else {
--         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
--         return;
--      }
--   }
--
--   texelBytes = baseImage->TexFormat->TexelBytes;
--
--   /* Compute which mipmap levels we really want to send to the hardware.
--    */
--
--   if (tObj->Target != GL_TEXTURE_CUBE_MAP)
--      driCalculateTextureFirstLastLevel( (driTextureObject *) t );
--   else {
--      /* r100 can't handle mipmaps for cube/3d textures, so don't waste
--         memory for them */
--      t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
--   }
--   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--   /* Calculate mipmap offsets and dimensions for blitting (uploading)
--    * The idea is that we lay out the mipmap levels within a block of
--    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--    */
--   curOffset = 0;
--   blitWidth = BLIT_WIDTH_BYTES;
--   t->tile_bits = 0;
--
--   /* figure out if this texture is suitable for tiling. */
--   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
--      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
--	 /* allow 32 (bytes) x 1 mip (which will use two times the space
--	    the non-tiled version would use) max if base texture is large enough */
--	 if ((numLevels == 1) ||
--	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
--	       (baseImage->Width * texelBytes > 64)) ||
--	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
--	    /* R100 has two microtile bits (only the txoffset reg, not the blitter)
--	       weird: X2 + OPT: 32bit correct, 16bit completely hosed
--		      X2: 32bit correct, 16bit correct
--		      OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
--	    t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
--	 }
--      }
--      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
--	 /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
--	    in the case if height is smaller than 16 (not 100% sure), as does the r200,
--	    so need to disable macro tiling in that case */
--	 if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
--	    t->tile_bits |= RADEON_TXO_MACRO_TILE;
--	 }
--      }
--   }
--
--   for (i = 0; i < numLevels; i++) {
--      const struct gl_texture_image *texImage;
--      GLuint size;
--
--      texImage = tObj->Image[0][i + t->base.firstLevel];
--      if ( !texImage )
--	 break;
--
--      /* find image size in bytes */
--      if (texImage->IsCompressed) {
--      /* need to calculate the size AFTER padding even though the texture is
--         submitted without padding.
--         Only handle pot textures currently - don't know if npot is even possible,
--         size calculation would certainly need (trivial) adjustments.
--         Align (and later pad) to 32byte, not sure what that 64byte blit width is
--         good for? */
--         if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
--            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
--            if ((texImage->Width + 3) < 8) /* width one block */
--               size = texImage->CompressedSize * 4;
--            else if ((texImage->Width + 3) < 16)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--         }
--         else /* DXT3/5, 16 bytes per block */
--            if ((texImage->Width + 3) < 8)
--               size = texImage->CompressedSize * 2;
--            else size = texImage->CompressedSize;
--      }
--      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
--      }
--      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
--	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--	    though the actual offset may be different (if texture is less than
--	    32 bytes width) to the untiled case */
--	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      else {
--	 int w = (texImage->Width * texelBytes + 31) & ~31;
--	 size = w * texImage->Height * texImage->Depth;
--	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--      }
--      assert(size > 0);
--
--      /* Align to 32-byte offset.  It is faster to do this unconditionally
--       * (no branch penalty).
--       */
--
--      curOffset = (curOffset + 0x1f) & ~0x1f;
--
--      if (texelBytes) {
--	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
--	 t->image[0][i].y = 0;
--	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
--	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
--      }
--      else {
--         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
--         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
--         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
--         t->image[0][i].height = size / t->image[0][i].width;     
--      }
--
--#if 0
--      /* for debugging only and only  applicable to non-rectangle targets */
--      assert(size % t->image[0][i].width == 0);
--      assert(t->image[0][i].x == 0
--             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
--#endif
--
--      if (0)
--         fprintf(stderr,
--                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--                 i, texImage->Width, texImage->Height,
--                 t->image[0][i].x, t->image[0][i].y,
--                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
--
--      curOffset += size;
--
--   }
--
--   /* Align the total size of texture memory block.
--    */
--   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--   /* Setup remaining cube face blits, if needed */
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      const GLuint faceSize = t->base.totalSize;
--      GLuint face;
--      /* reuse face 0 x/y/width/height - just update the offset when uploading */
--      for (face = 1; face < 6; face++) {
--         for (i = 0; i < numLevels; i++) {
--            t->image[face][i].x =  t->image[0][i].x;
--            t->image[face][i].y =  t->image[0][i].y;
--            t->image[face][i].width  = t->image[0][i].width;
--            t->image[face][i].height = t->image[0][i].height;
--         }
--      }
--      t->base.totalSize = 6 * faceSize; /* total texmem needed */
--   }
--
--   /* Hardware state:
--    */
--   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
--   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
--
--   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
--		       RADEON_TXFORMAT_HEIGHT_MASK |
--                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
--                       RADEON_TXFORMAT_F5_WIDTH_MASK |
--                       RADEON_TXFORMAT_F5_HEIGHT_MASK);
--   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
--		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
--
--   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--      assert(log2Width == log2Height);
--      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
--                         (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
--                         (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
--      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
--                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
--                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
--   }
--
--   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
--                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
--
--   /* Only need to round to nearest 32 for textures, but the blitter
--    * requires 64-byte aligned pitches, and we may/may not need the
--    * blitter.   NPOT only!
--    */
--   if ( !t->image_override ) {
--      if (baseImage->IsCompressed)
--         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--      else
--         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
--      t->pp_txpitch -= 32;
--   }
--
--   t->dirty_state = TEX_ALL;
--
--   /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
--}
--
--
--
- /* ================================================================
-  * Texture combine functions
-  */
-@@ -503,7 +259,7 @@ do {							\
- 
- static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-    GLuint color_combine, alpha_combine;
-    const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
-@@ -846,22 +602,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
- void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-                         unsigned long long offset, GLint depth, GLuint pitch)
- {
--	radeonContextPtr rmesa = pDRICtx->driverPrivate;
-+	r100ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
--	    _mesa_lookup_texture(rmesa->glCtx, texname);
--	radeonTexObjPtr t;
-+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
-+	radeonTexObjPtr t = radeon_tex_obj(tObj);
- 
- 	if (tObj == NULL)
- 		return;
- 
--	t = (radeonTexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
--
--	t->pp_txoffset = offset;
-+	
-+	t->bo = NULL;
-+	t->override_offset = offset;
- 	t->pp_txpitch = pitch - 32;
- 
- 	switch (depth) {
-@@ -901,12 +656,58 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-                               RADEON_TXFORMAT_NON_POWER2)
- 
- 
--static void import_tex_obj_state( radeonContextPtr rmesa,
-+static void disable_tex_obj_state( r100ContextPtr rmesa, 
-+				   int unit )
-+{
-+   /* do not use RADEON_DB_STATE to avoid stale texture caches */
-+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-+   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
-+   GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
-+
-+   RADEON_STATECHANGE( rmesa, tex[unit] );
-+
-+   RADEON_STATECHANGE( rmesa, tcl );
-+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
-+					     RADEON_Q_BIT(unit));
-+   
-+   if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
-+     TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-+     rmesa->recheck_texgen[unit] = GL_TRUE;
-+   }
-+
-+   if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
-+     /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
-+	cubic_map bit on unit 2 when the unit is disabled, otherwise every
-+	2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
-+	units, better be safe than sorry though).*/
-+     RADEON_STATECHANGE( rmesa, tex[unit] );
-+     rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
-+   }
-+
-+   {
-+      GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-+      GLuint tmp = rmesa->TexGenEnabled;
-+
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
-+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
-+      rmesa->TexGenNeedNormals[unit] = 0;
-+      rmesa->TexGenEnabled |= 
-+	(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-+
-+      if (tmp != rmesa->TexGenEnabled) {
-+	rmesa->recheck_texgen[unit] = GL_TRUE;
-+	rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-+      }
-+   }
-+}
-+
-+static void import_tex_obj_state( r100ContextPtr rmesa,
- 				  int unit,
- 				  radeonTexObjPtr texobj )
- {
- /* do not use RADEON_DB_STATE to avoid stale texture caches */
--   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
-    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
- 
-    RADEON_STATECHANGE( rmesa, tex[unit] );
-@@ -915,10 +716,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
-    cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
-    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
--   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
-    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- 
--   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-+   if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
-       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
-       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
-@@ -928,22 +728,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-    else {
-       se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
- 
--      if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
--	 int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
--	 GLuint bytesPerFace = texobj->base.totalSize / 6;
--	 ASSERT(texobj->base.totalSize % 6 == 0);
-+      if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
-+	 uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
- 
- 	 RADEON_STATECHANGE( rmesa, cube[unit] );
- 	 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
--	 /* dont know if this setup conforms to OpenGL.. 
--	  * at least it matches the behavior of mesa software renderer
--	  */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
--	 cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
--	 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
-+	 /* state filled out in the cube_emit */
-       }
-    }
- 
-@@ -952,13 +742,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
-       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
-    }
- 
--   texobj->dirty_state &= ~(1<<unit);
-+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- 
- 
--
--
--static void set_texgen_matrix( radeonContextPtr rmesa, 
-+static void set_texgen_matrix( r100ContextPtr rmesa, 
- 			       GLuint unit,
- 			       const GLfloat *s_plane,
- 			       const GLfloat *t_plane,
-@@ -986,14 +774,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
-    rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
- 
-    rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
--   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
- }
- 
- /* Returns GL_FALSE if fallback required.
-  */
- static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-    GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-    GLuint tmp = rmesa->TexGenEnabled;
-@@ -1094,283 +882,185 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
-    }
- 
-    if (tmp != rmesa->TexGenEnabled) {
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
-    return GL_TRUE;
- }
- 
--
--static void disable_tex( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--
--   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
--      /* Texture unit disabled */
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
--	 rmesa->state.texture.unit[unit].texobj = NULL;
--      }
--
--      RADEON_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
--	  ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
--
--      RADEON_STATECHANGE( rmesa, tcl );
--      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
--						RADEON_Q_BIT(unit));
--
--      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
--	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
--	 rmesa->recheck_texgen[unit] = GL_TRUE;
--      }
--
--      if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
--      /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
--         cubic_map bit on unit 2 when the unit is disabled, otherwise every
--	 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
--	 units, better be safe than sorry though).*/
--	 RADEON_STATECHANGE( rmesa, tex[unit] );
--	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
--      }
--
--      {
--	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
--	 GLuint tmp = rmesa->TexGenEnabled;
--
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
--	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
--	 rmesa->TexGenNeedNormals[unit] = 0;
--	 rmesa->TexGenEnabled |= 
--	     (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
--
--	 if (tmp != rmesa->TexGenEnabled) {
--	    rmesa->recheck_texgen[unit] = GL_TRUE;
--	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--	 }
--      }
--   }
--}
--
--static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
--   }
--
--   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--   if ( t->base.dirty_images[0] ) {
--      RADEON_FIREVERTICES( rmesa );
--      radeonSetTexImages( rmesa, tObj );
--      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock && !t->image_override ) 
--	return GL_FALSE;
--   }
--
--   return GL_TRUE;
--}
--
--static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
-+/**
-+ * Compute the cached hardware register values for the given texture object.
-+ *
-+ * \param rmesa Context pointer
-+ * \param t the r300 texture object
-+ */
-+static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--   GLuint face;
--
--   /* Need to load the 2d images associated with this unit.
--    */
--   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
--      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
--      for (face = 0; face < 6; face++)
--         t->base.dirty_images[face] = ~0;
--   }
-+   const struct gl_texture_image *firstImage;
-+   GLint log2Width, log2Height, log2Depth, texelBytes;
- 
--   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
-+   firstImage = t->base.Image[0][t->mt->firstLevel];   
- 
--   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
--        t->base.dirty_images[2] || t->base.dirty_images[3] ||
--        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
--      /* flush */
--      RADEON_FIREVERTICES( rmesa );
--      /* layout memory space, once for all faces */
--      radeonSetTexImages( rmesa, tObj );
-+   if (firstImage->Border > 0) {
-+      fprintf(stderr, "%s: border\n", __FUNCTION__);
-+      return GL_FALSE;
-    }
- 
--   /* upload (per face) */
--   for (face = 0; face < 6; face++) {
--      if (t->base.dirty_images[face]) {
--         radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
-+   log2Width  = firstImage->WidthLog2;
-+   log2Height = firstImage->HeightLog2;
-+   log2Depth  = firstImage->DepthLog2;
-+   texelBytes = firstImage->TexFormat->TexelBytes;
-+
-+   if (!t->image_override) {
-+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+	const struct tx_table *table = tx_table;
-+
-+	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
-+			     RADEON_TXFORMAT_ALPHA_IN_MAP);
-+	 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;	 
-+	 
-+	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
-+	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
-+      } else {
-+	 _mesa_problem(NULL, "unexpected texture format in %s",
-+		       __FUNCTION__);
-+	 return GL_FALSE;
-       }
-    }
--      
--   if ( !t->base.memBlock ) {
--      /* texmem alloc failed, use s/w fallback */
--      return GL_FALSE;
-+   
-+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
-+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
-+	
-+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
-+		       RADEON_TXFORMAT_HEIGHT_MASK |
-+		       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
-+		       RADEON_TXFORMAT_F5_WIDTH_MASK |
-+		       RADEON_TXFORMAT_F5_HEIGHT_MASK);
-+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
-+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
-+   
-+   t->tile_bits = 0;
-+   
-+   if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
-+      ASSERT(log2Width == log2Height);
-+      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
-+			 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
-+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
-+			 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
-+      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
-+                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
-+                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
-    }
- 
--   return GL_TRUE;
--}
-+   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
-+		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
- 
--static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
--{
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--
--   if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
--      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
--      t->base.dirty_images[0] = ~0;
-+   if ( !t->image_override ) {
-+      if (firstImage->IsCompressed)
-+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
-+      else
-+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
-+      t->pp_txpitch -= 32;
-    }
- 
--   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
--
--   if ( t->base.dirty_images[0] ) {
--      RADEON_FIREVERTICES( rmesa );
--      radeonSetTexImages( rmesa, tObj );
--      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
--      if ( !t->base.memBlock &&
--           !t->image_override /* && !rmesa->prefer_gart_client_texturing  FIXME */ ) {
--	 fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
--	 return GL_FALSE;
--      }
-+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-+      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
-    }
- 
-    return GL_TRUE;
- }
- 
--
--static GLboolean update_tex_common( GLcontext *ctx, int unit )
-+static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
--   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--   struct gl_texture_object *tObj = texUnit->_Current;
--   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
--   GLenum format;
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-+   radeonTexObj *t = radeon_tex_obj(texObj);
-+   int ret;
- 
--   /* Fallback if there's a texture border */
--   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
--      fprintf(stderr, "%s: border\n", __FUNCTION__);
-+   if (!radeon_validate_texture_miptree(ctx, texObj))
-       return GL_FALSE;
--   }
-+
-+   ret = setup_hardware_state(rmesa, t, unit);
-+   if (ret == GL_FALSE)
-+     return GL_FALSE;
-+
-    /* yuv conversion only works in first unit */
-    if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
-       return GL_FALSE;
- 
--   /* Update state if this is a different texture object to last
--    * time.
--    */
--   if ( rmesa->state.texture.unit[unit].texobj != t ) {
--      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
--	 /* The old texture is no longer bound to this texture unit.
--	  * Mark it as such.
--	  */
--
--	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
--	     ~(1UL << unit);
--      }
-+   RADEON_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
-+     (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
- 
--      rmesa->state.texture.unit[unit].texobj = t;
--      t->base.bound |= (1UL << unit);
--      t->dirty_state |= 1<<unit;
--      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
--   }
-+   RADEON_STATECHANGE( rmesa, tcl );
-+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
- 
-+   rmesa->recheck_texgen[unit] = GL_TRUE;
- 
--   /* Newly enabled?
--    */
--   if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
--      RADEON_STATECHANGE( rmesa, ctx );
--      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
--	  (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
--
--      RADEON_STATECHANGE( rmesa, tcl );
--
--      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
--
--      rmesa->recheck_texgen[unit] = GL_TRUE;
--   }
--
--   if (t->dirty_state & (1<<unit)) {
--      import_tex_obj_state( rmesa, unit, t );
--      /* may need to update texture matrix (for texrect adjustments) */
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
--   }
-+   import_tex_obj_state( rmesa, unit, t );
- 
-    if (rmesa->recheck_texgen[unit]) {
-       GLboolean fallback = !radeon_validate_texgen( ctx, unit );
-       TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
-       rmesa->recheck_texgen[unit] = 0;
--      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
-    }
- 
--   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
--   if ( rmesa->state.texture.unit[unit].format != format ||
--	rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
--      rmesa->state.texture.unit[unit].format = format;
--      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
--      if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
--	 return GL_FALSE;
--      }
-+   if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
-+     return GL_FALSE;
-    }
--
-    FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
-+
-+   t->validated = GL_TRUE;
-    return !t->border_fallback;
- }
- 
--
--
- static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
- {
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- 
--   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
--      return (enable_tex_rect( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
--      return (enable_tex_2d( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
--   }
--   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
--      return (enable_tex_cube( ctx, unit ) &&
--	      update_tex_common( ctx, unit ));
-+
-+   if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
-+     return GL_FALSE;
-    }
--   else if ( texUnit->_ReallyEnabled ) {
--      return GL_FALSE;
-+
-+   if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
-+     /* disable the unit */
-+     disable_tex_obj_state(rmesa, unit);
-+     return GL_TRUE;
-    }
--   else {
--      disable_tex( ctx, unit );
--      return GL_TRUE;
-+
-+   if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
-+    _mesa_warning(ctx,
-+		  "failed to validate texture for unit %d.\n",
-+		  unit);
-+    rmesa->state.texture.unit[unit].texobj = NULL;
-+    return GL_FALSE;
-    }
-+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
-+   return GL_TRUE;
- }
- 
- void radeonUpdateTextureState( GLcontext *ctx )
- {
--   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-    GLboolean ok;
- 
-+   /* set the ctx all textures off */
-+   RADEON_STATECHANGE( rmesa, ctx );
-+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
-+
-    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
- 	 radeonUpdateTextureUnit( ctx, 1 ) &&
- 	 radeonUpdateTextureUnit( ctx, 2 ));
- 
-    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
- 
--   if (rmesa->TclFallback)
-+   if (rmesa->radeon.TclFallback)
-       radeonChooseVertexState( ctx );
- }
 diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
 new file mode 100644
 index 0000000..63680b4
@@ -34369,17 +6081,6 @@ index 0000000..d90fda7
 +				 struct gl_texture_image *texImage);
 +
 +#endif
-diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-index 596a8aa..0df634b 100644
---- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
-@@ -2031,6 +2031,9 @@
- #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
- #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
- #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
-+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
-+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
-+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
- #define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
- #define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
- #define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
+-- 
+1.6.0.3
+
author	Dave Airlie <airlied@fedoraproject.org>	2009-03-04 07:00:36 +0000
committer	Dave Airlie <airlied@fedoraproject.org>	2009-03-04 07:00:36 +0000
commit	16d2d54dc632d4106d24c81ac8c89ecaa4550b11 (patch)
tree	15c183204f198ab1d5800489fafda62ece7f7d6f
parent	a5cda00fd401899ec0d38e3519a8f1ad9cd3e1d3 (diff)
download	mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.tar.gz mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.tar.xz mesa-16d2d54dc632d4106d24c81ac8c89ecaa4550b11.zip