author     Dave Airlie <airlied@fedoraproject.org>  2009-03-05 08:19:34 +0000
committer  Dave Airlie <airlied@fedoraproject.org>  2009-03-05 08:19:34 +0000
commit     9eb6e52b8fa2772e97047f4fc9942634373b9ab1 (patch)
tree       7659e15eba04277105775e7cc1d7a84aa8f396d9
parent     16d2d54dc632d4106d24c81ac8c89ecaa4550b11 (diff)
download   mesa-9eb6e52b8fa2772e97047f4fc9942634373b9ab1.tar.gz
           mesa-9eb6e52b8fa2772e97047f4fc9942634373b9ab1.tar.xz
           mesa-9eb6e52b8fa2772e97047f4fc9942634373b9ab1.zip
- radeon-rewrite.patch: fixup link against libdrm_radeon
-rw-r--r--  mesa.spec                 5
-rw-r--r--  radeon-rewrite.patch  29657
2 files changed, 29616 insertions, 46 deletions
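
The mesa.spec change below is only the release bump and changelog entry; the substance is the regenerated radeon-rewrite.patch, whose configure.ac hunk probes for libdrm_radeon via pkg-config and passes RADEON_CFLAGS/RADEON_LDFLAGS to the radeon/r200 builds only when the module is found. A minimal sketch of exercising that check by hand (an assumed packager workflow, not part of this commit):

  # Hypothetical pre-build check: confirm pkg-config can see libdrm_radeon,
  # since the configure.ac hunk below only wires up RADEON_CFLAGS/RADEON_LDFLAGS
  # (and thus the experimental mm/cs paths) when the module is found.
  if pkg-config --exists libdrm_radeon; then
      pkg-config --cflags --libs libdrm_radeon
  else
      echo "libdrm_radeon not found; only the legacy paths will be built" >&2
  fi

As the embedded commit message notes, the legacy driver still builds without libdrm_radeon; the library is needed only for the experimental mm/cs code paths.
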
diff --git a/mesa.spec b/mesa.spec
index af182f9..b559ef5 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -20,7 +20,7 @@
Summary: Mesa graphics libraries
Name: mesa
Version: 7.3
-Release: 9%{?dist}
+Release: 10%{?dist}
License: MIT
Group: System Environment/Libraries
URL: http://www.mesa3d.org
@@ -427,6 +427,9 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/mesa-demos-data
%changelog
+* Thu Mar 05 2009 Dave Airlie <airlied@redhat.com> 7.3-10
+- radeon-rewrite.patch: fixup link against libdrm_radeon
+
* Wed Mar 04 2009 Dave Airlie <airlied@redhat.com> 7.3-9
- try again: pull in 7.4 fixes, dri configs changes, new radeon-rewrite
diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch
index 0edb095..1eb201c 100644
--- a/radeon-rewrite.patch
+++ b/radeon-rewrite.patch
@@ -1,47 +1,19513 @@
-From c4030c794274b22ba6ccb7c919900b41f5c723f2 Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Wed, 4 Mar 2009 16:51:14 +1000
-Subject: [PATCH] radeon/r100/r200: import latest merge
+commit 263b887d85e3eac9a32673c8ed3004c3129ce997
+Author: Dave Airlie <airlied@redhat.com>
+Date: Sun Feb 15 17:03:47 2009 +1000
----
- src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 182 ++++
- src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 825 +++++++++++++++++
- src/mesa/drivers/dri/radeon/radeon_bo_legacy.h | 47 +
- src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 67 ++
- src/mesa/drivers/dri/radeon/radeon_cmdbuf.h | 143 +++
- src/mesa/drivers/dri/radeon/radeon_common.c | 849 +++++++++++++++++
- src/mesa/drivers/dri/radeon/radeon_common.h | 55 ++
- .../drivers/dri/radeon/radeon_common_context.c | 589 ++++++++++++
- .../drivers/dri/radeon/radeon_common_context.h | 508 ++++++++++
- src/mesa/drivers/dri/radeon/radeon_cs_drm.h | 207 +++++
- src/mesa/drivers/dri/radeon/radeon_cs_legacy.c | 504 ++++++++++
- src/mesa/drivers/dri/radeon/radeon_cs_legacy.h | 40 +
- src/mesa/drivers/dri/radeon/radeon_dma.c | 323 +++++++
- src/mesa/drivers/dri/radeon/radeon_dma.h | 51 +
- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 360 ++++++++
- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h | 97 ++
- src/mesa/drivers/dri/radeon/radeon_texture.c | 966 ++++++++++++++++++++
- src/mesa/drivers/dri/radeon/radeon_texture.h | 118 +++
- 18 files changed, 5931 insertions(+), 0 deletions(-)
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_drm.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_common.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_common_context.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_drm.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_dma.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.c
- create mode 100644 src/mesa/drivers/dri/radeon/radeon_texture.h
+ radeon: add cflags to decide whether to link libdrm_radeon or not.
+
+ You don't need libdrm_radeon for the legacy driver to build,
+ only for the experimental mm/cs paths.
+commit 61e6b2aee3069700db397f26d7ae1384641367ff
+Author: Ian Romanick <idr@freedesktop.org>
+Date: Fri Jan 9 15:43:17 2009 -0800
+
+ Track two sets of back-face stencil state
+
+ Track separate back-face stencil state for OpenGL 2.0 /
+ GL_ATI_separate_stencil and GL_EXT_stencil_two_side. This allows all
+ three to be enabled in a driver. One set of state is set via the 2.0
+ or ATI functions and is used when STENCIL_TEST_TWO_SIDE_EXT is
+ disabled. The other is set by StencilFunc and StencilOp when the
+ active stencil face is set to BACK. The GL_EXT_stencil_two_side spec has
+ more details.
+
+ http://opengl.org/registry/specs/EXT/stencil_two_side.txt
+
+commit 86691da4b5f43be625ec510b7fe40657b9985783
+Author: Dave Airlie <airlied@redhat.com>
+Date: Wed Mar 4 16:51:14 2009 +1000
+
+ radeon/r100/r200: import latest merge
+diff --git a/configs/autoconf.in b/configs/autoconf.in
+index 4a89716..f18d119 100644
+--- a/configs/autoconf.in
++++ b/configs/autoconf.in
+@@ -20,6 +20,8 @@ CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ \
+ $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
+ LDFLAGS = @LDFLAGS@
+ EXTRA_LIB_PATH = @EXTRA_LIB_PATH@
++RADEON_CFLAGS = @RADEON_CFLAGS@
++RADEON_LDFLAGS = @RADEON_LDFLAGS@
+
+ # Assembler
+ ASM_SOURCES = @ASM_SOURCES@
+diff --git a/configure.ac b/configure.ac
+index 73caf00..48f4eac 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -413,6 +413,8 @@ AC_SUBST([SRC_DIRS])
+ AC_SUBST([GLU_DIRS])
+ AC_SUBST([DRIVER_DIRS])
+ AC_SUBST([WINDOW_SYSTEM])
++AC_SUBST([RADEON_CFLAGS])
++AC_SUBST([RADEON_LDFLAGS])
+
+ dnl
+ dnl User supplied program configuration
+@@ -540,6 +542,13 @@ dri)
+ GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED"
+ DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED"
+
++ PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
++
++ if test "$HAVE_LIBDRM_RADEON" = yes; then
++ RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
++ RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
++ fi
++
+ # find the DRI deps for libGL
+ if test "$x11_pkgconfig" = yes; then
+ # add xcb modules if necessary
+diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
+index fa8121e..abae4b3 100644
+--- a/src/mesa/drivers/dri/i965/brw_cc.c
++++ b/src/mesa/drivers/dri/i965/brw_cc.c
+@@ -84,6 +84,7 @@ static void
+ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+ {
+ struct gl_stencil_attrib *stencil = brw->attribs.Stencil;
++ const unsigned back = stencil->_BackFace;
+
+ memset(key, 0, sizeof(*key));
+
+@@ -100,13 +101,13 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+ key->stencil_test_mask[0] = stencil->ValueMask[0];
+ }
+ if (key->stencil_two_side) {
+- key->stencil_func[1] = stencil->Function[1];
+- key->stencil_fail_op[1] = stencil->FailFunc[1];
+- key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1];
+- key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1];
+- key->stencil_ref[1] = stencil->Ref[1];
+- key->stencil_write_mask[1] = stencil->WriteMask[1];
+- key->stencil_test_mask[1] = stencil->ValueMask[1];
++ key->stencil_func[1] = stencil->Function[back];
++ key->stencil_fail_op[1] = stencil->FailFunc[back];
++ key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[back];
++ key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[back];
++ key->stencil_ref[1] = stencil->Ref[back];
++ key->stencil_write_mask[1] = stencil->WriteMask[back];
++ key->stencil_test_mask[1] = stencil->ValueMask[back];
+ }
+
+ if (brw->attribs.Color->_LogicOpEnabled)
+diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
+index c50b0d2..24149cf 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm.c
++++ b/src/mesa/drivers/dri/i965/brw_wm.c
+@@ -189,8 +189,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (brw->attribs.Stencil->WriteMask[0] ||
+- (brw->attribs.Stencil->_TestTwoSide &&
+- brw->attribs.Stencil->WriteMask[1]))
++ brw->attribs.Stencil->WriteMask[brw->attribs.Stencil->_BackFace])
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+
+diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
+index e9144ac..e593ed9 100644
+--- a/src/mesa/drivers/dri/r200/Makefile
++++ b/src/mesa/drivers/dri/r200/Makefile
+@@ -3,6 +3,8 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = r200_dri.so
+
+ MINIGLX_SOURCES = server/radeon_dri.c
+@@ -11,25 +13,35 @@ ifeq ($(USING_EGL), 1)
+ EGL_SOURCES = server/radeon_egl.c
+ endif
+
++RADEON_COMMON_SOURCES = \
++ radeon_texture.c \
++ radeon_common_context.c \
++ radeon_common.c \
++ radeon_dma.c \
++ radeon_lock.c \
++ radeon_bo_legacy.c \
++ radeon_cs_legacy.c \
++ radeon_mipmap_tree.c \
++ radeon_span.c
++
++
+ DRIVER_SOURCES = r200_context.c \
+ r200_ioctl.c \
+- r200_lock.c \
+ r200_state.c \
+ r200_state_init.c \
+ r200_cmdbuf.c \
+ r200_pixel.c \
+ r200_tex.c \
+- r200_texmem.c \
+ r200_texstate.c \
+ r200_tcl.c \
+ r200_swtcl.c \
+- r200_span.c \
+ r200_maos.c \
+ r200_sanity.c \
+ r200_fragshader.c \
+ r200_vertprog.c \
+ radeon_screen.c \
+- $(EGL_SOURCES)
++ $(EGL_SOURCES) \
++ $(RADEON_COMMON_SOURCES)
+
+ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+@@ -48,7 +60,29 @@ SYMLINKS = \
+ COMMON_SYMLINKS = \
+ radeon_chipset.h \
+ radeon_screen.c \
+- radeon_screen.h
++ radeon_screen.h \
++ radeon_bo_legacy.c \
++ radeon_cs_legacy.c \
++ radeon_bo_legacy.h \
++ radeon_cs_legacy.h \
++ radeon_bocs_wrapper.h \
++ radeon_span.h \
++ radeon_span.c \
++ radeon_lock.c \
++ radeon_lock.h \
++ radeon_common.c \
++ radeon_common_context.c \
++ radeon_common_context.h \
++ radeon_common.h \
++ radeon_cmdbuf.h \
++ radeon_mipmap_tree.c \
++ radeon_mipmap_tree.h \
++ radeon_texture.c \
++ radeon_texture.h \
++ radeon_dma.c \
++ radeon_dma.h
++
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+ ##### TARGETS #####
+
+diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+index e163377..ae31bcb 100644
+--- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
++++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast/swrast.h"
+ #include "main/simple_list.h"
+
++#include "radeon_common.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_sanity.h"
+ #include "radeon_reg.h"
+
+-static void print_state_atom( struct r200_state_atom *state )
+-{
+- int i;
+-
+- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+-
+- if (0 & R200_DEBUG & DEBUG_VERBOSE)
+- for (i = 0 ; i < state->cmd_size ; i++)
+- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+-
+-}
+-
+ /* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+@@ -64,141 +53,56 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
+ {
+ int i, mtu;
+
+- mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+- make_empty_list(&rmesa->hw.atomlist);
+- rmesa->hw.atomlist.name = "atom-list";
+-
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
++ mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
++
++ make_empty_list(&rmesa->radeon.hw.atomlist);
++ rmesa->radeon.hw.atomlist.name = "atom-list";
++
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
+ for (i = 0; i < mtu; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
+ for (i = 0; i < mtu; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
+ for (i = 0; i < 6; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
+ for (i = 0; i < 8; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
+ for (i = 0; i < 3 + mtu; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
+ for (i = 0; i < 2; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
+ for (i = 0; i < 6; ++i)
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
+- insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
+-}
+-
+-static void r200SaveHwState( r200ContextPtr rmesa )
+-{
+- struct r200_state_atom *atom;
+- char * dest = rmesa->backup_store.cmd_buf;
+-
+- if (R200_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- rmesa->backup_store.cmd_used = 0;
+-
+- foreach( atom, &rmesa->hw.atomlist ) {
+- if ( atom->check( rmesa->glCtx, atom->idx ) ) {
+- int size = atom->cmd_size * 4;
+- memcpy( dest, atom->cmd, size);
+- dest += size;
+- rmesa->backup_store.cmd_used += size;
+- if (R200_DEBUG & DEBUG_STATE)
+- print_state_atom( atom );
+- }
+- }
+-
+- assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
+- if (R200_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Returning to r200EmitState\n");
+-}
+-
+-void r200EmitState( r200ContextPtr rmesa )
+-{
+- char *dest;
+- int mtu;
+- struct r200_state_atom *atom;
+-
+- if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->save_on_next_emit) {
+- r200SaveHwState(rmesa);
+- rmesa->save_on_next_emit = GL_FALSE;
+- }
+-
+- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
+- return;
+-
+- mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+- /* To avoid going across the entire set of states multiple times, just check
+- * for enough space for the case of emitting all state, and inline the
+- * r200AllocCmdBuf code here without all the checks.
+- */
+- r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
+-
+- /* we need to calculate dest after EnsureCmdBufSpace
+- as we may flush the buffer - airlied */
+- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+- if (R200_DEBUG & DEBUG_STATE) {
+- foreach( atom, &rmesa->hw.atomlist ) {
+- if ( atom->dirty || rmesa->hw.all_dirty ) {
+- if ( atom->check( rmesa->glCtx, atom->idx ) )
+- print_state_atom( atom );
+- else
+- fprintf(stderr, "skip state %s\n", atom->name);
+- }
+- }
+- }
+-
+- foreach( atom, &rmesa->hw.atomlist ) {
+- if ( rmesa->hw.all_dirty )
+- atom->dirty = GL_TRUE;
+- if ( atom->dirty ) {
+- if ( atom->check( rmesa->glCtx, atom->idx ) ) {
+- int size = atom->cmd_size * 4;
+- memcpy( dest, atom->cmd, size);
+- dest += size;
+- rmesa->store.cmd_used += size;
+- atom->dirty = GL_FALSE;
+- }
+- }
+- }
+-
+- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
+-
+- rmesa->hw.is_dirty = GL_FALSE;
+- rmesa->hw.all_dirty = GL_FALSE;
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
++ insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
+ }
+
+ /* Fire a section of the retained (indexed_verts) buffer as a regular
+@@ -209,50 +113,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa,
+ GLuint vertex_nr )
+ {
+ drm_radeon_cmd_header_t *cmd;
++ BATCH_LOCALS(&rmesa->radeon);
+
+ assert(!(primitive & R200_VF_PRIM_WALK_IND));
+
+- r200EmitState( rmesa );
++ radeonEmitState(&rmesa->radeon);
+
+ if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+ fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
+ rmesa->store.cmd_used/4, primitive, vertex_nr);
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ,
+- __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
+- cmd[2].i = (primitive |
+- R200_VF_PRIM_WALK_LIST |
+- R200_VF_COLOR_ORDER_RGBA |
+- (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
++
++ BEGIN_BATCH(3);
++ OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
++ OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
++ (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
++ END_BATCH();
+ }
+
++static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
++{
++ BATCH_LOCALS(&rmesa->radeon);
++
++ if (vertex_count > 0) {
++ BEGIN_BATCH(8+2);
++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
++ OUT_BATCH(R200_VF_PRIM_WALK_IND |
++ ((vertex_count + 0) << 16) |
++ type);
++
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
++ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
++ OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
++ rmesa->tcl.elt_dma_bo,
++ rmesa->tcl.elt_dma_offset,
++ RADEON_GEM_DOMAIN_GTT, 0, 0);
++ OUT_BATCH(vertex_count/2);
++ } else {
++ OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
++ OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
++ OUT_BATCH(rmesa->tcl.elt_dma_offset);
++ OUT_BATCH(vertex_count/2);
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.elt_dma_bo,
++ RADEON_GEM_DOMAIN_GTT, 0, 0);
++ }
++ END_BATCH();
++ }
++}
+
+-void r200FlushElts( r200ContextPtr rmesa )
++void r200FlushElts(GLcontext *ctx)
+ {
+- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
++ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ int dwords;
+- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
++ int nr, elt_used = rmesa->tcl.elt_used;
+
+ if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+- fprintf(stderr, "%s\n", __FUNCTION__);
++ fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
++
++ assert( rmesa->radeon.dma.flush == r200FlushElts );
++ rmesa->radeon.dma.flush = NULL;
++
++ elt_used = (elt_used + 2) & ~2;
+
+- assert( rmesa->dma.flush == r200FlushElts );
+- rmesa->dma.flush = NULL;
++ nr = elt_used / 2;
+
+- /* Cope with odd number of elts:
+- */
+- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
++ radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
+
+- cmd[1] |= (dwords - 3) << 16;
+- cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT;
++ r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
++
++ radeon_bo_unref(rmesa->tcl.elt_dma_bo);
++ rmesa->tcl.elt_dma_bo = NULL;
+
+ if (R200_DEBUG & DEBUG_SYNC) {
+ fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+- r200Finish( rmesa->glCtx );
++ radeonFinish( rmesa->radeon.glCtx );
+ }
+ }
+
+@@ -261,7 +196,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ GLuint primitive,
+ GLuint min_nr )
+ {
+- drm_radeon_cmd_header_t *cmd;
+ GLushort *retval;
+
+ if (R200_DEBUG & DEBUG_IOCTL)
+@@ -269,30 +203,25 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+
+ assert((primitive & R200_VF_PRIM_WALK_IND));
+
+- r200EmitState( rmesa );
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr),
+- __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
+- cmd[2].i = (primitive |
+- R200_VF_PRIM_WALK_IND |
+- R200_VF_COLOR_ORDER_RGBA);
++ radeonEmitState(&rmesa->radeon);
+
++ rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
++ 0, R200_ELT_BUF_SZ, 4,
++ RADEON_GEM_DOMAIN_GTT, 0);
++ rmesa->tcl.elt_dma_offset = 0;
++ rmesa->tcl.elt_used = min_nr * 2;
++
++ radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
++ retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
+
+- retval = (GLushort *)(cmd+3);
+
+ if (R200_DEBUG & DEBUG_PRIMS)
+- fprintf(stderr, "%s: header 0x%x prim %x \n",
+- __FUNCTION__,
+- cmd[1].i, primitive);
+-
+- assert(!rmesa->dma.flush);
+- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+- rmesa->dma.flush = r200FlushElts;
++ fprintf(stderr, "%s: header prim %x \n",
++ __FUNCTION__, primitive);
+
+- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
++ assert(!rmesa->radeon.dma.flush);
++ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
++ rmesa->radeon.dma.flush = r200FlushElts;
+
+ return retval;
+ }
+@@ -300,129 +229,130 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+
+
+ void r200EmitVertexAOS( r200ContextPtr rmesa,
+- GLuint vertex_size,
+- GLuint offset )
++ GLuint vertex_size,
++ struct radeon_bo *bo,
++ GLuint offset )
+ {
+- drm_radeon_cmd_header_t *cmd;
++ BATCH_LOCALS(&rmesa->radeon);
+
+ if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+ fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
+ __FUNCTION__, vertex_size, offset);
+
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
+- __FUNCTION__ );
+
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
+- cmd[2].i = 1;
+- cmd[3].i = vertex_size | (vertex_size << 8);
+- cmd[4].i = offset;
++ BEGIN_BATCH(5);
++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
++ OUT_BATCH(1);
++ OUT_BATCH(vertex_size | (vertex_size << 8));
++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++ END_BATCH();
+ }
+-
+
+-void r200EmitAOS( r200ContextPtr rmesa,
+- struct r200_dma_region **component,
+- GLuint nr,
+- GLuint offset )
++void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
+ {
+- drm_radeon_cmd_header_t *cmd;
+- int sz = AOS_BUFSZ(nr);
++ BATCH_LOCALS(&rmesa->radeon);
++ uint32_t voffset;
++ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ int i;
+- int *tmp;
+-
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16);
+- cmd[2].i = nr;
+- tmp = &cmd[0].i;
+- cmd += 3;
+-
+- for (i = 0 ; i < nr ; i++) {
+- if (i & 1) {
+- cmd[0].i |= ((component[i]->aos_stride << 24) |
+- (component[i]->aos_size << 16));
+- cmd[2].i = (component[i]->aos_start +
+- offset * component[i]->aos_stride * 4);
+- cmd += 3;
++
++ if (RADEON_DEBUG & DEBUG_VERTS)
++ fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
++ offset);
++
++ BEGIN_BATCH(sz+2+ (nr*2));
++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
++ OUT_BATCH(nr);
++
++
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++ (rmesa->tcl.aos[i].stride << 8) |
++ (rmesa->tcl.aos[i + 1].components << 16) |
++ (rmesa->tcl.aos[i + 1].stride << 24));
++
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[i].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[i+1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+- else {
+- cmd[0].i = ((component[i]->aos_stride << 8) |
+- (component[i]->aos_size << 0));
+- cmd[1].i = (component[i]->aos_start +
+- offset * component[i]->aos_stride * 4);
++
++ if (nr & 1) {
++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++ (rmesa->tcl.aos[nr - 1].stride << 8));
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[nr - 1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++ } else {
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++ (rmesa->tcl.aos[i].stride << 8) |
++ (rmesa->tcl.aos[i + 1].components << 16) |
++ (rmesa->tcl.aos[i + 1].stride << 24));
++
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ OUT_BATCH(voffset);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ OUT_BATCH(voffset);
++ }
++
++ if (nr & 1) {
++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++ (rmesa->tcl.aos[nr - 1].stride << 8));
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ OUT_BATCH(voffset);
++ }
++ for (i = 0; i + 1 < nr; i += 2) {
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[i+0].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[i+1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++ if (nr & 1) {
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[nr-1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+ }
+-
+- if (R200_DEBUG & DEBUG_VERTS) {
+- fprintf(stderr, "%s:\n", __FUNCTION__);
+- for (i = 0 ; i < sz ; i++)
+- fprintf(stderr, " %d: %x\n", i, tmp[i]);
+- }
++ END_BATCH();
+ }
+
+-void r200EmitBlit( r200ContextPtr rmesa,
+- GLuint color_fmt,
+- GLuint src_pitch,
+- GLuint src_offset,
+- GLuint dst_pitch,
+- GLuint dst_offset,
+- GLint srcx, GLint srcy,
+- GLint dstx, GLint dsty,
+- GLuint w, GLuint h )
++void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
+ {
+- drm_radeon_cmd_header_t *cmd;
++ BATCH_LOCALS(&rmesa->radeon);
+
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+- __FUNCTION__,
+- src_pitch, src_offset, srcx, srcy,
+- dst_pitch, dst_offset, dstx, dsty,
+- w, h);
+-
+- assert( (src_pitch & 63) == 0 );
+- assert( (dst_pitch & 63) == 0 );
+- assert( (src_offset & 1023) == 0 );
+- assert( (dst_offset & 1023) == 0 );
+- assert( w < (1<<16) );
+- assert( h < (1<<16) );
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
+- __FUNCTION__ );
+-
+-
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
+- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+- RADEON_GMC_BRUSH_NONE |
+- (color_fmt << 8) |
+- RADEON_GMC_SRC_DATATYPE_COLOR |
+- RADEON_ROP3_S |
+- RADEON_DP_SRC_SOURCE_MEMORY |
+- RADEON_GMC_CLR_CMP_CNTL_DIS |
+- RADEON_GMC_WR_MSK_DIS );
+-
+- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
+- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
+- cmd[5].i = (srcx << 16) | srcy;
+- cmd[6].i = (dstx << 16) | dsty; /* dst */
+- cmd[7].i = (w << 16) | h;
++ BEGIN_BATCH(3);
++ OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
++ OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type);
++ END_BATCH();
+ }
+
+-
+-void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
+-{
+- drm_radeon_cmd_header_t *cmd;
+-
+- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
+- __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
+- cmd[0].wait.flags = flags;
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
+index 5531e0a..a744469 100644
+--- a/src/mesa/drivers/dri/r200/r200_context.c
++++ b/src/mesa/drivers/dri/r200/r200_context.c
+@@ -54,7 +54,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+-#include "r200_span.h"
+ #include "r200_pixel.h"
+ #include "r200_tex.h"
+ #include "r200_swtcl.h"
+@@ -62,14 +61,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_maos.h"
+ #include "r200_vertprog.h"
+
+-#define need_GL_ARB_multisample
+-#define need_GL_ARB_texture_compression
+-#define need_GL_ARB_vertex_buffer_object
++#include "radeon_span.h"
++
+ #define need_GL_ARB_vertex_program
+ #define need_GL_ATI_fragment_shader
+ #define need_GL_EXT_blend_minmax
+ #define need_GL_EXT_fog_coord
+-#define need_GL_EXT_multi_draw_arrays
+ #define need_GL_EXT_secondary_color
+ #define need_GL_EXT_blend_equation_separate
+ #define need_GL_EXT_blend_func_separate
+@@ -82,9 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vblank.h"
+ #include "utils.h"
+ #include "xmlpool.h" /* for symbolic values of enum-type options */
+-#ifndef R200_DEBUG
+-int R200_DEBUG = (0);
+-#endif
+
+ /* Return various strings for glGetString().
+ */
+@@ -93,8 +87,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ static char buffer[128];
+ unsigned offset;
+- GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 :
+- rmesa->r200Screen->AGPMode;
++ GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
++ rmesa->radeon.radeonScreen->AGPMode;
+
+ switch ( name ) {
+ case GL_VENDOR:
+@@ -105,7 +99,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+ agp_mode );
+
+ sprintf( & buffer[ offset ], " %sTCL",
+- !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
++ !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
+ ? "" : "NO-" );
+
+ return (GLubyte *)buffer;
+@@ -120,20 +114,16 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+ */
+ const struct dri_extension card_extensions[] =
+ {
+- { "GL_ARB_multisample", GL_ARB_multisample_functions },
+ { "GL_ARB_multitexture", NULL },
+ { "GL_ARB_texture_border_clamp", NULL },
+- { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions },
+ { "GL_ARB_texture_env_add", NULL },
+ { "GL_ARB_texture_env_combine", NULL },
+ { "GL_ARB_texture_env_dot3", NULL },
+ { "GL_ARB_texture_env_crossbar", NULL },
+ { "GL_ARB_texture_mirrored_repeat", NULL },
+- { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions },
+ { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions },
+ { "GL_EXT_blend_subtract", NULL },
+ { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+- { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions },
+ { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
+ { "GL_EXT_stencil_wrap", NULL },
+ { "GL_EXT_texture_edge_clamp", NULL },
+@@ -242,6 +232,40 @@ static const struct dri_debug_control debug_control[] =
+ { NULL, 0 }
+ };
+
++static void r200_get_lock(radeonContextPtr radeon)
++{
++ r200ContextPtr rmesa = (r200ContextPtr)radeon;
++ drm_radeon_sarea_t *sarea = radeon->sarea;
++ int i;
++
++ R200_STATECHANGE( rmesa, ctx );
++ if (rmesa->radeon.sarea->tiling_enabled) {
++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
++ }
++ else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
++
++ if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
++ sarea->ctx_owner = rmesa->radeon.dri.hwContext;
++ if (!radeon->radeonScreen->kernel_mm)
++ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
++ }
++
++}
++
++static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++}
++
++
++static void r200_init_vtbl(radeonContextPtr radeon)
++{
++ radeon->vtbl.get_lock = r200_get_lock;
++ radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
++ radeon->vtbl.update_draw_buffer = r200UpdateDrawBuffer;
++ radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
++ radeon->vtbl.swtcl_flush = r200_swtcl_flush;
++}
++
+
+ /* Create the device specific rendering context.
+ */
+@@ -253,9 +277,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+ struct dd_function_table functions;
+ r200ContextPtr rmesa;
+- GLcontext *ctx, *shareCtx;
++ GLcontext *ctx;
+ int i;
+- int tcl_mode, fthrottle_mode;
++ int tcl_mode;
+
+ assert(glVisual);
+ assert(driContextPriv);
+@@ -265,7 +289,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
+ if ( !rmesa )
+ return GL_FALSE;
+-
++
++ r200_init_vtbl(&rmesa->radeon);
+ /* init exp fog table data */
+ r200InitStaticFogData();
+
+@@ -273,12 +298,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
++ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r200");
+- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
+- "def_max_anisotropy");
++ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
++ "def_max_anisotropy");
+
+- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
++ if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+ if ( sPriv->drm_version.minor < 13 )
+ fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+ "disabling.\n", sPriv->drm_version.minor );
+@@ -299,59 +324,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ r200InitTextureFuncs(&functions);
+ r200InitShaderFuncs(&functions);
+
+- /* Allocate and initialize the Mesa context */
+- if (sharedContextPrivate)
+- shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
+- else
+- shareCtx = NULL;
+- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+- &functions, (void *) rmesa);
+- if (!rmesa->glCtx) {
+- FREE(rmesa);
+- return GL_FALSE;
+- }
+- driContextPriv->driverPrivate = rmesa;
+-
+- /* Init r200 context data */
+- rmesa->dri.context = driContextPriv;
+- rmesa->dri.screen = sPriv;
+- rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
+- rmesa->dri.hwContext = driContextPriv->hHWContext;
+- rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+- rmesa->dri.fd = sPriv->fd;
+- rmesa->dri.drmMinor = sPriv->drm_version.minor;
+-
+- rmesa->r200Screen = screen;
+- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
+- screen->sarea_priv_offset);
+-
+-
+- rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
+-
+- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+- make_empty_list( & rmesa->swapped );
+-
+- rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
+- assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS);
+- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+- screen->texSize[i],
+- 12,
+- RADEON_NR_TEX_REGIONS,
+- (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
+- & rmesa->sarea->tex_age[i],
+- & rmesa->swapped,
+- sizeof( r200TexObj ),
+- (destroy_texture_object_t *) r200DestroyTexObj );
++ if (!radeonInitContext(&rmesa->radeon, &functions,
++ glVisual, driContextPriv,
++ sharedContextPrivate)) {
++ FREE(rmesa);
++ return GL_FALSE;
+ }
+- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
++
++ rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
+ "texture_depth");
+- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+- rmesa->texture_depth = ( screen->cpp == 4 ) ?
++ if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++ rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+- rmesa->swtcl.RenderIndex = ~0;
+- rmesa->hw.all_dirty = 1;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
++ rmesa->radeon.hw.all_dirty = 1;
+
+ /* Set the maximum texture size small enough that we can guarentee that
+ * all texture units can bind a maximal texture and have all of them in
+@@ -359,29 +346,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ * setting allow larger textures.
+ */
+
+- ctx = rmesa->glCtx;
+- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
++ ctx = rmesa->radeon.glCtx;
++ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+ "texture_units");
+ ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+ ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
+-
+- driCalculateMaxTextureLevels( rmesa->texture_heaps,
+- rmesa->nr_heaps,
+- & ctx->Const,
+- 4,
+- 11, /* max 2D texture size is 2048x2048 */
+-#if ENABLE_HW_3D_TEXTURE
+- 8, /* max 3D texture size is 256^3 */
+-#else
+- 0, /* 3D textures unsupported */
+-#endif
+- 11, /* max cube texture size is 2048x2048 */
+- 11, /* max texture rectangle size is 2048x2048 */
+- 12,
+- GL_FALSE,
+- i );
++ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+@@ -391,7 +362,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ ctx->Const.MinPointSizeAA = 1.0;
+ ctx->Const.MaxPointSizeAA = 1.0;
+ ctx->Const.PointSizeGranularity = 0.0625;
+- if (rmesa->r200Screen->drmSupportsPointSprites)
++ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+ ctx->Const.MaxPointSize = 2047.0;
+ else
+ ctx->Const.MaxPointSize = 1.0;
+@@ -411,6 +382,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+
++ ctx->Const.MaxDrawBuffers = 1;
++
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext( ctx );
+@@ -445,32 +418,32 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ _math_matrix_set_identity( &rmesa->tmpmat );
+
+ driInitExtensions( ctx, card_extensions, GL_TRUE );
+- if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
++ if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
+ /* yuv textures don't work with some chips - R200 / rv280 okay so far
+ others get the bit ordering right but don't actually do YUV-RGB conversion */
+ _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
+ }
+- if (rmesa->glCtx->Mesa_DXTn) {
++ if (rmesa->radeon.glCtx->Mesa_DXTn) {
+ _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+ _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+ }
+- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
++ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+ _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+ }
+
+- if (rmesa->r200Screen->drmSupportsCubeMapsR200)
++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
+ _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+- if (rmesa->r200Screen->drmSupportsBlendColor) {
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+ driInitExtensions( ctx, blend_extensions, GL_FALSE );
+ }
+- if(rmesa->r200Screen->drmSupportsVertexProgram)
++ if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
+ driInitSingleExtension( ctx, ARB_vp_extension );
+- if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
++ if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
+ driInitSingleExtension( ctx, NV_vp_extension );
+
+- if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
++ if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
+ driInitSingleExtension( ctx, ATI_fs_extension );
+- if (rmesa->r200Screen->drmSupportsPointSprites)
++ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+ driInitExtensions( ctx, point_extensions, GL_FALSE );
+ #if 0
+ r200InitDriverFuncs( ctx );
+@@ -480,33 +453,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ #endif
+ /* plug in a few more device driver functions */
+ /* XXX these should really go right after _mesa_init_driver_functions() */
++ radeonInitSpanFuncs( ctx );
+ r200InitPixelFuncs( ctx );
+- r200InitSpanFuncs( ctx );
+ r200InitTnlFuncs( ctx );
+ r200InitState( rmesa );
+ r200InitSwtcl( ctx );
+
+- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
+- rmesa->iw.irq_seq = -1;
+- rmesa->irqsEmitted = 0;
+- rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+- rmesa->r200Screen->irq);
+-
+- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+-
+- if (!rmesa->do_irqs)
+- fprintf(stderr,
+- "IRQ's not enabled, falling back to %s: %d %d\n",
+- rmesa->do_usleeps ? "usleeps" : "busy waits",
+- fthrottle_mode,
+- rmesa->r200Screen->irq);
+-
+ rmesa->prefer_gart_client_texturing =
+ (getenv("R200_GART_CLIENT_TEXTURES") != 0);
+
+- (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
+-
+-
+ #if DO_DEBUG
+ R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ),
+ debug_control );
+@@ -514,18 +469,18 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ debug_control );
+ #endif
+
+- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
+- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
++ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
++ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+ fprintf(stderr, "disabling 3D acceleration\n");
+ FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
+ }
+ else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
+- !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
+- if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) {
+- rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL;
++ !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ fprintf(stderr, "Disabling HW TCL support\n");
+ }
+- TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
++ TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
+ }
+
+ return GL_TRUE;
+@@ -544,55 +499,33 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+
+ /* check if we're deleting the currently bound context */
+ if (rmesa == current) {
+- R200_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ _mesa_make_current(NULL, NULL, NULL);
+ }
+
+ /* Free r200 context resources */
+ assert(rmesa); /* should never be null */
+ if ( rmesa ) {
+- GLboolean release_texture_heaps;
+
++ _swsetup_DestroyContext( rmesa->radeon.glCtx );
++ _tnl_DestroyContext( rmesa->radeon.glCtx );
++ _vbo_DestroyContext( rmesa->radeon.glCtx );
++ _swrast_DestroyContext( rmesa->radeon.glCtx );
+
+- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+- _swsetup_DestroyContext( rmesa->glCtx );
+- _tnl_DestroyContext( rmesa->glCtx );
+- _vbo_DestroyContext( rmesa->glCtx );
+- _swrast_DestroyContext( rmesa->glCtx );
++ r200DestroySwtcl( rmesa->radeon.glCtx );
++ r200ReleaseArrays( rmesa->radeon.glCtx, ~0 );
+
+- r200DestroySwtcl( rmesa->glCtx );
+- r200ReleaseArrays( rmesa->glCtx, ~0 );
+-
+- if (rmesa->dma.current.buf) {
+- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+- r200FlushCmdBuf( rmesa, __FUNCTION__ );
+- }
+-
+- if (rmesa->state.scissor.pClipRects) {
+- FREE(rmesa->state.scissor.pClipRects);
+- rmesa->state.scissor.pClipRects = NULL;
++ if (rmesa->radeon.dma.current) {
++ radeonReleaseDmaRegion( &rmesa->radeon );
++ rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+ }
+
+- if ( release_texture_heaps ) {
+- /* This share group is about to go away, free our private
+- * texture object data.
+- */
+- int i;
+-
+- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+- driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+- rmesa->texture_heaps[ i ] = NULL;
+- }
+-
+- assert( is_empty_list( & rmesa->swapped ) );
++ if (rmesa->radeon.state.scissor.pClipRects) {
++ FREE(rmesa->radeon.state.scissor.pClipRects);
++ rmesa->radeon.state.scissor.pClipRects = NULL;
+ }
+
+- /* free the Mesa context */
+- rmesa->glCtx->DriverCtx = NULL;
+- _mesa_destroy_context( rmesa->glCtx );
+-
+- /* free the option cache */
+- driDestroyOptionCache (&rmesa->optionCache);
++ radeonCleanupContext(&rmesa->radeon);
+
+ FREE( rmesa );
+ }
+@@ -600,107 +533,6 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+
+
+
+-
+-void
+-r200SwapBuffers( __DRIdrawablePrivate *dPriv )
+-{
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- r200ContextPtr rmesa;
+- GLcontext *ctx;
+- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = rmesa->glCtx;
+- if (ctx->Visual.doubleBufferMode) {
+- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */
+- if ( rmesa->doPageFlip ) {
+- r200PageFlip( dPriv );
+- }
+- else {
+- r200CopyBuffer( dPriv, NULL );
+- }
+- }
+- }
+- else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+- }
+-}
+-
+-void
+-r200CopySubBuffer( __DRIdrawablePrivate *dPriv,
+- int x, int y, int w, int h )
+-{
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- r200ContextPtr rmesa;
+- GLcontext *ctx;
+- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = rmesa->glCtx;
+- if (ctx->Visual.doubleBufferMode) {
+- drm_clip_rect_t rect;
+- rect.x1 = x + dPriv->x;
+- rect.y1 = (dPriv->h - y - h) + dPriv->y;
+- rect.x2 = rect.x1 + w;
+- rect.y2 = rect.y1 + h;
+- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */
+- r200CopyBuffer( dPriv, &rect );
+- }
+- }
+- else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+- }
+-}
+-
+-/* Force the context `c' to be the current context and associate with it
+- * buffer `b'.
+- */
+-GLboolean
+-r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+- __DRIdrawablePrivate *driDrawPriv,
+- __DRIdrawablePrivate *driReadPriv )
+-{
+- if ( driContextPriv ) {
+- r200ContextPtr newCtx =
+- (r200ContextPtr) driContextPriv->driverPrivate;
+-
+- if (R200_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
+-
+- newCtx->dri.readable = driReadPriv;
+-
+- if ( newCtx->dri.drawable != driDrawPriv ||
+- newCtx->lastStamp != driDrawPriv->lastStamp ) {
+- if (driDrawPriv->swap_interval == (unsigned)-1) {
+- driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
+- ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+- : VBLANK_FLAG_NO_IRQ;
+-
+- driDrawableInitVBlank( driDrawPriv );
+- }
+-
+- newCtx->dri.drawable = driDrawPriv;
+-
+- r200SetCliprects(newCtx);
+- r200UpdateViewportOffset( newCtx->glCtx );
+- }
+-
+- _mesa_make_current( newCtx->glCtx,
+- (GLframebuffer *) driDrawPriv->driverPrivate,
+- (GLframebuffer *) driReadPriv->driverPrivate );
+-
+- _mesa_update_state( newCtx->glCtx );
+- r200ValidateState( newCtx->glCtx );
+-
+- } else {
+- if (R200_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+- _mesa_make_current( NULL, NULL, NULL );
+- }
+-
+- if (R200_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "End %s\n", __FUNCTION__);
+- return GL_TRUE;
+-}
+-
+ /* Force the context `c' to be unbound from its buffer.
+ */
+ GLboolean
+@@ -709,7 +541,7 @@ r200UnbindContext( __DRIcontextPrivate *driContextPriv )
+ r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
+
+ if (R200_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx);
++ fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->radeon.glCtx);
+
+ return GL_TRUE;
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
+index 14a1dda..fcbe725 100644
+--- a/src/mesa/drivers/dri/r200/r200_context.h
++++ b/src/mesa/drivers/dri/r200/r200_context.h
+@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #error This driver requires a newer libdrm to compile
+ #endif
+
++#include "radeon_screen.h"
++#include "radeon_common.h"
++
++#include "radeon_lock.h"
++
+ struct r200_context;
+ typedef struct r200_context r200ContextRec;
+ typedef struct r200_context *r200ContextPtr;
+
+-/* This union is used to avoid warnings/miscompilation
+- with float to uint32_t casts due to strict-aliasing */
+-typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
+-
+-#include "r200_lock.h"
+-#include "radeon_screen.h"
+ #include "main/mm.h"
+
+-/* Flags for software fallback cases */
+-/* See correponding strings in r200_swtcl.c */
+-#define R200_FALLBACK_TEXTURE 0x01
+-#define R200_FALLBACK_DRAW_BUFFER 0x02
+-#define R200_FALLBACK_STENCIL 0x04
+-#define R200_FALLBACK_RENDER_MODE 0x08
+-#define R200_FALLBACK_DISABLE 0x10
+-#define R200_FALLBACK_BORDER_MODE 0x20
+-
+-/* The blit width for texture uploads
+- */
+-#define BLIT_WIDTH_BYTES 1024
+-
+-/* Use the templated vertex format:
+- */
+-#define COLOR_IS_RGBA
+-#define TAG(x) r200##x
+-#include "tnl_dd/t_dd_vertex.h"
+-#undef TAG
+-
+-typedef void (*r200_tri_func)( r200ContextPtr,
+- r200Vertex *,
+- r200Vertex *,
+- r200Vertex * );
+-
+-typedef void (*r200_line_func)( r200ContextPtr,
+- r200Vertex *,
+- r200Vertex * );
+-
+-typedef void (*r200_point_func)( r200ContextPtr,
+- r200Vertex * );
+-
+-
+ struct r200_vertex_program {
+ struct gl_vertex_program mesa_program; /* Must be first */
+ int translated;
+@@ -112,93 +78,11 @@ struct r200_vertex_program {
+ int fogmode;
+ };
+
+-struct r200_colorbuffer_state {
+- GLuint clear;
+-#if 000
+- GLint drawOffset, drawPitch;
+-#endif
+- int roundEnable;
+-};
+-
+-
+-struct r200_depthbuffer_state {
+- GLuint clear;
+- GLfloat scale;
+-};
+-
+-#if 000
+-struct r200_pixel_state {
+- GLint readOffset, readPitch;
+-};
+-#endif
+-
+-struct r200_scissor_state {
+- drm_clip_rect_t rect;
+- GLboolean enabled;
+-
+- GLuint numClipRects; /* Cliprects active */
+- GLuint numAllocedClipRects; /* Cliprects available */
+- drm_clip_rect_t *pClipRects;
+-};
+-
+-struct r200_stencilbuffer_state {
+- GLboolean hwBuffer;
+- GLuint clear; /* rb3d_stencilrefmask value */
+-};
+-
+-struct r200_stipple_state {
+- GLuint mask[32];
+-};
+-
+-
+-
+-#define TEX_0 0x1
+-#define TEX_1 0x2
+-#define TEX_2 0x4
+-#define TEX_3 0x8
+-#define TEX_4 0x10
+-#define TEX_5 0x20
+-#define TEX_ALL 0x3f
+-
+-typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct r200_tex_obj {
+- driTextureObject base;
+-
+- GLuint bufAddr; /* Offset to start of locally
+- shared texture block */
+-
+- GLuint dirty_state; /* Flags (1 per texunit) for
+- whether or not this texobj
+- has dirty hardware state
+- (pp_*) that needs to be
+- brought into the
+- texunit. */
+-
+- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+- /* Six, for the cube faces */
+- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+-
+- GLuint pp_txfilter; /* hardware register values */
+- GLuint pp_txformat;
+- GLuint pp_txformat_x;
+- GLuint pp_txoffset; /* Image location in texmem.
+- All cube faces follow. */
+- GLuint pp_txsize; /* npot only */
+- GLuint pp_txpitch; /* npot only */
+- GLuint pp_border_color;
+- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
+-
+- GLboolean border_fallback;
+-
+- GLuint tile_bits; /* hw texture tile bits used on this texture */
+-};
++#define R200_TEX_ALL 0x3f
+
+
+ struct r200_texture_env_state {
+- r200TexObjPtr texobj;
++ radeonTexObjPtr texobj;
+ GLuint outputreg;
+ GLuint unitneeded;
+ };
+@@ -210,19 +94,6 @@ struct r200_texture_state {
+ };
+
+
+-struct r200_state_atom {
+- struct r200_state_atom *next, *prev;
+- const char *name; /* for debug */
+- int cmd_size; /* size in bytes */
+- GLuint idx;
+- int *cmd; /* one or more cmd's */
+- int *lastcmd; /* one or more cmd's */
+- GLboolean dirty;
+- GLboolean (*check)( GLcontext *, int ); /* is this state active? */
+-};
+-
+-
+-
+ /* Trying to keep these relatively short as the variables are becoming
+ * extravagently long. Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+@@ -597,181 +468,85 @@ struct r200_state_atom {
+
+
+ struct r200_hw_state {
+- /* Head of the linked list of state atoms. */
+- struct r200_state_atom atomlist;
+-
+ /* Hardware state, stored as cmdbuf commands:
+ * -- Need to doublebuffer for
+ * - reviving state after loss of context
+ * - eliding noop statechange loops? (except line stipple count)
+ */
+- struct r200_state_atom ctx;
+- struct r200_state_atom set;
+- struct r200_state_atom vte;
+- struct r200_state_atom lin;
+- struct r200_state_atom msk;
+- struct r200_state_atom vpt;
+- struct r200_state_atom vap;
+- struct r200_state_atom vtx;
+- struct r200_state_atom tcl;
+- struct r200_state_atom msl;
+- struct r200_state_atom tcg;
+- struct r200_state_atom msc;
+- struct r200_state_atom cst;
+- struct r200_state_atom tam;
+- struct r200_state_atom tf;
+- struct r200_state_atom tex[6];
+- struct r200_state_atom cube[6];
+- struct r200_state_atom zbs;
+- struct r200_state_atom mtl[2];
+- struct r200_state_atom mat[9];
+- struct r200_state_atom lit[8]; /* includes vec, scl commands */
+- struct r200_state_atom ucp[6];
+- struct r200_state_atom pix[6]; /* pixshader stages */
+- struct r200_state_atom eye; /* eye pos */
+- struct r200_state_atom grd; /* guard band clipping */
+- struct r200_state_atom fog;
+- struct r200_state_atom glt;
+- struct r200_state_atom prf;
+- struct r200_state_atom afs[2];
+- struct r200_state_atom pvs;
+- struct r200_state_atom vpi[2];
+- struct r200_state_atom vpp[2];
+- struct r200_state_atom atf;
+- struct r200_state_atom spr;
+- struct r200_state_atom ptp;
+-
+- int max_state_size; /* Number of bytes necessary for a full state emit. */
+- GLboolean is_dirty, all_dirty;
++ struct radeon_state_atom ctx;
++ struct radeon_state_atom set;
++ struct radeon_state_atom vte;
++ struct radeon_state_atom lin;
++ struct radeon_state_atom msk;
++ struct radeon_state_atom vpt;
++ struct radeon_state_atom vap;
++ struct radeon_state_atom vtx;
++ struct radeon_state_atom tcl;
++ struct radeon_state_atom msl;
++ struct radeon_state_atom tcg;
++ struct radeon_state_atom msc;
++ struct radeon_state_atom cst;
++ struct radeon_state_atom tam;
++ struct radeon_state_atom tf;
++ struct radeon_state_atom tex[6];
++ struct radeon_state_atom cube[6];
++ struct radeon_state_atom zbs;
++ struct radeon_state_atom mtl[2];
++ struct radeon_state_atom mat[9];
++ struct radeon_state_atom lit[8]; /* includes vec, scl commands */
++ struct radeon_state_atom ucp[6];
++ struct radeon_state_atom pix[6]; /* pixshader stages */
++ struct radeon_state_atom eye; /* eye pos */
++ struct radeon_state_atom grd; /* guard band clipping */
++ struct radeon_state_atom fog;
++ struct radeon_state_atom glt;
++ struct radeon_state_atom prf;
++ struct radeon_state_atom afs[2];
++ struct radeon_state_atom pvs;
++ struct radeon_state_atom vpi[2];
++ struct radeon_state_atom vpp[2];
++ struct radeon_state_atom atf;
++ struct radeon_state_atom spr;
++ struct radeon_state_atom ptp;
+ };
+
+ struct r200_state {
+ /* Derived state for internal purposes:
+ */
+- struct r200_colorbuffer_state color;
+- struct r200_depthbuffer_state depth;
+-#if 00
+- struct r200_pixel_state pixel;
+-#endif
+- struct r200_scissor_state scissor;
+- struct r200_stencilbuffer_state stencil;
+- struct r200_stipple_state stipple;
++ struct radeon_stipple_state stipple;
+ struct r200_texture_state texture;
+ GLuint envneeded;
+ };
+
+-/* Need refcounting on dma buffers:
+- */
+-struct r200_dma_buffer {
+- int refcount; /* the number of retained regions in buf */
+- drmBufPtr buf;
+-};
+-
+-#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset + \
+- (rvb)->address - rmesa->dma.buf0_address + \
+- (rvb)->start)
+-
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct r200_dma_region {
+- struct r200_dma_buffer *buf;
+- char *address; /* == buf->address */
+- int start, end, ptr; /* offsets from start of buf */
+- int aos_start;
+- int aos_stride;
+- int aos_size;
+-};
+-
+-
+-struct r200_dma {
+- /* Active dma region. Allocations for vertices and retained
+- * regions come from here. Also used for emitting random vertices,
+- * these may be flushed by calling flush_current();
+- */
+- struct r200_dma_region current;
+-
+- void (*flush)( r200ContextPtr );
+-
+- char *buf0_address; /* start of buf[0], for index calcs */
+- GLuint nr_released_bufs; /* flush after so many buffers released */
+-};
+-
+-struct r200_dri_mirror {
+- __DRIcontextPrivate *context; /* DRI context */
+- __DRIscreenPrivate *screen; /* DRI screen */
+- __DRIdrawablePrivate *drawable; /* DRI drawable bound to this ctx */
+- __DRIdrawablePrivate *readable; /* DRI readable bound to this ctx */
+-
+- drm_context_t hwContext;
+- drm_hw_lock_t *hwLock;
+- int fd;
+- int drmMinor;
+-};
+-
+-
+ #define R200_CMD_BUF_SZ (16*1024)
+
+-struct r200_store {
+- GLuint statenr;
+- GLuint primnr;
+- char cmd_buf[R200_CMD_BUF_SZ];
+- int cmd_used;
+- int elts_start;
+-};
+-
+-
++#define R200_ELT_BUF_SZ (16*1024)
+ /* r200_tcl.c
+ */
+ struct r200_tcl_info {
+ GLuint hw_primitive;
+
+ /* hw can handle 12 components max */
+- struct r200_dma_region *aos_components[12];
++ struct radeon_aos aos[12];
+ GLuint nr_aos_components;
+
+ GLuint *Elts;
+
+- struct r200_dma_region indexed_verts;
+- struct r200_dma_region vertex_data[15];
++ struct radeon_bo *elt_dma_bo;
++ int elt_dma_offset; /** Offset into this buffer object, in bytes */
++ int elt_used;
++
+ };
+
+
+ /* r200_swtcl.c
+ */
+ struct r200_swtcl_info {
+- GLuint RenderIndex;
+-
+- /**
+- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is
+- * installed in the Mesa state vector.
+- */
+- GLuint vertex_size;
+
+- /**
+- * Attributes instructing the Mesa TCL pipeline where / how to put vertex
+- * data in the hardware buffer.
+- */
+- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+
+- /**
+- * Number of elements of \c ::vertex_attrs that are actually used.
+- */
+- GLuint vertex_attr_count;
+-
+- /**
+- * Cached pointer to the buffer where Mesa will store vertex data.
+- */
+- GLubyte *verts;
+-
+- /* Fallback rasterization functions
+- */
+- r200_point_func draw_point;
+- r200_line_func draw_line;
+- r200_tri_func draw_tri;
+-
+- GLuint hw_primitive;
+- GLenum render_primitive;
+- GLuint numverts;
++ radeon_point_func draw_point;
++ radeon_line_func draw_line;
++ radeon_tri_func draw_tri;
+
+ /**
+ * Offset of the 4UB color data within a hardware (swtcl) vertex.
+@@ -787,27 +562,10 @@ struct r200_swtcl_info {
+ * Should Mesa project vertex data or will the hardware do it?
+ */
+ GLboolean needproj;
+-
+- struct r200_dma_region indexed_verts;
+-};
+-
+-
+-struct r200_ioctl {
+- GLuint vertex_offset;
+- GLuint vertex_size;
+ };
+
+
+
+-#define R200_MAX_PRIMS 64
+-
+-
+-
+-struct r200_prim {
+- GLuint start;
+- GLuint end;
+- GLuint prim;
+-};
+
+ /* A maximum total of 29 elements per vertex: 3 floats for position, 3
+ * floats for normal, 4 floats for color, 4 bytes for secondary color,
+@@ -822,9 +580,8 @@ struct r200_prim {
+
+ #define R200_MAX_VERTEX_SIZE ((3*6)+11)
+
+-
+ struct r200_context {
+- GLcontext *glCtx; /* Mesa context */
++ struct radeon_context radeon;
+
+ /* Driver and hardware state management
+ */
+@@ -832,56 +589,15 @@ struct r200_context {
+ struct r200_state state;
+ struct r200_vertex_program *curr_vp_hw;
+
+- /* Texture object bookkeeping
+- */
+- unsigned nr_heaps;
+- driTexHeap * texture_heaps[ RADEON_NR_TEX_HEAPS ];
+- driTextureObject swapped;
+- int texture_depth;
+- float initialMaxAnisotropy;
+-
+- /* Rasterization and vertex state:
+- */
+- GLuint TclFallback;
+- GLuint Fallback;
+- GLuint NewGLState;
+- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
+-
+ /* Vertex buffers
+ */
+- struct r200_ioctl ioctl;
+- struct r200_dma dma;
+- struct r200_store store;
+- /* A full state emit as of the first state emit in the main store, in case
+- * the context is lost.
+- */
+- struct r200_store backup_store;
+-
+- /* Page flipping
+- */
+- GLuint doPageFlip;
+-
+- /* Busy waiting
+- */
+- GLuint do_usleeps;
+- GLuint do_irqs;
+- GLuint irqsEmitted;
+- drm_radeon_irq_wait_t iw;
++ struct radeon_ioctl ioctl;
++ struct radeon_store store;
+
+ /* Clientdata textures;
+ */
+ GLuint prefer_gart_client_texturing;
+
+- /* Drawable, cliprect and scissor information
+- */
+- GLuint numClipRects; /* Cliprects for the draw buffer */
+- drm_clip_rect_t *pClipRects;
+- unsigned int lastStamp;
+- GLboolean lost_context;
+- GLboolean save_on_next_emit;
+- radeonScreenPtr r200Screen; /* Screen private DRI data */
+- drm_radeon_sarea_t *sarea; /* Private SAREA data */
+-
+ /* TCL stuff
+ */
+ GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
+@@ -893,15 +609,6 @@ struct r200_context {
+ GLuint TexGenCompSel;
+ GLmatrix tmpmat;
+
+- /* buffer swap
+- */
+- int64_t swap_ust;
+- int64_t swap_missed_ust;
+-
+- GLuint swap_count;
+- GLuint swap_missed_count;
+-
+-
+ /* r200_tcl.c
+ */
+ struct r200_tcl_info tcl;
+@@ -910,14 +617,6 @@ struct r200_context {
+ */
+ struct r200_swtcl_info swtcl;
+
+- /* Mirrors of some DRI state
+- */
+- struct r200_dri_mirror dri;
+-
+- /* Configuration cache
+- */
+- driOptionCache optionCache;
+-
+ GLboolean using_hyperz;
+ GLboolean texmicrotile;
+
+@@ -927,28 +626,10 @@ struct r200_context {
+ #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))
+
+
+-static INLINE GLuint r200PackColor( GLuint cpp,
+- GLubyte r, GLubyte g,
+- GLubyte b, GLubyte a )
+-{
+- switch ( cpp ) {
+- case 2:
+- return PACK_COLOR_565( r, g, b );
+- case 4:
+- return PACK_COLOR_8888( a, r, g, b );
+- default:
+- return 0;
+- }
+-}
+-
+-
+ extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
+ extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ __DRIcontextPrivate *driContextPriv,
+ void *sharedContextPrivate);
+-extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
+-extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv,
+- int x, int y, int w, int h );
+ extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+ __DRIdrawablePrivate *driDrawPriv,
+ __DRIdrawablePrivate *driReadPriv );
+@@ -957,28 +638,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
+ /* ================================================================
+ * Debugging:
+ */
+-#define DO_DEBUG 1
+
+-#if DO_DEBUG
+-extern int R200_DEBUG;
+-#else
+-#define R200_DEBUG 0
+-#endif
++#define R200_DEBUG RADEON_DEBUG
++
+
+-#define DEBUG_TEXTURE 0x001
+-#define DEBUG_STATE 0x002
+-#define DEBUG_IOCTL 0x004
+-#define DEBUG_PRIMS 0x008
+-#define DEBUG_VERTS 0x010
+-#define DEBUG_FALLBACKS 0x020
+-#define DEBUG_VFMT 0x040
+-#define DEBUG_CODEGEN 0x080
+-#define DEBUG_VERBOSE 0x100
+-#define DEBUG_DRI 0x200
+-#define DEBUG_DMA 0x400
+-#define DEBUG_SANITY 0x800
+-#define DEBUG_SYNC 0x1000
+-#define DEBUG_PIXEL 0x2000
+-#define DEBUG_MEMORY 0x4000
+
+ #endif /* __R200_CONTEXT_H__ */
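The r200_context.h hunks above retire the driver-private r200_state_atom list (per-atom command block, dirty flag, check() callback) in favour of the shared radeon_state_atom. A minimal, driver-independent sketch of the dirty-atom emit loop that both variants implement is shown below; every name in it is hypothetical and it is illustrative only, not part of the patch.

struct state_atom {
   struct state_atom *next, *prev;
   const char *name;                 /* for debugging */
   int cmd_size;                     /* size of the command block, in dwords */
   unsigned *cmd;                    /* the command block itself */
   int dirty;
   int (*check)(void *ctx, struct state_atom *atom);   /* is this state active? */
};

/* Illustrative sketch only -- walk a circular list of atoms and flush every
 * one that is both dirty and currently relevant to the hardware. */
static void emit_dirty_atoms(void *ctx, struct state_atom *head,
                             void (*emit)(void *ctx, const unsigned *cmd, int dwords))
{
   struct state_atom *atom;

   for (atom = head->next; atom != head; atom = atom->next) {
      if (atom->dirty && atom->check(ctx, atom)) {
         emit(ctx, atom->cmd, atom->cmd_size);
         atom->dirty = 0;
      }
   }
}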
+diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
+index d514b28..85c1b7b 100644
+--- a/src/mesa/drivers/dri/r200/r200_fragshader.c
++++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
+@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx )
+ CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
+ CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
+ }
+- rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor (
++ rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
+ 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
+ }
+ }
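The r200_fragshader.c hunk above swaps the removed r200PackColor (see the r200_context.h diff earlier) for the shared radeonPackColor. Both pack an 8-bit-per-channel colour according to the draw buffer's bytes per pixel; a free-standing sketch of that packing, assuming the usual RGB565 and ARGB8888 layouts, follows. pack_color is a hypothetical stand-in, not the real function.

/* Illustrative sketch only -- pack_color is a made-up stand-in for
 * radeonPackColor and is not part of the patch. */
static unsigned pack_color(unsigned cpp, unsigned char r, unsigned char g,
                           unsigned char b, unsigned char a)
{
   switch (cpp) {
   case 2:   /* 16 bpp: RGB565 */
      return ((unsigned)(r >> 3) << 11) | ((unsigned)(g >> 2) << 5) | (b >> 3);
   case 4:   /* 32 bpp: ARGB8888 */
      return ((unsigned)a << 24) | ((unsigned)r << 16) | ((unsigned)g << 8) | b;
   default:  /* unknown depth */
      return 0;
   }
}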
+diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
+index 0741e57..c08968f 100644
+--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
++++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
+@@ -41,6 +41,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "swrast/swrast.h"
+
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -54,635 +56,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R200_TIMEOUT 512
+ #define R200_IDLE_RETRY 16
+
+-
+-static void r200WaitForIdle( r200ContextPtr rmesa );
+-
+-
+-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+- * we need to unwire our current cmdbuf, hook the one with the saved state in
+- * it, flush it, and then put the current one back. This is so commands at the
+- * start of a cmdbuf can rely on the state being kept from the previous one.
+- */
+-static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
+-{
+- GLuint nr_released_bufs;
+- struct r200_store saved_store;
+-
+- if (rmesa->backup_store.cmd_used == 0)
+- return;
+-
+- if (R200_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Emitting backup state on lost context\n");
+-
+- rmesa->lost_context = GL_FALSE;
+-
+- nr_released_bufs = rmesa->dma.nr_released_bufs;
+- saved_store = rmesa->store;
+- rmesa->dma.nr_released_bufs = 0;
+- rmesa->store = rmesa->backup_store;
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+- rmesa->dma.nr_released_bufs = nr_released_bufs;
+- rmesa->store = saved_store;
+-}
+-
+-int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
+-{
+- int ret, i;
+- drm_radeon_cmd_buffer_t cmd;
+-
+- if (rmesa->lost_context)
+- r200BackUpAndEmitLostStateLocked( rmesa );
+-
+- if (R200_DEBUG & DEBUG_IOCTL) {
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (0 & R200_DEBUG & DEBUG_VERBOSE)
+- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+- fprintf(stderr, "%d: %x\n", i/4,
+- *(int *)(&rmesa->store.cmd_buf[i]));
+- }
+-
+- if (R200_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+- rmesa->dma.nr_released_bufs);
+-
+-
+- if (R200_DEBUG & DEBUG_SANITY) {
+- if (rmesa->state.scissor.enabled)
+- ret = r200SanityCmdBuffer( rmesa,
+- rmesa->state.scissor.numClipRects,
+- rmesa->state.scissor.pClipRects);
+- else
+- ret = r200SanityCmdBuffer( rmesa,
+- rmesa->numClipRects,
+- rmesa->pClipRects);
+- if (ret) {
+- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
+- goto out;
+- }
+- }
+-
+-
+- if (R200_DEBUG & DEBUG_MEMORY) {
+- if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
+- & rmesa->swapped ) ) {
+- fprintf( stderr, "%s: texture memory is inconsistent - expect "
+- "mangled textures\n", __FUNCTION__ );
+- }
+- }
+-
+-
+- cmd.bufsz = rmesa->store.cmd_used;
+- cmd.buf = rmesa->store.cmd_buf;
+-
+- if (rmesa->state.scissor.enabled) {
+- cmd.nbox = rmesa->state.scissor.numClipRects;
+- cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects;
+- } else {
+- cmd.nbox = rmesa->numClipRects;
+- cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects;
+- }
+-
+- ret = drmCommandWrite( rmesa->dri.fd,
+- DRM_RADEON_CMDBUF,
+- &cmd, sizeof(cmd) );
+-
+- if (ret)
+- fprintf(stderr, "drmCommandWrite: %d\n", ret);
+-
+- if (R200_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+- r200WaitForIdleLocked( rmesa );
+- }
+-
+-
+- out:
+- rmesa->store.primnr = 0;
+- rmesa->store.statenr = 0;
+- rmesa->store.cmd_used = 0;
+- rmesa->dma.nr_released_bufs = 0;
+- rmesa->save_on_next_emit = 1;
+-
+- return ret;
+-}
+-
+-
+-/* Note: does not emit any commands to avoid recursion on
+- * r200AllocCmdBuf.
+- */
+-void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
+-{
+- int ret;
+-
+- LOCK_HARDWARE( rmesa );
+-
+- ret = r200FlushCmdBufLocked( rmesa, caller );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if (ret) {
+- fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
+- exit(ret);
+- }
+-}
+-
+-
+-/* =============================================================
+- * Hardware vertex buffer handling
+- */
+-
+-
+-void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
+-{
+- struct r200_dma_buffer *dmabuf;
+- int fd = rmesa->dri.fd;
+- int index = 0;
+- int size = 0;
+- drmDMAReq dma;
+- int ret;
+-
+- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush) {
+- rmesa->dma.flush( rmesa );
+- }
+-
+- if (rmesa->dma.current.buf)
+- r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-
+- if (rmesa->dma.nr_released_bufs > 4)
+- r200FlushCmdBuf( rmesa, __FUNCTION__ );
+-
+- dma.context = rmesa->dri.hwContext;
+- dma.send_count = 0;
+- dma.send_list = NULL;
+- dma.send_sizes = NULL;
+- dma.flags = 0;
+- dma.request_count = 1;
+- dma.request_size = RADEON_BUFFER_SIZE;
+- dma.request_list = &index;
+- dma.request_sizes = &size;
+- dma.granted_count = 0;
+-
+- LOCK_HARDWARE(rmesa); /* no need to validate */
+-
+- while (1) {
+- ret = drmDMA( fd, &dma );
+- if (ret == 0)
+- break;
+-
+- if (rmesa->dma.nr_released_bufs) {
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+- }
+-
+- if (rmesa->do_usleeps) {
+- UNLOCK_HARDWARE( rmesa );
+- DO_USLEEP( 1 );
+- LOCK_HARDWARE( rmesa );
+- }
+- }
+-
+- UNLOCK_HARDWARE(rmesa);
+-
+- if (R200_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "Allocated buffer %d\n", index);
+-
+- dmabuf = CALLOC_STRUCT( r200_dma_buffer );
+- dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
+- dmabuf->refcount = 1;
+-
+- rmesa->dma.current.buf = dmabuf;
+- rmesa->dma.current.address = dmabuf->buf->address;
+- rmesa->dma.current.end = dmabuf->buf->total;
+- rmesa->dma.current.start = 0;
+- rmesa->dma.current.ptr = 0;
+-}
+-
+-void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+- struct r200_dma_region *region,
+- const char *caller )
+-{
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (!region->buf)
+- return;
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- if (--region->buf->refcount == 0) {
+- drm_radeon_cmd_header_t *cmd;
+-
+- if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+- region->buf->buf->idx);
+-
+- cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd),
+- __FUNCTION__ );
+- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+- cmd->dma.buf_idx = region->buf->buf->idx;
+- FREE(region->buf);
+- rmesa->dma.nr_released_bufs++;
+- }
+-
+- region->buf = NULL;
+- region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current. If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r200AllocDmaRegion( r200ContextPtr rmesa,
+- struct r200_dma_region *region,
+- int bytes,
+- int alignment )
+-{
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- if (region->buf)
+- r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+-
+- alignment--;
+- rmesa->dma.current.start = rmesa->dma.current.ptr =
+- (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+- r200RefillCurrentDmaRegion( rmesa );
+-
+- region->start = rmesa->dma.current.start;
+- region->ptr = rmesa->dma.current.start;
+- region->end = rmesa->dma.current.start + bytes;
+- region->address = rmesa->dma.current.address;
+- region->buf = rmesa->dma.current.buf;
+- region->buf->refcount++;
+-
+- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+- rmesa->dma.current.start =
+- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+- assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
+-}
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t r200GetLastFrame(r200ContextPtr rmesa)
+-{
+- drm_radeon_getparam_t gp;
+- int ret;
+- uint32_t frame;
+-
+- gp.param = RADEON_PARAM_LAST_FRAME;
+- gp.value = (int *)&frame;
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+- &gp, sizeof(gp) );
+- if ( ret ) {
+- fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-
+- return frame;
+-}
+-
+-static void r200EmitIrqLocked( r200ContextPtr rmesa )
+-{
+- drm_radeon_irq_emit_t ie;
+- int ret;
+-
+- ie.irq_seq = &rmesa->iw.irq_seq;
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
+- &ie, sizeof(ie) );
+- if ( ret ) {
+- fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-}
+-
+-
+-static void r200WaitIrq( r200ContextPtr rmesa )
+-{
+- int ret;
+-
+- do {
+- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+- &rmesa->iw, sizeof(rmesa->iw) );
+- } while (ret && (errno == EINTR || errno == EBUSY));
+-
+- if ( ret ) {
+- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-}
+-
+-
+-static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
+-{
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
+-
+- if (rmesa->do_irqs) {
+- if (r200GetLastFrame(rmesa) < sarea->last_frame) {
+- if (!rmesa->irqsEmitted) {
+- while (r200GetLastFrame (rmesa) < sarea->last_frame)
+- ;
+- }
+- else {
+- UNLOCK_HARDWARE( rmesa );
+- r200WaitIrq( rmesa );
+- LOCK_HARDWARE( rmesa );
+- }
+- rmesa->irqsEmitted = 10;
+- }
+-
+- if (rmesa->irqsEmitted) {
+- r200EmitIrqLocked( rmesa );
+- rmesa->irqsEmitted--;
+- }
+- }
+- else {
+- while (r200GetLastFrame (rmesa) < sarea->last_frame) {
+- UNLOCK_HARDWARE( rmesa );
+- if (rmesa->do_usleeps)
+- DO_USLEEP( 1 );
+- LOCK_HARDWARE( rmesa );
+- }
+- }
+-}
+-
+-
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
+- const drm_clip_rect_t *rect)
+-{
+- r200ContextPtr rmesa;
+- GLint nbox, i, ret;
+- GLboolean missed_target;
+- int64_t ust;
+- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
+-
+- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+- if ( R200_DEBUG & DEBUG_IOCTL ) {
+- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx );
+- }
+-
+- R200_FIREVERTICES( rmesa );
+-
+- LOCK_HARDWARE( rmesa );
+-
+-
+- /* Throttle the frame rate -- only allow one pending swap buffers
+- * request at a time.
+- */
+- r200WaitForFrameCompletion( rmesa );
+- if (!rect)
+- {
+- UNLOCK_HARDWARE( rmesa );
+- driWaitForVBlank( dPriv, & missed_target );
+- LOCK_HARDWARE( rmesa );
+- }
+-
+- nbox = dPriv->numClipRects; /* must be in locked region */
+-
+- for ( i = 0 ; i < nbox ; ) {
+- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
+- GLint n = 0;
+-
+- for ( ; i < nr ; i++ ) {
+-
+- *b = box[i];
+-
+- if (rect)
+- {
+- if (rect->x1 > b->x1)
+- b->x1 = rect->x1;
+- if (rect->y1 > b->y1)
+- b->y1 = rect->y1;
+- if (rect->x2 < b->x2)
+- b->x2 = rect->x2;
+- if (rect->y2 < b->y2)
+- b->y2 = rect->y2;
+-
+- if (b->x1 >= b->x2 || b->y1 >= b->y2)
+- continue;
+- }
+-
+- b++;
+- n++;
+- }
+- rmesa->sarea->nbox = n;
+-
+- if (!n)
+- continue;
+-
+- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
+- UNLOCK_HARDWARE( rmesa );
+- exit( 1 );
+- }
+- }
+-
+- UNLOCK_HARDWARE( rmesa );
+- if (!rect)
+- {
+- rmesa->hw.all_dirty = GL_TRUE;
+-
+- rmesa->swap_count++;
+- (*psp->systemTime->getUST)( & ust );
+- if ( missed_target ) {
+- rmesa->swap_missed_count++;
+- rmesa->swap_missed_ust = ust - rmesa->swap_ust;
+- }
+-
+- rmesa->swap_ust = ust;
+-
+- sched_yield();
+- }
+-}
+-
+-void r200PageFlip( __DRIdrawablePrivate *dPriv )
++static void r200UserClear(GLcontext *ctx, GLuint flags)
+ {
+- r200ContextPtr rmesa;
+- GLint ret;
+- GLboolean missed_target;
+- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
++ if (flags & (RADEON_FRONT | RADEON_BACK)) {
+
+- rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+- if ( R200_DEBUG & DEBUG_IOCTL ) {
+- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+- rmesa->sarea->pfCurrentPage);
+- }
+-
+- R200_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
+-
+- if (!dPriv->numClipRects) {
+- UNLOCK_HARDWARE( rmesa );
+- usleep( 10000 ); /* throttle invisible client 10ms */
+- return;
+ }
++
++ if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
++ && (flags & RADEON_CLEAR_FASTZ)) {
+
+- /* Need to do this for the perf box placement:
+- */
+- {
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
+- b[0] = box[0];
+- rmesa->sarea->nbox = 1;
+- }
+-
+- /* Throttle the frame rate -- only allow a few pending swap buffers
+- * request at a time.
+- */
+- r200WaitForFrameCompletion( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- driWaitForVBlank( dPriv, & missed_target );
+- if ( missed_target ) {
+- rmesa->swap_missed_count++;
+- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
+ }
+- LOCK_HARDWARE( rmesa );
+
+- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+- exit( 1 );
+- }
+-
+- rmesa->swap_count++;
+- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
+-
+-#if 000
+- if ( rmesa->sarea->pfCurrentPage == 1 ) {
+- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch;
+- } else {
+- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch;
+- }
+-
+- R200_STATECHANGE( rmesa, ctx );
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
+- + rmesa->r200Screen->fbLocation;
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+- }
+-#else
+- /* Get ready for drawing next frame. Update the renderbuffers'
+- * flippedOffset/Pitch fields so we draw into the right place.
+- */
+- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+- rmesa->sarea->pfCurrentPage);
+-
+-
+- r200UpdateDrawBuffer(rmesa->glCtx);
+-#endif
+ }
+
+-
+-/* ================================================================
+- * Buffer clear
+- */
+-static void r200Clear( GLcontext *ctx, GLbitfield mask )
++static void r200KernelClear(GLcontext *ctx, GLuint flags)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+- GLuint flags = 0;
+- GLuint color_mask = 0;
+- GLint ret, i;
+- GLint cx, cy, cw, ch;
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++ GLint cx, cy, cw, ch, ret;
++ GLuint i;
+
+- if ( R200_DEBUG & DEBUG_IOCTL ) {
+- fprintf( stderr, "r200Clear\n");
+- }
+-
+- {
+- LOCK_HARDWARE( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- if ( dPriv->numClipRects == 0 )
+- return;
+- }
+-
+- r200Flush( ctx );
+-
+- if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+- flags |= RADEON_FRONT;
+- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+- mask &= ~BUFFER_BIT_FRONT_LEFT;
+- }
+-
+- if ( mask & BUFFER_BIT_BACK_LEFT ) {
+- flags |= RADEON_BACK;
+- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+- mask &= ~BUFFER_BIT_BACK_LEFT;
+- }
+-
+- if ( mask & BUFFER_BIT_DEPTH ) {
+- flags |= RADEON_DEPTH;
+- mask &= ~BUFFER_BIT_DEPTH;
+- }
+-
+- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
+- flags |= RADEON_STENCIL;
+- mask &= ~BUFFER_BIT_STENCIL;
+- }
+-
+- if ( mask ) {
+- if (R200_DEBUG & DEBUG_FALLBACKS)
+- fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+- _swrast_Clear( ctx, mask );
+- }
+-
+- if ( !flags )
+- return;
+-
+- if (rmesa->using_hyperz) {
+- flags |= RADEON_USE_COMP_ZBUF;
+-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
+- flags |= RADEON_USE_HIERZ; */
+- if (!(rmesa->state.stencil.hwBuffer) ||
+- ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+- ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+- flags |= RADEON_CLEAR_FASTZ;
+- }
+- }
+-
+- LOCK_HARDWARE( rmesa );
+-
+- /* compute region after locking: */
+- cx = ctx->DrawBuffer->_Xmin;
+- cy = ctx->DrawBuffer->_Ymin;
+- cw = ctx->DrawBuffer->_Xmax - cx;
+- ch = ctx->DrawBuffer->_Ymax - cy;
+-
+- /* Flip top to bottom */
+- cx += dPriv->x;
+- cy = dPriv->y + dPriv->h - cy - ch;
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ /* Throttle the number of clear ioctls we do.
+ */
+@@ -693,7 +88,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+
+ gp.param = RADEON_PARAM_LAST_CLEAR;
+ gp.value = (int *)&clear;
+- ret = drmCommandWriteRead( rmesa->dri.fd,
++ ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+ DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+
+ if ( ret ) {
+@@ -703,24 +98,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+
+ /* Clear throttling needs more thought.
+ */
+- if ( rmesa->sarea->last_clear - clear <= 25 ) {
++ if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
+ break;
+ }
+
+- if (rmesa->do_usleeps) {
+- UNLOCK_HARDWARE( rmesa );
++ if (rmesa->radeon.do_usleeps) {
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ DO_USLEEP( 1 );
+- LOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
+ }
+ }
+
+ /* Send current state to the hardware */
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
++
++
++ /* compute region after locking: */
++ cx = ctx->DrawBuffer->_Xmin;
++ cy = ctx->DrawBuffer->_Ymin;
++ cw = ctx->DrawBuffer->_Xmax - cx;
++ ch = ctx->DrawBuffer->_Ymax - cy;
+
++ /* Flip top to bottom */
++ cx += dPriv->x;
++ cy = dPriv->y + dPriv->h - cy - ch;
+ for ( i = 0 ; i < dPriv->numClipRects ; ) {
+ GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+ drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
++ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+ drm_radeon_clear_t clear;
+ drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+ GLint n = 0;
+@@ -755,17 +160,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ }
+ }
+
+- rmesa->sarea->nbox = n;
++ rmesa->radeon.sarea->nbox = n;
+
+ clear.flags = flags;
+- clear.clear_color = rmesa->state.color.clear;
+- clear.clear_depth = rmesa->state.depth.clear; /* needed for hyperz */
++ clear.clear_color = rmesa->radeon.state.color.clear;
++ clear.clear_depth = rmesa->radeon.state.depth.clear; /* needed for hyperz */
+ clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+- clear.depth_mask = rmesa->state.stencil.clear;
++ clear.depth_mask = rmesa->radeon.state.stencil.clear;
+ clear.depth_boxes = depth_boxes;
+
+ n--;
+- b = rmesa->sarea->boxes;
++ b = rmesa->radeon.sarea->boxes;
+ for ( ; n >= 0 ; n-- ) {
+ depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+ depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+@@ -774,83 +179,91 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
+ }
+
+- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
++ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+ &clear, sizeof(clear));
+
+
+ if ( ret ) {
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+ exit( 1 );
+ }
+ }
+-
+- UNLOCK_HARDWARE( rmesa );
+- rmesa->hw.all_dirty = GL_TRUE;
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+-
+-
+-void r200WaitForIdleLocked( r200ContextPtr rmesa )
++/* ================================================================
++ * Buffer clear
++ */
++static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ {
+- int ret;
+- int i = 0;
+-
+- do {
+- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
+- if (ret)
+- DO_USLEEP( 1 );
+- } while (ret && ++i < 100);
+-
+- if ( ret < 0 ) {
+- UNLOCK_HARDWARE( rmesa );
+- fprintf( stderr, "Error: R200 timed out... exiting\n" );
+- exit( -1 );
+- }
+-}
++ r200ContextPtr rmesa = R200_CONTEXT(ctx);
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++ GLuint flags = 0;
++ GLuint color_mask = 0;
++ GLint ret;
+
++ if ( R200_DEBUG & DEBUG_IOCTL ) {
++ fprintf( stderr, "r200Clear\n");
++ }
+
+-static void r200WaitForIdle( r200ContextPtr rmesa )
+-{
+- LOCK_HARDWARE(rmesa);
+- r200WaitForIdleLocked( rmesa );
+- UNLOCK_HARDWARE(rmesa);
+-}
++ {
++ LOCK_HARDWARE( &rmesa->radeon );
++ UNLOCK_HARDWARE( &rmesa->radeon );
++ if ( dPriv->numClipRects == 0 )
++ return;
++ }
+
++ radeonFlush( ctx );
+
+-void r200Flush( GLcontext *ctx )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT( ctx );
++ if ( mask & BUFFER_BIT_FRONT_LEFT ) {
++ flags |= RADEON_FRONT;
++ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
++ mask &= ~BUFFER_BIT_FRONT_LEFT;
++ }
+
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
++ if ( mask & BUFFER_BIT_BACK_LEFT ) {
++ flags |= RADEON_BACK;
++ color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
++ mask &= ~BUFFER_BIT_BACK_LEFT;
++ }
+
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
++ if ( mask & BUFFER_BIT_DEPTH ) {
++ flags |= RADEON_DEPTH;
++ mask &= ~BUFFER_BIT_DEPTH;
++ }
+
+- r200EmitState( rmesa );
+-
+- if (rmesa->store.cmd_used)
+- r200FlushCmdBuf( rmesa, __FUNCTION__ );
+-}
++ if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
++ flags |= RADEON_STENCIL;
++ mask &= ~BUFFER_BIT_STENCIL;
++ }
+
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void r200Finish( GLcontext *ctx )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- r200Flush( ctx );
++ if ( mask ) {
++ if (R200_DEBUG & DEBUG_FALLBACKS)
++ fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
++ _swrast_Clear( ctx, mask );
++ }
++
++ if ( !flags )
++ return;
+
+- if (rmesa->do_irqs) {
+- LOCK_HARDWARE( rmesa );
+- r200EmitIrqLocked( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- r200WaitIrq( rmesa );
++ if (rmesa->using_hyperz) {
++ flags |= RADEON_USE_COMP_ZBUF;
++/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
++ flags |= RADEON_USE_HIERZ; */
++ if (!(rmesa->radeon.state.stencil.hwBuffer) ||
++ ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
++ ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
++ flags |= RADEON_CLEAR_FASTZ;
++ }
+ }
+- else
+- r200WaitForIdle( rmesa );
+-}
+
++ if (rmesa->radeon.radeonScreen->kernel_mm)
++ r200UserClear(ctx, flags);
++ else
++ r200KernelClear(ctx, flags);
++
++ rmesa->radeon.hw.all_dirty = GL_TRUE;
++}
+
+ /* This version of AllocateMemoryMESA allocates only GART memory, and
+ * only does so after the point at which the driver has been
+@@ -875,7 +288,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+ fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq,
+ writefreq, priority);
+
+- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map)
++ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
+ return NULL;
+
+ if (getenv("R200_NO_ALLOC"))
+@@ -886,7 +299,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+ alloc.size = size;
+ alloc.region_offset = &region_offset;
+
+- ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
++ ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
+ DRM_RADEON_ALLOC,
+ &alloc, sizeof(alloc));
+
+@@ -896,7 +309,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+ }
+
+ {
+- char *region_start = (char *)rmesa->r200Screen->gartTextures.map;
++ char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ return (void *)(region_start + region_offset);
+ }
+ }
+@@ -914,24 +327,24 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
+ if (R200_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
+
+- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) {
++ if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
+ fprintf(stderr, "%s: no context\n", __FUNCTION__);
+ return;
+ }
+
+- region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++ region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+
+ if (region_offset < 0 ||
+- region_offset > rmesa->r200Screen->gartTextures.size) {
++ region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
+ fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
+- rmesa->r200Screen->gartTextures.size);
++ rmesa->radeon.radeonScreen->gartTextures.size);
+ return;
+ }
+
+ memfree.region = RADEON_MEM_REGION_GART;
+ memfree.region_offset = region_offset;
+
+- ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
++ ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
+ DRM_RADEON_FREE,
+ &memfree, sizeof(memfree));
+
+@@ -956,16 +369,16 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
+
+ card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
+
+- return card_offset - rmesa->r200Screen->gart_base;
++ return card_offset - rmesa->radeon.radeonScreen->gart_base;
+ }
+
+ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+ GLint size )
+ {
+- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++ ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ int valid = (size >= 0 &&
+ offset >= 0 &&
+- offset + size < rmesa->r200Screen->gartTextures.size);
++ offset + size < rmesa->radeon.radeonScreen->gartTextures.size);
+
+ if (R200_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
+@@ -976,12 +389,12 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+
+ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+ {
+- ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++ ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+
+- if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size)
++ if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+ return ~0;
+ else
+- return rmesa->r200Screen->gart_texture_offset + offset;
++ return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
+ }
+
+
+@@ -989,7 +402,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+ void r200InitIoctlFuncs( struct dd_function_table *functions )
+ {
+ functions->Clear = r200Clear;
+- functions->Finish = r200Finish;
+- functions->Flush = r200Flush;
++ functions->Finish = radeonFinish;
++ functions->Flush = radeonFlush;
+ }
+
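Most of the r200_ioctl.c churn above deletes the hand-rolled command-buffer and DMA handling (r200FlushCmdBufLocked, r200AllocDmaRegion and friends) that the common radeon code now provides. The heart of the old allocator -- align the write pointer, hand out a slice of the current buffer, refill when the request no longer fits -- can be sketched independently of the driver as follows; the names are hypothetical and the code is illustrative only.

/* Illustrative sketch only -- not part of the patch. */
struct dma_pool {
   char *base;    /* start of the current buffer */
   int   ptr;     /* next free byte, relative to base */
   int   end;     /* one past the last usable byte */
};

/* Returns the byte offset of the new region, or -1 when the pool has to be
 * refilled first (the removed driver code called r200RefillCurrentDmaRegion
 * at that point). */
static int dma_alloc(struct dma_pool *pool, int bytes, int alignment)
{
   int start = (pool->ptr + alignment - 1) & ~(alignment - 1);

   if (start + bytes > pool->end)
      return -1;

   pool->ptr = start + bytes;
   return start;
}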
+diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
+index f7458e4..2a4b8a1 100644
+--- a/src/mesa/drivers/dri/r200/r200_ioctl.h
++++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
+@@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "main/simple_list.h"
+ #include "radeon_dri.h"
+-#include "r200_lock.h"
++
++#include "radeon_bocs_wrapper.h"
+
+ #include "xf86drm.h"
+ #include "drm.h"
+ #include "radeon_drm.h"
+
+-extern void r200EmitState( r200ContextPtr rmesa );
+ extern void r200EmitVertexAOS( r200ContextPtr rmesa,
+- GLuint vertex_size,
+- GLuint offset );
++ GLuint vertex_size,
++ struct radeon_bo *bo,
++ GLuint offset );
+
+ extern void r200EmitVbufPrim( r200ContextPtr rmesa,
+ GLuint primitive,
+ GLuint vertex_nr );
+
+-extern void r200FlushElts( r200ContextPtr rmesa );
++extern void r200FlushElts(GLcontext *ctx);
+
+ extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ GLuint primitive,
+ GLuint min_nr );
+
+-extern void r200EmitAOS( r200ContextPtr rmesa,
+- struct r200_dma_region **regions,
+- GLuint n,
+- GLuint offset );
+-
+-extern void r200EmitBlit( r200ContextPtr rmesa,
+- GLuint color_fmt,
+- GLuint src_pitch,
+- GLuint src_offset,
+- GLuint dst_pitch,
+- GLuint dst_offset,
+- GLint srcx, GLint srcy,
+- GLint dstx, GLint dsty,
+- GLuint w, GLuint h );
+-
+-extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
+-
+-extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
+-extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
+-
+-extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
+-
+-extern void r200AllocDmaRegion( r200ContextPtr rmesa,
+- struct r200_dma_region *region,
+- int bytes,
+- int alignment );
+-
+-extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+- struct r200_dma_region *region,
+- const char *caller );
+-
+-extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
+- const drm_clip_rect_t *rect);
+-extern void r200PageFlip( __DRIdrawablePrivate *drawable );
+-extern void r200Flush( GLcontext *ctx );
+-extern void r200Finish( GLcontext *ctx );
+-extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
+-extern void r200WaitForVBlank( r200ContextPtr rmesa );
++extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
++
+ extern void r200InitIoctlFuncs( struct dd_function_table *functions );
+
+ extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
+@@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa );
+ */
+ #define R200_NEWPRIM( rmesa ) \
+ do { \
+- if ( rmesa->dma.flush ) \
+- rmesa->dma.flush( rmesa ); \
++ if ( rmesa->radeon.dma.flush ) \
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
+ } while (0)
+
+ /* Can accommodate several state changes and primitive changes without
+@@ -130,7 +95,7 @@ do { \
+ do { \
+ R200_NEWPRIM( rmesa ); \
+ rmesa->hw.ATOM.dirty = GL_TRUE; \
+- rmesa->hw.is_dirty = GL_TRUE; \
++ rmesa->radeon.hw.is_dirty = GL_TRUE; \
+ } while (0)
+
+ #define R200_DB_STATE( ATOM ) \
+@@ -139,13 +104,13 @@ do { \
+
+ static INLINE int R200_DB_STATECHANGE(
+ r200ContextPtr rmesa,
+- struct r200_state_atom *atom )
++ struct radeon_state_atom *atom )
+ {
+ if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+- int *tmp;
++ GLuint *tmp;
+ R200_NEWPRIM( rmesa );
+ atom->dirty = GL_TRUE;
+- rmesa->hw.is_dirty = GL_TRUE;
++ rmesa->radeon.hw.is_dirty = GL_TRUE;
+ tmp = atom->cmd;
+ atom->cmd = atom->lastcmd;
+ atom->lastcmd = tmp;
+@@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE(
+ }
+
+
+-/* Fire the buffered vertices no matter what.
+- */
+-#define R200_FIREVERTICES( rmesa ) \
+-do { \
+- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \
+- r200Flush( rmesa->glCtx ); \
+- } \
+-} while (0)
+-
+ /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+ * are available, you will also be adding an rmesa->state.max_state_size because
+ * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
+@@ -174,36 +130,36 @@ do { \
+ #define ELTS_BUFSZ(nr) (12 + nr * 2)
+ #define VBUF_BUFSZ (3 * sizeof(int))
+
+-/* Ensure that a minimum amount of space is available in the command buffer.
+- * This is used to ensure atomicity of state updates with the rendering requests
+- * that rely on them.
+- *
+- * An alternative would be to implement a "soft lock" such that when the buffer
+- * wraps at an inopportune time, we grab the lock, flush the current buffer,
+- * and hang on to the lock until the critical section is finished and we flush
+- * the buffer again and unlock.
+- */
+-static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
++static inline uint32_t cmdpacket3(int cmd_type)
+ {
+- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
+- r200FlushCmdBuf( rmesa, __FUNCTION__ );
+- assert( bytes <= R200_CMD_BUF_SZ );
+-}
++ drm_radeon_cmd_header_t cmd;
+
+-/* Alloc space in the command buffer
+- */
+-static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
+- int bytes, const char *where )
+-{
+- char * head;
++ cmd.i = 0;
++ cmd.header.cmd_type = cmd_type;
+
+- if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
+- r200FlushCmdBuf( rmesa, where );
++ return (uint32_t)cmd.i;
+
+- head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+- rmesa->store.cmd_used += bytes;
+- assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
+- return head;
+ }
+
++#define OUT_BATCH_PACKET3(packet, num_extra) do { \
++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } else { \
++ OUT_BATCH(CP_PACKET2); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } \
++ } while(0)
++
++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } else { \
++ OUT_BATCH(CP_PACKET2); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } \
++ } while(0)
++
++
+ #endif /* __R200_IOCTL_H__ */
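The R200_DB_STATECHANGE helper kept above shows the double-buffered state scheme: each atom remembers the command block it emitted last time, and a change only counts when the freshly built block differs, at which point the two buffers are swapped so the new values become the baseline for the next comparison. A stand-alone sketch of that test, with hypothetical names and purely for illustration:

#include <string.h>

/* Illustrative sketch only -- not part of the patch. */
struct db_atom {
   unsigned *cmd;        /* block being built for the next emit */
   unsigned *lastcmd;    /* block as it was last emitted */
   int cmd_size;         /* size in dwords */
   int dirty;
};

/* Returns 1 and marks the atom dirty when the new block differs from the
 * previously emitted one; the two buffers are swapped so the fresh contents
 * become the reference for the next comparison. */
static int db_statechange(struct db_atom *atom)
{
   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size * 4) != 0) {
      unsigned *tmp = atom->cmd;

      atom->cmd = atom->lastcmd;
      atom->lastcmd = tmp;
      atom->dirty = 1;
      return 1;
   }
   return 0;
}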
+diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
+deleted file mode 100644
+index 99661a4..0000000
+--- a/src/mesa/drivers/dri/r200/r200_lock.c
++++ /dev/null
+@@ -1,116 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include "r200_context.h"
+-#include "r200_lock.h"
+-#include "r200_tex.h"
+-#include "r200_state.h"
+-#include "r200_ioctl.h"
+-
+-#include "drirenderbuffer.h"
+-
+-
+-#if DEBUG_LOCKING
+-char *prevLockFile = NULL;
+-int prevLockLine = 0;
+-#endif
+-
+-/* Turn on/off page flipping according to the flags in the sarea:
+- */
+-static void
+-r200UpdatePageFlipping( r200ContextPtr rmesa )
+-{
+- rmesa->doPageFlip = rmesa->sarea->pfState;
+- if (rmesa->glCtx->WinSysDrawBuffer) {
+- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+- rmesa->sarea->pfCurrentPage);
+- }
+-}
+-
+-
+-
+-/* Update the hardware state. This is called if another main/context.has
+- * grabbed the hardware lock, which includes the X server. This
+- * function also updates the driver's window state after the X server
+- * moves, resizes or restacks a window -- the change will be reflected
+- * in the drawable position and clip rects. Since the X server grabs
+- * the hardware lock when it changes the window state, this routine will
+- * automatically be called after such a change.
+- */
+-void r200GetLock( r200ContextPtr rmesa, GLuint flags )
+-{
+- __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
+- __DRIdrawablePrivate *readable = rmesa->dri.readable;
+- __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
+- int i;
+-
+- drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
+-
+- /* The window might have moved, so we might need to get new clip
+- * rects.
+- *
+- * NOTE: This releases and regrabs the hw lock to allow the X server
+- * to respond to the DRI protocol request for new drawable info.
+- * Since the hardware state depends on having the latest drawable
+- * clip rects, all state checking must be done _after_ this call.
+- */
+- DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable );
+- if (drawable != readable) {
+- DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable );
+- }
+-
+- if ( rmesa->lastStamp != drawable->lastStamp ) {
+- r200UpdatePageFlipping( rmesa );
+- r200SetCliprects( rmesa );
+- r200UpdateViewportOffset( rmesa->glCtx );
+- driUpdateFramebufferSize(rmesa->glCtx, drawable);
+- }
+-
+- R200_STATECHANGE( rmesa, ctx );
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+- }
+- else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
+-
+- if ( sarea->ctx_owner != rmesa->dri.hwContext ) {
+- sarea->ctx_owner = rmesa->dri.hwContext;
+- }
+-
+- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+- DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
+- }
+-
+- rmesa->lost_context = GL_TRUE;
+-}
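The deleted r200GetLock above documents the contended-lock path: take the DRM lock, let the X server refresh the drawable information, and, when the drawable's stamp changed while the lock was away, rebuild cliprect, page-flip and viewport state before drawing continues. Reduced to its control flow, that looks roughly like the sketch below; all names are hypothetical and the code is illustrative only.

/* Illustrative sketch only -- not part of the patch. */
struct drawable_state {
   unsigned last_stamp;      /* stamp our derived state was built from */
   unsigned current_stamp;   /* stamp published by the window system */
};

static void get_lock_and_revalidate(struct drawable_state *draw,
                                    void (*take_kernel_lock)(void),
                                    void (*validate_drawable)(void),
                                    void (*update_derived_state)(void))
{
   take_kernel_lock();        /* blocks until the heavyweight lock is ours */
   validate_drawable();       /* may pull new cliprects from the X server */

   /* Only rebuild cliprect, page-flip and viewport state when the window
    * system actually touched the drawable while the lock was not held. */
   if (draw->last_stamp != draw->current_stamp) {
      update_derived_state();
      draw->last_stamp = draw->current_stamp;
   }
}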
+diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h
+deleted file mode 100644
+index 4ff9890..0000000
+--- a/src/mesa/drivers/dri/r200/r200_lock.h
++++ /dev/null
+@@ -1,106 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __R200_LOCK_H__
+-#define __R200_LOCK_H__
+-
+-extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
+-
+-/* Turn DEBUG_LOCKING on to find locking conflicts.
+- */
+-#define DEBUG_LOCKING 0
+-
+-#if DEBUG_LOCKING
+-extern char *prevLockFile;
+-extern int prevLockLine;
+-
+-#define DEBUG_LOCK() \
+- do { \
+- prevLockFile = (__FILE__); \
+- prevLockLine = (__LINE__); \
+- } while (0)
+-
+-#define DEBUG_RESET() \
+- do { \
+- prevLockFile = 0; \
+- prevLockLine = 0; \
+- } while (0)
+-
+-#define DEBUG_CHECK_LOCK() \
+- do { \
+- if ( prevLockFile ) { \
+- fprintf( stderr, \
+- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
+- prevLockFile, prevLockLine, __FILE__, __LINE__ ); \
+- exit( 1 ); \
+- } \
+- } while (0)
+-
+-#else
+-
+-#define DEBUG_LOCK()
+-#define DEBUG_RESET()
+-#define DEBUG_CHECK_LOCK()
+-
+-#endif
+-
+-/*
+- * !!! We may want to separate locks from locks with validation. This
+- * could be used to improve performance for those things commands that
+- * do not do any drawing !!!
+- */
+-
+-
+-/* Lock the hardware and validate our state.
+- */
+-#define LOCK_HARDWARE( rmesa ) \
+- do { \
+- char __ret = 0; \
+- DEBUG_CHECK_LOCK(); \
+- DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext, \
+- (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \
+- if ( __ret ) \
+- r200GetLock( rmesa, 0 ); \
+- DEBUG_LOCK(); \
+- } while (0)
+-
+-#define UNLOCK_HARDWARE( rmesa ) \
+- do { \
+- DRM_UNLOCK( rmesa->dri.fd, \
+- rmesa->dri.hwLock, \
+- rmesa->dri.hwContext ); \
+- DEBUG_RESET(); \
+- } while (0)
+-
+-#endif /* __R200_LOCK_H__ */
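The deleted r200_lock.h above carried the classic DRI locking fast path: a single compare-and-swap claims the hardware lock when it is uncontended, and only a failed CAS falls back to r200GetLock and the kernel. A rough, driver-independent rendering of that fast path, using the GCC __sync builtin in place of DRM_CAS, is sketched below with made-up names; it is illustrative only.

/* Illustrative sketch only -- not part of the patch.  LOCK_HELD_BIT and
 * lock_hardware are hypothetical stand-ins for DRM_LOCK_HELD and
 * LOCK_HARDWARE. */
#define LOCK_HELD_BIT 0x80000000u

static void lock_hardware(volatile unsigned *hw_lock, unsigned my_context,
                          void (*slow_path)(void))
{
   /* Fast path: the CAS succeeds only when the lock word still holds our
    * bare context with the held bit clear, i.e. we were the last holder and
    * nobody owns the lock right now. */
   unsigned prev = __sync_val_compare_and_swap(hw_lock, my_context,
                                               LOCK_HELD_BIT | my_context);

   if (prev != my_context)
      slow_path();             /* contended: ask the kernel for the lock */
}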
+diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+index 8512b9a..5dbc202 100644
+--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
++++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_maos.h"
+ #include "r200_tcl.h"
+
+-
+-#if 0
+-/* Usage:
+- * - from r200_tcl_render
+- * - call r200EmitArrays to ensure uptodate arrays in dma
+- * - emit primitives (new type?) which reference the data
+- * -- need to use elts for lineloop, quads, quadstrip/flat
+- * -- other primitives are all well-formed (need tristrip-1,fake-poly)
+- *
+- */
+-static void emit_ubyte_rgba3( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p\n",
+- __FUNCTION__, count, stride, (void *)out);
+-
+- for (i = 0; i < count; i++) {
+- out->red = *data;
+- out->green = *(data+1);
+- out->blue = *(data+2);
+- out->alpha = 0xFF;
+- out++;
+- data += stride;
+- }
+-}
+-
+-static void emit_ubyte_rgba4( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 4) {
+- for (i = 0; i < count; i++)
+- ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]);
+- } else {
+- for (i = 0; i < count; i++) {
+- *(int *)out++ = LE32_TO_CPU(*(int *)data);
+- data += stride;
+- }
+- }
+-}
+-
+-
+-static void emit_ubyte_rgba( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int size,
+- int stride,
+- int count )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+-
+- assert (!rvb->buf);
+-
+- if (stride == 0) {
+- r200AllocDmaRegion( rmesa, rvb, 4, 4 );
+- count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = 1;
+- }
+- else {
+- r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 1;
+- rvb->aos_size = 1;
+- }
+-
+- /* Emit the data
+- */
+- switch (size) {
+- case 3:
+- emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+- break;
+- case 4:
+- emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+- break;
+- default:
+- assert(0);
+- exit(1);
+- break;
+- }
+-}
+-#endif
+-
+-
+ #if defined(USE_X86_ASM)
+ #define COPY_DWORDS( dst, src, nr ) \
+ do { \
+@@ -174,204 +70,34 @@ do { \
+ } while (0)
+ #endif
+
+-
+-static void emit_vecfog( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
++static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
++ GLvoid *data, int stride, int count)
+ {
+- int i;
+- GLfloat *out;
+-
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- assert (!rvb->buf);
+-
+- if (stride == 0) {
+- r200AllocDmaRegion( rmesa, rvb, 4, 4 );
+- count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = 1;
+- }
+- else {
+- r200AllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 1;
+- rvb->aos_size = 1;
+- }
+-
+- /* Emit the data
+- */
+- out = (GLfloat *)(rvb->address + rvb->start);
+- for (i = 0; i < count; i++) {
+- out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
+- out++;
+- data += stride;
+- }
+-
++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ uint32_t *out;
++ int i;
++ int size = 1;
++
++ if (stride == 0) {
++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
++ count = 1;
++ aos->stride = 0;
++ } else {
++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
++ aos->stride = size;
++ }
++
++ aos->components = size;
++ aos->count = count;
++
++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
++ for (i = 0; i < count; i++) {
++ out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
++ out++;
++ data += stride;
++ }
+ }
+
+-
+-static void emit_vec4( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 4)
+- COPY_DWORDS( out, data, count );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out++;
+- data += stride;
+- }
+-}
+-
+-
+-static void emit_vec8( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 8)
+- COPY_DWORDS( out, data, count*2 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out += 2;
+- data += stride;
+- }
+-}
+-
+-static void emit_vec12( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 12)
+- COPY_DWORDS( out, data, count*3 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out[2] = *(int *)(data+8);
+- out += 3;
+- data += stride;
+- }
+-}
+-
+-static void emit_vec16( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 16)
+- COPY_DWORDS( out, data, count*4 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out[2] = *(int *)(data+8);
+- out[3] = *(int *)(data+12);
+- out += 4;
+- data += stride;
+- }
+-}
+-
+-
+-static void emit_vector( GLcontext *ctx,
+- struct r200_dma_region *rvb,
+- char *data,
+- int size,
+- int stride,
+- int count )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if (R200_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d size %d stride %d\n",
+- __FUNCTION__, count, size, stride);
+-
+- assert (!rvb->buf);
+-
+- if (stride == 0) {
+- r200AllocDmaRegion( rmesa, rvb, size * 4, 4 );
+- count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = size;
+- }
+- else {
+- r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = size;
+- rvb->aos_size = size;
+- }
+-
+- /* Emit the data
+- */
+- switch (size) {
+- case 1:
+- emit_vec4( ctx, rvb, data, stride, count );
+- break;
+- case 2:
+- emit_vec8( ctx, rvb, data, stride, count );
+- break;
+- case 3:
+- emit_vec12( ctx, rvb, data, stride, count );
+- break;
+- case 4:
+- emit_vec16( ctx, rvb, data, stride, count );
+- break;
+- default:
+- assert(0);
+- exit(1);
+- break;
+- }
+-
+-}
+-
+-
+-
+ /* Emit any changed arrays to new GART memory, re-emit a packet to
+ * update the arrays.
+ */
+@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT( ctx );
+ struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+- struct r200_dma_region **component = rmesa->tcl.aos_components;
+ GLuint nr = 0;
+ GLuint vfmt0 = 0, vfmt1 = 0;
+ GLuint count = VB->Count;
+ GLuint i, emitsize;
+
++ // fprintf(stderr,"emit arrays\n");
+ for ( i = 0; i < 15; i++ ) {
+ GLubyte attrib = vimap_rev[i];
+ if (attrib != 255) {
+@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ case 3:
+ /* special handling to fix up fog. Will get us into trouble with vbos...*/
+ assert(attrib == VERT_ATTRIB_FOG);
+- if (!rmesa->tcl.vertex_data[i].buf) {
++ if (!rmesa->tcl.aos[i].bo) {
+ if (ctx->VertexProgram._Enabled)
+- emit_vector( ctx,
+- &(rmesa->tcl.vertex_data[i]),
+- (char *)VB->AttribPtr[attrib]->data,
+- 1,
+- VB->AttribPtr[attrib]->stride,
+- count);
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->AttribPtr[attrib]->data,
++ 1,
++ VB->AttribPtr[attrib]->stride,
++ count);
+ else
+- emit_vecfog( ctx,
+- &(rmesa->tcl.vertex_data[i]),
+- (char *)VB->AttribPtr[attrib]->data,
+- VB->AttribPtr[attrib]->stride,
+- count);
++ r200_emit_vecfog( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->AttribPtr[attrib]->data,
++ VB->AttribPtr[attrib]->stride,
++ count);
+ }
+ vfmt0 |= R200_VTX_DISCRETE_FOG;
+ goto after_emit;
+@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ default:
+ assert(0);
+ }
+- if (!rmesa->tcl.vertex_data[i].buf) {
+- emit_vector( ctx,
+- &(rmesa->tcl.vertex_data[i]),
+- (char *)VB->AttribPtr[attrib]->data,
+- emitsize,
+- VB->AttribPtr[attrib]->stride,
+- count );
++ if (!rmesa->tcl.aos[nr].bo) {
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->AttribPtr[attrib]->data,
++ emitsize,
++ VB->AttribPtr[attrib]->stride,
++ count );
+ }
+ after_emit:
+ assert(nr < 12);
+- component[nr++] = &rmesa->tcl.vertex_data[i];
++ nr++;
+ }
+ }
+
+@@ -501,12 +227,11 @@ after_emit:
+ void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+- /* only do it for changed inputs ? */
+ int i;
+- for (i = 0; i < 15; i++) {
+- if (newinputs & (1 << i))
+- r200ReleaseDmaRegion( rmesa,
+- &rmesa->tcl.vertex_data[i], __FUNCTION__ );
++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++ if (rmesa->tcl.aos[i].bo) {
++ radeon_bo_unref(rmesa->tcl.aos[i].bo);
++ rmesa->tcl.aos[i].bo = NULL;
++ }
+ }
+ }
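
Annotation (not part of the patch): the hunks above drop the per-driver emit_vec4/8/12/16 and emit_vector() helpers in favour of rcommon_emit_vector() and a struct radeon_aos that carries a buffer object plus offset/stride/components/count. A minimal sketch of what such a shared helper plausibly looks like, assuming only the radeon_aos fields and the radeonAllocDmaRegion() signature visible in r200_emit_vecfog() above; the name sketch_emit_vector is hypothetical:

    /* Sketch: copy one vertex attribute into a freshly allocated DMA buffer
     * object and describe it as an array-of-structures stream for TCL.
     * Mirrors r200_emit_vecfog() above, but copies raw dwords instead of
     * computing a fog blend factor. */
    static void sketch_emit_vector(GLcontext *ctx, struct radeon_aos *aos,
                                   const char *data, int size, int stride,
                                   int count)
    {
       radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
       uint32_t *out;
       int i, j;

       if (stride == 0) {
          /* constant attribute: store a single element, stride 0 */
          radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
          count = 1;
          aos->stride = 0;
       } else {
          radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
          aos->stride = size;
       }
       aos->components = size;
       aos->count = count;

       out = (uint32_t *)((char *)aos->bo->ptr + aos->offset);
       for (i = 0; i < count; i++) {
          for (j = 0; j < size; j++)
             out[j] = ((const uint32_t *)data)[j];
          out += size;
          data += stride;
       }
    }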
+diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
+index be68821..a6c6558 100644
+--- a/src/mesa/drivers/dri/r200/r200_pixel.c
++++ b/src/mesa/drivers/dri/r200/r200_pixel.c
+@@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
+ const void *pixels, GLint sz, GLint pitch )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- GLuint cpp = rmesa->r200Screen->cpp;
++ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+
+ if (R200_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+@@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx,
+ if (*height <= 0)
+ return GL_FALSE;
+
+- *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
+- (*x + *width - 1) * rmesa->r200Screen->cpp);
++ *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
++ (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
+
+ return GL_TRUE;
+ }
+@@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx,
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLint pitch = pack->RowLength ? pack->RowLength : width;
+ GLint blit_format;
+- GLuint cpp = rmesa->r200Screen->cpp;
++ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+ GLint size = width * height * cpp;
+
++ return GL_FALSE;
++#if 0
+ if (R200_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ /* Only accelerate reading to GART buffers.
+ */
+ if ( !r200IsGartMemory(rmesa, pixels,
+- pitch * height * rmesa->r200Screen->cpp ) ) {
++ pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
+ if (R200_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
+- return GL_FALSE;
+ }
+
+ /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
+@@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx,
+ if (!check_color(ctx, type, format, pack, pixels, size, pitch))
+ return GL_FALSE;
+
+- switch ( rmesa->r200Screen->cpp ) {
++ switch ( rmesa->radeon.radeonScreen->cpp ) {
+ case 4:
+ blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+ break;
+@@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx,
+ * a full command buffer expects to be called unlocked. As a
+ * workaround, immediately flush the buffer on aquiring the lock.
+ */
+- LOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ if (rmesa->store.cmd_used)
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+ if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
+ &size)) {
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ if (R200_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s totally clipped -- nothing to do\n",
+ __FUNCTION__);
+@@ -212,14 +213,14 @@ r200TryReadPixels( GLcontext *ctx,
+ }
+
+ {
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
+ int nbox = dPriv->numClipRects;
+ int src_offset = drb->offset
+- + rmesa->r200Screen->fbLocation;
++ + rmesa->radeon.radeonScreen->fbLocation;
+ int src_pitch = drb->pitch * drb->cpp;
+ int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
+- int dst_pitch = pitch * rmesa->r200Screen->cpp;
++ int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ int i;
+
+@@ -257,12 +258,12 @@ r200TryReadPixels( GLcontext *ctx,
+ bw, bh );
+ }
+
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+ }
+- UNLOCK_HARDWARE( rmesa );
+-
+- r200Finish( ctx ); /* required by GL */
++ UNLOCK_HARDWARE( &rmesa->radeon );
+
++ radeonFinish( ctx ); /* required by GL */
++#endif
+ return GL_TRUE;
+ }
+
+@@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx,
+ GLuint planemask)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ drm_clip_rect_t *box = dPriv->pClipRects;
+ struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
+ driRenderbuffer *drb = (driRenderbuffer *) rb;
+@@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx,
+ int blit_format;
+ int size;
+ int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
+- int src_pitch = pitch * rmesa->r200Screen->cpp;
++ int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
+
+ if (R200_DEBUG & DEBUG_PIXEL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- switch ( rmesa->r200Screen->cpp ) {
++#if 0
++ switch ( rmesa->radeon.radeonScreen->cpp ) {
+ case 2:
+ blit_format = R200_CP_COLOR_FORMAT_RGB565;
+ break;
+@@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx,
+ }
+
+
+- LOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ if (rmesa->store.cmd_used)
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+ y -= height; /* cope with pixel zoom */
+
+ if (!clip_pixelrect(ctx, ctx->DrawBuffer,
+ &x, &y, &width, &height,
+ &size)) {
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ return;
+ }
+
+@@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx,
+ blit_format,
+ src_pitch, src_offset,
+ drb->pitch * drb->cpp,
+- drb->offset + rmesa->r200Screen->fbLocation,
++ drb->offset + rmesa->radeon.radeonScreen->fbLocation,
+ bx - x, by - y,
+ bx, by,
+ bw, bh );
+ }
+
+- r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+- r200WaitForIdleLocked( rmesa ); /* required by GL */
+- UNLOCK_HARDWARE( rmesa );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
++ radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
++ UNLOCK_HARDWARE( &rmesa->radeon );
++#endif
+ }
+
+
+@@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx,
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+ GLuint planemask;
+- GLuint cpp = rmesa->r200Screen->cpp;
++ GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+ GLint size = height * pitch * cpp;
+
+ if (R200_DEBUG & DEBUG_PIXEL)
+@@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx,
+ case GL_RGB:
+ case GL_RGBA:
+ case GL_BGRA:
+- planemask = r200PackColor(cpp,
++ planemask = radeonPackColor(cpp,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+@@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx,
+ return GL_FALSE;
+ }
+
+- if ( r200IsGartMemory(rmesa, pixels, size) )
++ if (0)// r200IsGartMemory(rmesa, pixels, size) )
+ {
+ do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
+ return GL_TRUE;
+@@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+- if (rmesa->Fallback)
++ if (rmesa->radeon.Fallback)
+ _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
+ else
+ r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
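
Annotation (not part of the patch): the r200_pixel.c changes above switch the planemask computation from r200PackColor() to the shared radeonPackColor(). For illustration only, a plausible shape for such a packer, assuming the two cpp values this file handles (2 for RGB565, 4 for ARGB8888); the helper name and exact bit layout are assumptions, not taken from this patch:

    /* Sketch: pack per-channel bytes into the framebuffer's native pixel
     * layout, selected by bytes per pixel. */
    static uint32_t sketch_pack_color(unsigned cpp, uint8_t r, uint8_t g,
                                      uint8_t b, uint8_t a)
    {
       switch (cpp) {
       case 2:  /* RGB565 */
          return ((uint32_t)(r >> 3) << 11) |
                 ((uint32_t)(g >> 2) << 5) |
                 ((uint32_t)(b >> 3));
       case 4:  /* ARGB8888 */
          return ((uint32_t)a << 24) | ((uint32_t)r << 16) |
                 ((uint32_t)g << 8) | (uint32_t)b;
       default:
          return 0;
       }
    }

Feeding the per-channel ColorMask values through such a packer yields a write planemask in the same layout as the framebuffer, which is how the r200TryDrawPixels() call site above (and r200ColorMask() later in this patch) uses the result.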
+diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
+index 5ce287f..526a624 100644
+--- a/src/mesa/drivers/dri/r200/r200_reg.h
++++ b/src/mesa/drivers/dri/r200/r200_reg.h
+@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000
+ /* gap */
+ #define R200_SE_TCL_VECTOR_INDX_REG 0x2200
++# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16
++# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28
+ #define R200_SE_TCL_VECTOR_DATA_REG 0x2204
+ #define R200_SE_TCL_SCALAR_INDX_REG 0x2208
++# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16
+ #define R200_SE_TCL_SCALAR_DATA_REG 0x220c
+ /* gap */
+ #define R200_SE_TCL_MATRIX_SEL_0 0x2230
+@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R200_LOD_BIAS_MASK (0xfff80000)
+ #define R200_LOD_BIAS_SHIFT 19
+ #define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */
++#define R200_PP_TX_WIDTHMASK_SHIFT 0
++#define R200_PP_TX_HEIGHTMASK_SHIFT 16
++
+ #define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */
+ #define R200_PP_BORDER_COLOR_0 0x2c14
+ #define R200_PP_CUBIC_FACES_0 0x2c18
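
Annotation (not part of the patch): the new shift definitions above are consumed later in this patch; two small illustrations of how they combine into register words. The R200_PP_TXSIZE_0 encoding as (width - 1, height - 1) is an assumption; the vector-index form matches the OUT_VEC macro added to r200_state_init.c further down:

    /* Sketch: NPOT texture size register, assuming the two fields hold
     * (width - 1) and (height - 1). */
    static uint32_t sketch_txsize(unsigned width, unsigned height)
    {
       return ((width - 1) << R200_PP_TX_WIDTHMASK_SHIFT) |
              ((height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT);
    }

    /* Sketch: vector index register word, as used by OUT_VEC below. */
    static uint32_t sketch_vec_indx(uint32_t offset, uint32_t stride)
    {
       return offset | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT);
    }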
+diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
+deleted file mode 100644
+index 9783678..0000000
+--- a/src/mesa/drivers/dri/r200/r200_span.c
++++ /dev/null
+@@ -1,307 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/colormac.h"
+-#include "swrast/swrast.h"
+-
+-#include "r200_context.h"
+-#include "r200_ioctl.h"
+-#include "r200_state.h"
+-#include "r200_span.h"
+-#include "r200_tex.h"
+-
+-#define DBG 0
+-
+-/*
+- * Note that all information needed to access pixels in a renderbuffer
+- * should be obtained through the gl_renderbuffer parameter, not per-context
+- * information.
+- */
+-#define LOCAL_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+- const GLuint bottom = dPriv->h - 1; \
+- GLubyte *buf = (GLubyte *) drb->flippedData \
+- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
+- GLuint p; \
+- (void) p;
+-
+-#define LOCAL_DEPTH_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+- const GLuint bottom = dPriv->h - 1; \
+- GLuint xo = dPriv->x; \
+- GLuint yo = dPriv->y; \
+- GLubyte *buf = (GLubyte *) drb->Base.Data;
+-
+-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+-
+-#define Y_FLIP(Y) (bottom - (Y))
+-
+-#define HW_LOCK()
+-
+-#define HW_UNLOCK()
+-
+-
+-
+-/* ================================================================
+- * Color buffer
+- */
+-
+-/* 16 bit, RGB565 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_RGB
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+-
+-#define TAG(x) r200##x##_RGB565
+-#define TAG2(x,y) r200##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+-#include "spantmp2.h"
+-
+-/* 32 bit, ARGB8888 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_BGRA
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+-
+-#define TAG(x) r200##x##_ARGB8888
+-#define TAG2(x,y) r200##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+-#include "spantmp2.h"
+-
+-
+-/* ================================================================
+- * Depth buffer
+- */
+-
+-/* The Radeon family has depth tiling on all the time, so we have to convert
+- * the x,y coordinates into the memory bus address (mba) in the same
+- * manner as the engine. In each case, the linear block address (ba)
+- * is calculated, and then wired with x and y to produce the final
+- * memory address.
+- * The chip will do address translation on its own if the surface registers
+- * are set up correctly. It is not quite enough to get it working with hyperz too...
+- */
+-
+-/* extract bit 'b' of x, result is zero or one */
+-#define BIT(x,b) ((x & (1<<b))>>b)
+-
+-static GLuint
+-r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 4 * (x + y * pitch);
+- }
+- else {
+- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
+- GLuint a =
+- (BIT(x,0) << 2) |
+- (BIT(y,0) << 3) |
+- (BIT(x,1) << 4) |
+- (BIT(y,1) << 5) |
+- (BIT(x,3) << 6) |
+- (BIT(x,4) << 7) |
+- (BIT(x,2) << 8) |
+- (BIT(y,2) << 9) |
+- (BIT(y,3) << 10) |
+- (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
+- ((b >> 1) << 12);
+- return a;
+- }
+-}
+-
+-static GLuint
+-r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 2 * (x + y * pitch);
+- }
+- else {
+- GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
+- GLuint a =
+- (BIT(x,0) << 1) |
+- (BIT(y,0) << 2) |
+- (BIT(x,1) << 3) |
+- (BIT(y,1) << 4) |
+- (BIT(x,2) << 5) |
+- (BIT(x,4) << 6) |
+- (BIT(x,5) << 7) |
+- (BIT(x,3) << 8) |
+- (BIT(y,2) << 9) |
+- (BIT(y,3) << 10) |
+- (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
+- ((b >> 1) << 12);
+- return a;
+- }
+-}
+-
+-
+-/* 16-bit depth buffer functions
+- */
+-#define VALUE_TYPE GLushort
+-
+-#define WRITE_DEPTH( _x, _y, d ) \
+- *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
+-
+-#define READ_DEPTH( d, _x, _y ) \
+- d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
+-
+-#define TAG(x) r200##x##_z16
+-#include "depthtmp.h"
+-
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#define VALUE_TYPE GLuint
+-
+-#define WRITE_DEPTH( _x, _y, d ) \
+-do { \
+- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0xff000000; \
+- tmp |= ((d) & 0x00ffffff); \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-
+-#define READ_DEPTH( d, _x, _y ) \
+- d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \
+- _y + yo )) & 0x00ffffff;
+-
+-#define TAG(x) r200##x##_z24_s8
+-#include "depthtmp.h"
+-
+-
+-/* ================================================================
+- * Stencil buffer
+- */
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#define WRITE_STENCIL( _x, _y, d ) \
+-do { \
+- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0x00ffffff; \
+- tmp |= (((d) & 0xff) << 24); \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-
+-#define READ_STENCIL( d, _x, _y ) \
+-do { \
+- GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0xff000000; \
+- d = tmp >> 24; \
+-} while (0)
+-
+-#define TAG(x) r200##x##_z24_s8
+-#include "stenciltmp.h"
+-
+-
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above). WaitForIdle() is the main
+- * culprit.
+- */
+-
+-static void r200SpanRenderStart( GLcontext *ctx )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+- R200_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
+- r200WaitForIdleLocked( rmesa );
+-
+- /* Read & rewrite the first pixel in the frame buffer. This should
+- * be a noop, right? In fact without this conform fails as reading
+- * from the framebuffer sometimes produces old results -- the
+- * on-card read cache gets mixed up and doesn't notice that the
+- * framebuffer has been updated.
+- *
+- * In the worst case this is buggy too as p might get the wrong
+- * value first time, so really need a hidden pixel somewhere for this.
+- */
+- {
+- int p;
+- driRenderbuffer *drb =
+- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+- volatile int *buf =
+- (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+- p = *buf;
+- *buf = p;
+- }
+-}
+-
+-static void r200SpanRenderFinish( GLcontext *ctx )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT( ctx );
+- _swrast_flush( ctx );
+- UNLOCK_HARDWARE( rmesa );
+-}
+-
+-void r200InitSpanFuncs( GLcontext *ctx )
+-{
+- struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+- swdd->SpanRenderStart = r200SpanRenderStart;
+- swdd->SpanRenderFinish = r200SpanRenderFinish;
+-}
+-
+-
+-
+-/**
+- * Plug in the Get/Put routines for the given driRenderbuffer.
+- */
+-void
+-radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+-{
+- if (drb->Base.InternalFormat == GL_RGBA) {
+- if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
+- r200InitPointers_RGB565(&drb->Base);
+- }
+- else {
+- r200InitPointers_ARGB8888(&drb->Base);
+- }
+- }
+- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+- r200InitDepthPointers_z16(&drb->Base);
+- }
+- else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+- r200InitDepthPointers_z24_s8(&drb->Base);
+- }
+- else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+- r200InitStencilPointers_z24_s8(&drb->Base);
+- }
+-}
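
Annotation (not part of the patch): the span code removed above explains that r200 depth buffers are tiled, so x/y must first be converted to a memory-bus address (r200_mba_z16()/r200_mba_z32()) before a texel is touched. For illustration only, a small sketch of the combined z24_s8 read-modify-write that the WRITE_DEPTH/WRITE_STENCIL macros above implement; the helper name is hypothetical and the offset would come from r200_mba_z32():

    /* Sketch: a z24_s8 dword keeps depth in the low 24 bits and stencil in
     * the top 8, so either update is a read-modify-write of the dword. */
    static void sketch_write_z24_s8(GLubyte *buf, GLuint offset,
                                    GLuint depth, GLubyte stencil)
    {
       GLuint tmp = *(GLuint *)(buf + offset);
       tmp = (tmp & 0xff000000) | (depth & 0x00ffffff);     /* depth   */
       tmp = (tmp & 0x00ffffff) | ((GLuint)stencil << 24);  /* stencil */
       *(GLuint *)(buf + offset) = tmp;
    }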
+diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h
+deleted file mode 100644
+index bae5644..0000000
+--- a/src/mesa/drivers/dri/r200/r200_span.h
++++ /dev/null
+@@ -1,45 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __R200_SPAN_H__
+-#define __R200_SPAN_H__
+-
+-#include "drirenderbuffer.h"
+-
+-extern void r200InitSpanFuncs( GLcontext *ctx );
+-
+-extern void
+-radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
+index 0eaaaf6..126f78b 100644
+--- a/src/mesa/drivers/dri/r200/r200_state.c
++++ b/src/mesa/drivers/dri/r200/r200_state.c
+@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_pipeline.h"
+ #include "swrast_setup/swrast_setup.h"
+
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
+ CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+ CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+ CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+- if (rmesa->r200Screen->drmSupportsBlendColor)
+- rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
++ rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
+ }
+
+ /**
+@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx )
+
+ R200_STATECHANGE( rmesa, ctx );
+
+- if (rmesa->r200Screen->drmSupportsBlendColor) {
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+ if (ctx->Color.ColorLogicOpEnabled) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE;
+ rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx )
+ return;
+ }
+
+- if (!rmesa->r200Screen->drmSupportsBlendColor) {
++ if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+ return;
+ }
+@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+
+ switch ( format ) {
+ case R200_DEPTH_FORMAT_16BIT_INT_Z:
+- rmesa->state.depth.clear = d * 0x0000ffff;
++ rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+ break;
+ case R200_DEPTH_FORMAT_24BIT_INT_Z:
+- rmesa->state.depth.clear = d * 0x00ffffff;
++ rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+ break;
+ }
+ }
+@@ -480,7 +482,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+ case GL_FOG_COLOR:
+ R200_STATECHANGE( rmesa, ctx );
+ UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+- i = r200PackColor( 4, col[0], col[1], col[2], 0 );
++ i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
+ rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
+ rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
+ break;
+@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+ }
+ }
+
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-
+-static GLboolean intersect_rect( drm_clip_rect_t *out,
+- drm_clip_rect_t *a,
+- drm_clip_rect_t *b )
+-{
+- *out = *a;
+- if ( b->x1 > out->x1 ) out->x1 = b->x1;
+- if ( b->y1 > out->y1 ) out->y1 = b->y1;
+- if ( b->x2 < out->x2 ) out->x2 = b->x2;
+- if ( b->y2 < out->y2 ) out->y2 = b->y2;
+- if ( out->x1 >= out->x2 ) return GL_FALSE;
+- if ( out->y1 >= out->y2 ) return GL_FALSE;
+- return GL_TRUE;
+-}
+-
+-
+-void r200RecalcScissorRects( r200ContextPtr rmesa )
+-{
+- drm_clip_rect_t *out;
+- int i;
+-
+- /* Grow cliprect store?
+- */
+- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
+- rmesa->state.scissor.numAllocedClipRects *= 2;
+- }
+-
+- if (rmesa->state.scissor.pClipRects)
+- FREE(rmesa->state.scissor.pClipRects);
+-
+- rmesa->state.scissor.pClipRects =
+- MALLOC( rmesa->state.scissor.numAllocedClipRects *
+- sizeof(drm_clip_rect_t) );
+-
+- if ( rmesa->state.scissor.pClipRects == NULL ) {
+- rmesa->state.scissor.numAllocedClipRects = 0;
+- return;
+- }
+- }
+-
+- out = rmesa->state.scissor.pClipRects;
+- rmesa->state.scissor.numClipRects = 0;
+-
+- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) {
+- if ( intersect_rect( out,
+- &rmesa->pClipRects[i],
+- &rmesa->state.scissor.rect ) ) {
+- rmesa->state.scissor.numClipRects++;
+- out++;
+- }
+- }
+-}
+-
+-
+-static void r200UpdateScissor( GLcontext *ctx )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if ( rmesa->dri.drawable ) {
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-
+- int x = ctx->Scissor.X;
+- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+- int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
+- int h = dPriv->h - ctx->Scissor.Y - 1;
+-
+- rmesa->state.scissor.rect.x1 = x + dPriv->x;
+- rmesa->state.scissor.rect.y1 = y + dPriv->y;
+- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
+- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
+-
+- r200RecalcScissorRects( rmesa );
+- }
+-}
+-
+-
+-static void r200Scissor( GLcontext *ctx,
+- GLint x, GLint y, GLsizei w, GLsizei h )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if ( ctx->Scissor.Enabled ) {
+- R200_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */
+- r200UpdateScissor( ctx );
+- }
+-
+-}
+-
+-
+ /* =============================================================
+ * Culling
+ */
+@@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx,
+ GLboolean b, GLboolean a )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
++ GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+@@ -834,7 +740,7 @@ static void r200PolygonOffset( GLcontext *ctx,
+ GLfloat factor, GLfloat units )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- float_ui32_type constant = { units * rmesa->state.depth.scale };
++ float_ui32_type constant = { units * rmesa->radeon.state.depth.scale };
+ float_ui32_type factoru = { factor };
+
+ /* factor *= 2; */
+@@ -861,15 +767,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+
+ /* TODO: push this into cmd mechanism
+ */
+- R200_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
++ radeon_firevertices(&rmesa->radeon);
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ /* FIXME: Use window x,y offsets into stipple RAM.
+ */
+ stipple.mask = rmesa->state.stipple.mask;
+- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
++ drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE,
+ &stipple, sizeof(stipple) );
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+
+ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+@@ -881,7 +787,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+ * cases work.
+ */
+ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ r200ChooseRenderState( ctx );
+ r200ChooseVertexState( ctx );
+ }
+@@ -958,7 +864,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
+
+ /* Update vertex/render formats
+ */
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ r200ChooseRenderState( ctx );
+ r200ChooseVertexState( ctx );
+ }
+@@ -1430,7 +1336,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
+ rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
+ else
+ rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ r200ChooseRenderState( ctx );
+ r200ChooseVertexState( ctx );
+ }
+@@ -1675,7 +1581,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+- rmesa->state.stencil.clear =
++ rmesa->radeon.state.stencil.clear =
+ ((GLuint) (ctx->Stencil.Clear & 0xff) |
+ (0xff << R200_STENCIL_MASK_SHIFT) |
+ ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
+@@ -1700,19 +1606,19 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
+ void r200UpdateWindow( GLcontext *ctx )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+- GLfloat xoffset = (GLfloat)dPriv->x;
+- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
++ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ float_ui32_type sx = { v[MAT_SX] };
+ float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+ float_ui32_type sy = { - v[MAT_SY] };
+ float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
+- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
++ float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
++ float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
+
+- R200_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ R200_STATECHANGE( rmesa, vpt );
+
+ rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
+@@ -1744,7 +1650,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
+ void r200UpdateViewportOffset( GLcontext *ctx )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ GLfloat xoffset = (GLfloat)dPriv->x;
+ GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+@@ -1774,8 +1680,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
+ R200_STIPPLE_Y_OFFSET_MASK);
+
+ /* add magic offsets, then invert */
+- stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
+- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
++ stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
++ sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
+ & R200_STIPPLE_COORD_MASK);
+
+ m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
+@@ -1788,7 +1694,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
+ }
+ }
+
+- r200UpdateScissor( ctx );
++ radeonUpdateScissor( ctx );
+ }
+
+
+@@ -1805,7 +1711,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
+ CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
+ CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
+ CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
+- rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
++ rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ color[0], color[1],
+ color[2], color[3] );
+ }
+@@ -1849,56 +1755,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
+ }
+
+
+-/*
+- * Set up the cliprects for either front or back-buffer drawing.
+- */
+-void r200SetCliprects( r200ContextPtr rmesa )
+-{
+- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
+- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
+-
+- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
+- /* Can't ignore 2d windows if we are page flipping.
+- */
+- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
+- rmesa->numClipRects = drawable->numClipRects;
+- rmesa->pClipRects = drawable->pClipRects;
+- }
+- else {
+- rmesa->numClipRects = drawable->numBackClipRects;
+- rmesa->pClipRects = drawable->pBackClipRects;
+- }
+- }
+- else {
+- /* front buffer (or none, or multiple buffers) */
+- rmesa->numClipRects = drawable->numClipRects;
+- rmesa->pClipRects = drawable->pClipRects;
+- }
+-
+- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
+- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
+- drawable->w, drawable->h);
+- draw_fb->Initialized = GL_TRUE;
+- }
+-
+- if (drawable != readable) {
+- if ((read_fb->Width != readable->w) ||
+- (read_fb->Height != readable->h)) {
+- _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
+- readable->w, readable->h);
+- read_fb->Initialized = GL_TRUE;
+- }
+- }
+-
+- if (rmesa->state.scissor.enabled)
+- r200RecalcScissorRects( rmesa );
+-
+- rmesa->lastStamp = drawable->lastStamp;
+-}
+-
+-
+ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+@@ -1907,7 +1763,7 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+ fprintf(stderr, "%s %s\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr( mode ));
+
+- R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */
+
+ if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+ /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+@@ -1925,7 +1781,8 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+ return;
+ }
+
+- r200SetCliprects( rmesa );
++ radeonSetCliprects( &rmesa->radeon );
++ radeonUpdatePageFlipping(&rmesa->radeon);
+
+ /* We'll set the drawing engine's offset/pitch parameters later
+ * when we update other state.
+@@ -2013,10 +1870,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+ R200_STATECHANGE(rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+ } else {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+ }
+ break;
+
+@@ -2031,7 +1888,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+ rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+ }
+ r200UpdateSpecular( ctx ); /* for PK_SPEC */
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ r200ChooseVertexState( ctx );
+ _mesa_allow_light_in_model( ctx, !state );
+ break;
+@@ -2068,7 +1925,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+ case GL_LIGHTING:
+ r200UpdateSpecular(ctx);
+ /* for reflection map fixup - might set recheck_texgen for all units too */
+- rmesa->NewGLState |= _NEW_TEXTURE;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE;
+ break;
+
+ case GL_LINE_SMOOTH:
+@@ -2181,13 +2038,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+ }
+
+ case GL_SCISSOR_TEST:
+- R200_FIREVERTICES( rmesa );
+- rmesa->state.scissor.enabled = state;
+- r200UpdateScissor( ctx );
++ radeon_firevertices(&rmesa->radeon);
++ rmesa->radeon.state.scissor.enabled = state;
++ radeonUpdateScissor( ctx );
+ break;
+
+ case GL_STENCIL_TEST:
+- if ( rmesa->state.stencil.hwBuffer ) {
++ if ( rmesa->radeon.state.stencil.hwBuffer ) {
+ R200_STATECHANGE( rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE;
+@@ -2443,42 +2300,99 @@ r200UpdateDrawBuffer(GLcontext *ctx)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+- driRenderbuffer *drb;
++ struct radeon_renderbuffer *rrb;
+
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+- /* draw to front */
+- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+- }
+- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+- /* draw to back */
+- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+- }
+- else {
+- /* drawing to multiple buffers, or none */
+- return;
++ /* draw to front */
++ rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
++ /* draw to back */
++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ } else {
++ /* drawing to multiple buffers, or none */
++ return;
+ }
+
+- assert(drb);
+- assert(drb->flippedPitch);
++ assert(rrb);
++ assert(rrb->pitch);
+
+ R200_STATECHANGE( rmesa, ctx );
+
++#if 0
+ /* Note: we used the (possibly) page-flipped values */
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
++ = ((rrb->flippedOffset + rmesa->radeon.radeonScreen->fbLocation)
+ & R200_COLOROFFSET_MASK);
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+- if (rmesa->sarea->tiling_enabled) {
++ if (rmesa->radeon.sarea->tiling_enabled) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+ }
++#endif
+ }
+
++static GLboolean r200ValidateBuffers(GLcontext *ctx)
++{
++ r200ContextPtr rmesa = R200_CONTEXT(ctx);
++ struct radeon_cs_space_check bos[8];
++ struct radeon_renderbuffer *rrb;
++ int num_bo = 0;
++ int i;
++ int flushed = 0, ret;
++again:
++ num_bo = 0;
++
++ rrb = radeon_get_colorbuffer(&rmesa->radeon);
++ /* color buffer */
++ if (rrb && rrb->bo) {
++ bos[num_bo].bo = rrb->bo;
++ bos[num_bo].read_domains = 0;
++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
++ }
++
++ /* depth buffer */
++ rrb = radeon_get_depthbuffer(&rmesa->radeon);
++ /* color buffer */
++ if (rrb && rrb->bo) {
++ bos[num_bo].bo = rrb->bo;
++ bos[num_bo].read_domains = 0;
++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
++ }
++
++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
++ radeonTexObj *t;
++
++ if (!ctx->Texture.Unit[i]._ReallyEnabled)
++ continue;
++
++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
++ bos[num_bo].bo = t->mt->bo;
++ bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].write_domain = 0;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
++ }
++
++ ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
++ if (ret == RADEON_CS_SPACE_OP_TO_BIG)
++ return GL_FALSE;
++ if (ret == RADEON_CS_SPACE_FLUSH) {
++ radeonFlush(ctx);
++ if (flushed)
++ return GL_FALSE;
++ flushed = 1;
++ goto again;
++ }
++ return GL_TRUE;
++}
+
+-
+-void r200ValidateState( GLcontext *ctx )
++GLboolean r200ValidateState( GLcontext *ctx )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- GLuint new_state = rmesa->NewGLState;
++ GLuint new_state = rmesa->radeon.NewGLState;
+
+ if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+ r200UpdateDrawBuffer(ctx);
+@@ -2486,10 +2400,14 @@ void r200ValidateState( GLcontext *ctx )
+
+ if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
+ r200UpdateTextureState( ctx );
+- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
++ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+ r200UpdateLocalViewer( ctx );
+ }
+
++ /* we need to do a space check here */
++ if (!r200ValidateBuffers(ctx))
++ return GL_FALSE;
++
+ /* FIXME: don't really need most of these when vertex progs are enabled */
+
+ /* Need an event driven matrix update?
+@@ -2533,7 +2451,8 @@ void r200ValidateState( GLcontext *ctx )
+ else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
+ }
+
+- rmesa->NewGLState = 0;
++ rmesa->radeon.NewGLState = 0;
++ return GL_TRUE;
+ }
+
+
+@@ -2544,7 +2463,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
+ _vbo_InvalidateState( ctx, new_state );
+ _tnl_InvalidateState( ctx, new_state );
+ _ae_invalidate_state( ctx, new_state );
+- R200_CONTEXT(ctx)->NewGLState |= new_state;
++ R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
+ }
+
+ /* A hack. The r200 can actually cope just fine with materials
+@@ -2573,12 +2492,13 @@ static void r200WrapRunPipeline( GLcontext *ctx )
+ GLboolean has_material;
+
+ if (0)
+- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
++ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+
+ /* Validate state:
+ */
+- if (rmesa->NewGLState)
+- r200ValidateState( ctx );
++ if (rmesa->radeon.NewGLState)
++ if (!r200ValidateState( ctx ))
++ FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
+
+ has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
+
+@@ -2636,7 +2556,7 @@ void r200InitStateFuncs( struct dd_function_table *functions )
+ functions->PointParameterfv = r200PointParameter;
+ functions->PointSize = r200PointSize;
+ functions->RenderMode = r200RenderMode;
+- functions->Scissor = r200Scissor;
++ functions->Scissor = radeonScissor;
+ functions->ShadeModel = r200ShadeModel;
+ functions->StencilFuncSeparate = r200StencilFuncSeparate;
+ functions->StencilMaskSeparate = r200StencilMaskSeparate;
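
Annotation (not part of the patch): r200ValidateState() now returns GLboolean because r200ValidateBuffers() above can fail even after a flush, in which case r200WrapRunPipeline() sets a texture fallback. The retry logic reduces to the sketch below, which uses only the return values visible in the hunk above; the helper name is hypothetical:

    /* Sketch: ask the command stream for space for every buffer object the
     * draw will reference; on RADEON_CS_SPACE_FLUSH, flush once and retry;
     * give up if it still does not fit so the caller can fall back. */
    static GLboolean sketch_space_check(GLcontext *ctx,
                                        struct radeon_cs_space_check *bos,
                                        int num_bo)
    {
       r200ContextPtr rmesa = R200_CONTEXT(ctx);
       int flushed = 0;
       int ret;

       for (;;) {
          ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
          if (ret == RADEON_CS_SPACE_OP_TO_BIG)
             return GL_FALSE;               /* can never fit in one CS */
          if (ret != RADEON_CS_SPACE_FLUSH)
             return GL_TRUE;                /* fits as-is */
          if (flushed)
             return GL_FALSE;               /* already flushed once */
          radeonFlush(ctx);
          flushed = 1;
       }
    }

Note that the original function rebuilds the BO list after the flush (the goto again path); the sketch keeps the list fixed only for brevity.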
+diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
+index a917163..1dddbfd 100644
+--- a/src/mesa/drivers/dri/r200/r200_state.h
++++ b/src/mesa/drivers/dri/r200/r200_state.h
+@@ -43,13 +43,11 @@ extern void r200InitTnlFuncs( GLcontext *ctx );
+
+ extern void r200UpdateMaterial( GLcontext *ctx );
+
+-extern void r200SetCliprects( r200ContextPtr rmesa );
+-extern void r200RecalcScissorRects( r200ContextPtr rmesa );
+ extern void r200UpdateViewportOffset( GLcontext *ctx );
+ extern void r200UpdateWindow( GLcontext *ctx );
+ extern void r200UpdateDrawBuffer(GLcontext *ctx);
+
+-extern void r200ValidateState( GLcontext *ctx );
++extern GLboolean r200ValidateState( GLcontext *ctx );
+
+ extern void r200PrintDirty( r200ContextPtr rmesa,
+ const char *msg );
+@@ -59,7 +57,7 @@ extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+ #define FALLBACK( rmesa, bit, mode ) do { \
+ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
+ __FUNCTION__, bit, mode ); \
+- r200Fallback( rmesa->glCtx, bit, mode ); \
++ r200Fallback( rmesa->radeon.glCtx, bit, mode ); \
+ } while (0)
+
+ extern void r200LightingSpaceChange( GLcontext *ctx );
+diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
+index 9e4677e..b40690e 100644
+--- a/src/mesa/drivers/dri/r200/r200_state_init.c
++++ b/src/mesa/drivers/dri/r200/r200_state_init.c
+@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_pipeline.h"
+ #include "swrast_setup/swrast_setup.h"
+
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+@@ -52,31 +54,145 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "xmlpool.h"
+
++/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
++ * 1.3 cmdbuffers allow all previous state to be updated as well as
++ * the tcl scalar and vector areas.
++ */
++static struct {
++ int start;
++ int len;
++ const char *name;
++} packet[RADEON_MAX_STATE_PACKETS] = {
++ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
++ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
++ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
++ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
++ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
++ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
++ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
++ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
++ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
++ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
++ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
++ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
++ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
++ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
++ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
++ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
++ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
++ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
++ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
++ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
++ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
++ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
++ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
++ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
++ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
++ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
++ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
++ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
++ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
++ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
++ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
++ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
++ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
++ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
++ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
++ {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
++ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
++ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
++ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
++ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
++ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
++ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
++ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
++ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
++ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
++ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
++ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
++ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
++ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
++ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
++ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
++ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
++ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
++ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
++ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
++ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
++ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
++ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
++ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
++ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
++ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
++ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
++ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
++ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
++ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
++ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
++ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
++ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
++ {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
++ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
++ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
++ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
++ {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
++ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
++ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
++ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
++ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
++ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
++ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
++ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
++ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
++ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
++ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
++ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
++ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
++ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
++ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
++ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
++ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
++ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
++ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
++ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
++ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
++ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
++ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
++ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
++ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
++ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
++};
++
+ /* =============================================================
+ * State initialization
+ */
+
+ void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
+ {
+- struct r200_state_atom *l;
++ struct radeon_state_atom *l;
+
+ fprintf(stderr, msg);
+ fprintf(stderr, ": ");
+
+- foreach(l, &rmesa->hw.atomlist) {
+- if (l->dirty || rmesa->hw.all_dirty)
++ foreach(l, &rmesa->radeon.hw.atomlist) {
++ if (l->dirty || rmesa->radeon.hw.all_dirty)
+ fprintf(stderr, "%s, ", l->name);
+ }
+
+ fprintf(stderr, "\n");
+ }
+
+-static int cmdpkt( int id )
++static int cmdpkt( r200ContextPtr rmesa, int id )
+ {
+ drm_radeon_cmd_header_t h;
+- h.i = 0;
+- h.packet.cmd_type = RADEON_CMD_PACKET;
+- h.packet.packet_id = id;
++
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ return CP_PACKET0(packet[id].start, packet[id].len - 1);
++ } else {
++ h.i = 0;
++ h.packet.cmd_type = RADEON_CMD_PACKET;
++ h.packet.packet_id = id;
++ }
+ return h.i;
+ }
+
+@@ -127,96 +243,388 @@ static int cmdscl2( int offset, int stride, int count )
+ }
+
+ #define CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
+ { \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- (void) idx; \
+ (void) rmesa; \
+- return FLAG; \
++ return (FLAG) ? atom->cmd_size : 0; \
+ }
+
+ #define TCL_CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+-{ \
+- r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- (void) idx; \
+- return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
++{ \
++ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
++ return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ }
+
+ #define TCL_OR_VP_CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+ { \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- (void) idx; \
+- return !rmesa->TclFallback && (FLAG); \
++ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \
+ }
+
+ #define VP_CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx, int idx ) \
+-{ \
+- r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- (void) idx; \
+- return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
++{ \
++ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
++ (void) atom; \
++ return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ }
+
+-
+ CHECK( always, GL_TRUE )
+ CHECK( never, GL_FALSE )
+ CHECK( tex_any, ctx->Texture._EnabledUnits )
+ CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
+-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
+-CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
++CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) )
++CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded )
+ CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
+-CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
++ CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) )
+ CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
+ CHECK( afs, ctx->ATIFragmentShader._Enabled )
+-CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
++CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT )
+ TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
+ TCL_CHECK( tcl, GL_TRUE )
+-TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded )
++TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded )
+ TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+-TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
+-TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
++TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled )
++TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) )
+ TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
+ VP_CHECK( tcl_vp, GL_TRUE )
+ VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
+ VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
+
++#define OUT_VEC(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ h.i = hdr; \
++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
++ OUT_BATCH(0); \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
++ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
++ OUT_BATCH_TABLE((data), h.vectors.count); \
++ } while(0)
++
++#define OUT_VECLINEAR(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ uint32_t _start, _sz; \
++ h.i = hdr; \
++ _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \
++ _sz = h.veclinear.count * 4; \
++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
++ OUT_BATCH(0); \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
++ OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \
++ OUT_BATCH_TABLE((data), _sz); \
++ } while(0)
++
++#define OUT_SCL(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ h.i = hdr; \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
++ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
++ OUT_BATCH_TABLE((data), h.scalars.count); \
++ } while(0)
++
++#define OUT_SCL2(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ h.i = hdr; \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
++ OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
++ OUT_BATCH_TABLE((data), h.scalars.count); \
++ } while(0)
++
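++/* Per-atom emit callbacks for the batch buffer path; each one wraps the
++ * atom's stored legacy command words with the packets built above. */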
++static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 6;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
++ OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
++ END_BATCH();
++}
++
++static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 8;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
++ OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
++ END_BATCH();
++}
++
++static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 8;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
++ OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
++ END_BATCH();
++}
++
++static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 4;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
++ END_BATCH();
++}
++
++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 2;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_SCL(atom->cmd[0], atom->cmd+1);
++ END_BATCH();
++}
++
++
++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 4;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[0], atom->cmd+1);
++ END_BATCH();
++}
++
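++/* Context emit for the legacy DRM path: fixes up the color/depth formats
++ * and emits the buffer offsets as relocations. */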
++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ struct radeon_renderbuffer *rrb;
++ uint32_t cbpitch;
++ uint32_t zbpitch, depth_fmt;
++ uint32_t dwords = atom->cmd_size;
++
++ /* output the first 5 dwords of the context state */
++ BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
++ OUT_BATCH_TABLE(atom->cmd, 5);
++
++ rrb = radeon_get_depthbuffer(&r200->radeon);
++ if (!rrb) {
++ OUT_BATCH(0);
++ OUT_BATCH(0);
++ } else {
++ zbpitch = (rrb->pitch / rrb->cpp);
++ if (r200->using_hyperz)
++ zbpitch |= RADEON_DEPTH_HYPERZ;
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH(zbpitch);
++ if (rrb->cpp == 4)
++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
++ else
++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
++ }
++
++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++ OUT_BATCH(atom->cmd[CTX_CMD_1]);
++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++
++ rrb = radeon_get_colorbuffer(&r200->radeon);
++ if (!rrb || !rrb->bo) {
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
++ } else {
++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
++ if (rrb->cpp == 4)
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
++ else
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
++
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ }
++
++ OUT_BATCH(atom->cmd[CTX_CMD_2]);
++
++ if (!rrb || !rrb->bo) {
++ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
++ } else {
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++ cbpitch |= R200_COLOR_TILE_ENABLE;
++ OUT_BATCH(cbpitch);
++ }
++
++ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
++ OUT_BATCH_TABLE((atom->cmd + 14), 4);
++
++ END_BATCH();
++}
++
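++/* Context emit for the kernel memory manager (CS) path: the state block is
++ * split into separate register packets so the color and depth offsets can
++ * be emitted as relocations. */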
++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ struct radeon_renderbuffer *rrb, *drb;
++ uint32_t cbpitch = 0;
++ uint32_t zbpitch = 0;
++ uint32_t dwords = atom->cmd_size;
++ uint32_t depth_fmt;
++
++ rrb = radeon_get_colorbuffer(&r200->radeon);
++ if (!rrb || !rrb->bo) {
++ return;
++ }
++
++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
++ if (rrb->cpp == 4)
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
++ else
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
++
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++ cbpitch |= R200_COLOR_TILE_ENABLE;
++
++ drb = radeon_get_depthbuffer(&r200->radeon);
++ if (drb) {
++ zbpitch = (drb->pitch / drb->cpp);
++ if (drb->cpp == 4)
++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
++ else
++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
++ }
++
++ if (drb)
++ dwords += 4;
++ if (rrb)
++ dwords += 4;
++
++ /* emit the context state; the buffer offsets go out as relocations below */
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++
++ /* In the CS case we need to split this up */
++ OUT_BATCH(CP_PACKET0(packet[0].start, 3));
++ OUT_BATCH_TABLE((atom->cmd + 1), 4);
++
++ if (drb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
++ OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
++ OUT_BATCH(zbpitch);
++ }
++
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++
++ if (rrb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ }
++
++ if (rrb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
++ OUT_BATCH(cbpitch);
++ }
++
++ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
++ OUT_BATCH_TABLE((atom->cmd + 14), 4);
++ }
++
++ END_BATCH();
++}
++
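++/* Emit one texture unit's state; the texture offset becomes a relocation
++ * against the miptree BO, or a plain offset for overridden images. */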
++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++ int i = atom->idx;
++ radeonTexObj *t = r200->state.texture.unit[i].texobj;
++
++ if (t && t->mt && !t->image_override)
++ dwords += 2;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_BATCH_TABLE(atom->cmd, 10);
++ if (t && !t->image_override) {
++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
++ RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ } else if (!t) {
++ /* workaround for old CS mechanism */
++ OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
++ } else if (t->image_override)
++ OUT_BATCH(t->override_offset);
++
++ END_BATCH();
++}
++
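++/* Emit cube map registers; the five extra face offsets are relocations
++ * into the miptree BO at totalsize/6 intervals. */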
++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r200ContextPtr r200 = R200_CONTEXT(ctx);
++ BATCH_LOCALS(&r200->radeon);
++ uint32_t dwords = atom->cmd_size;
++ int i = atom->idx;
++ radeonTexObj *t = r200->state.texture.unit[i].texobj;
++ GLuint size;
++
++ BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5));
++ OUT_BATCH_TABLE(atom->cmd, 3);
++
++ if (t && !t->image_override) {
++ size = t->mt->totalsize / 6;
++ OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ }
++ END_BATCH();
++}
+
+ /* Initialize the context's hardware state.
+ */
+ void r200InitState( r200ContextPtr rmesa )
+ {
+- GLcontext *ctx = rmesa->glCtx;
+- GLuint color_fmt, depth_fmt, i;
+- GLint drawPitch, drawOffset;
+-
+- switch ( rmesa->r200Screen->cpp ) {
+- case 2:
+- color_fmt = R200_COLOR_FORMAT_RGB565;
+- break;
+- case 4:
+- color_fmt = R200_COLOR_FORMAT_ARGB8888;
+- break;
+- default:
+- fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+- exit( -1 );
+- }
++ GLcontext *ctx = rmesa->radeon.glCtx;
++ GLuint i;
+
+- rmesa->state.color.clear = 0x00000000;
++ rmesa->radeon.state.color.clear = 0x00000000;
+
+ switch ( ctx->Visual.depthBits ) {
+ case 16:
+- rmesa->state.depth.clear = 0x0000ffff;
+- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+- depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
+- rmesa->state.stencil.clear = 0x00000000;
++ rmesa->radeon.state.depth.clear = 0x0000ffff;
++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
++ rmesa->radeon.state.stencil.clear = 0x00000000;
+ break;
+ case 24:
+- rmesa->state.depth.clear = 0x00ffffff;
+- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+- depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
+- rmesa->state.stencil.clear = 0xffff0000;
++ rmesa->radeon.state.depth.clear = 0x00ffffff;
++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
++ rmesa->radeon.state.stencil.clear = 0xffff0000;
+ break;
+ default:
+ fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+@@ -225,52 +633,37 @@ void r200InitState( r200ContextPtr rmesa )
+ }
+
+ /* Only have hw stencil when depth buffer is 24 bits deep */
+- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
++ rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+ ctx->Visual.depthBits == 24 );
+
+- rmesa->Fallback = 0;
++ rmesa->radeon.Fallback = 0;
+
+- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+- drawOffset = rmesa->r200Screen->backOffset;
+- drawPitch = rmesa->r200Screen->backPitch;
+- } else {
+- drawOffset = rmesa->r200Screen->frontOffset;
+- drawPitch = rmesa->r200Screen->frontPitch;
+- }
+-#if 000
+- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+- rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+- rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch;
+- } else {
+- rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+- rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch;
+- }
+-
+- rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+- rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
+-#endif
+-
+- rmesa->hw.max_state_size = 0;
++ rmesa->radeon.hw.max_state_size = 0;
+
+ #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \
+ do { \
+ rmesa->hw.ATOM.cmd_size = SZ; \
+- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
+- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
++ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
++ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+ rmesa->hw.ATOM.name = NM; \
+ rmesa->hw.ATOM.idx = IDX; \
+ rmesa->hw.ATOM.check = check_##CHK; \
+ rmesa->hw.ATOM.dirty = GL_FALSE; \
+- rmesa->hw.max_state_size += SZ * sizeof(int); \
++ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
+ } while (0)
+
+
+ /* Allocate state buffers:
+ */
+- if (rmesa->r200Screen->drmSupportsBlendColor)
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+ ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+ else
+ ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
++
++ if (rmesa->radeon.radeonScreen->kernel_mm)
++ rmesa->hw.ctx.emit = ctx_emit_cs;
++ else
++ rmesa->hw.ctx.emit = ctx_emit;
+ ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+ ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+ ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+@@ -282,8 +675,8 @@ void r200InitState( r200ContextPtr rmesa )
+ ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
+ ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+ ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
+- if (rmesa->r200Screen->drmSupportsFragShader) {
+- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+ ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+@@ -303,7 +696,7 @@ void r200InitState( r200ContextPtr rmesa )
+ ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ }
+ else {
+- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+@@ -321,13 +714,18 @@ void r200InitState( r200ContextPtr rmesa )
+ ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ }
+- if (rmesa->r200Screen->drmSupportsCubeMapsR200) {
++
++ for (i = 0; i < 6; i++)
++ rmesa->hw.tex[i].emit = tex_emit;
++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
+ ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+ ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
+ ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
+ ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
+ ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+ ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
++ for (i = 0; i < 6; i++)
++ rmesa->hw.cube[i].emit = cube_emit;
+ }
+ else {
+ ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+@@ -337,7 +735,8 @@ void r200InitState( r200ContextPtr rmesa )
+ ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+ ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
+ }
+- if (rmesa->r200Screen->drmSupportsVertexProgram) {
++
++ if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
+ ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
+ ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+ ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+@@ -390,13 +789,13 @@ void r200InitState( r200ContextPtr rmesa )
+ ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
+ ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
+ ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
+- if (rmesa->r200Screen->drmSupportsTriPerf) {
++ if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
+ ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+ }
+ else {
+ ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+ }
+- if (rmesa->r200Screen->drmSupportsPointSprites) {
++ if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
+ ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
+ ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+ }
+@@ -409,87 +808,115 @@ void r200InitState( r200ContextPtr rmesa )
+
+ /* Fill in the packet headers:
+ */
+- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+- if (rmesa->r200Screen->drmSupportsBlendColor)
+- rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR);
+- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+- rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
+- rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
+- rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
+- rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
+- rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
+- rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
+- rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
+- rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
+- rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
+- if (rmesa->r200Screen->drmSupportsFragShader) {
+- rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
+- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
+- rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
+- rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
+- rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
+- rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
+- rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
+- rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
++ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
++ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
++ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
++ rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
++ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
++ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
++ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
++ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
++ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
++ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
++ rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
++ rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
++ rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
++ rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
++ rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
++ rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
++ rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
++ rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
++ rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
++ rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
++ rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
++ rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
++ rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
++ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
++ rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
++ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
++ rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
++ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
++ rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
+ } else {
+- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
+- rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
+- rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
+- rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
+- rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
+- rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
+- rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+- }
+- rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
+- rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
+- rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL);
+- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
+- rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
+- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
+- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
+- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2);
+- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2);
+- rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3);
+- rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3);
+- rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4);
+- rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4);
+- rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5);
+- rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5);
+- rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
+- rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
+- rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2);
+- rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3);
+- rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4);
+- rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5);
+- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
+- rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
+- rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
+- rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
+- rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
+- rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
+- rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
+- rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
+- rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
+- rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL);
+- rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL);
++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
++ rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
++ rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
++ rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
++ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
++ rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
++ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
++ rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
++ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
++ rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
++ }
++ rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
++ rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
++ rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
++ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
++ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
++ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
++ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
++ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
++ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
++ rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
++ rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
++ rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
++ rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
++ rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
++ rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
++ rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
++ rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
++ rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
++ rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
++ rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
++ rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
++ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
++ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
++ rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
++ rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
++ rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
++ rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
++ rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
++ rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
++ rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
++ rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
++ rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
++ rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
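++
++ /* With the kernel memory manager, these atoms use the emit callbacks
++ defined above instead of the legacy command buffer copy. */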
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ rmesa->hw.mtl[0].emit = mtl_emit;
++ rmesa->hw.mtl[1].emit = mtl_emit;
++
++ rmesa->hw.vpi[0].emit = veclinear_emit;
++ rmesa->hw.vpi[1].emit = veclinear_emit;
++ rmesa->hw.vpp[0].emit = veclinear_emit;
++ rmesa->hw.vpp[1].emit = veclinear_emit;
++
++ rmesa->hw.grd.emit = scl_emit;
++ rmesa->hw.fog.emit = vec_emit;
++ rmesa->hw.glt.emit = vec_emit;
++ rmesa->hw.eye.emit = vec_emit;
++
++ for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
++ rmesa->hw.mat[i].emit = vec_emit;
++
++ for (i = 0; i < 8; i++)
++ rmesa->hw.lit[i].emit = lit_emit;
++
++ for (i = 0; i < 6; i++)
++ rmesa->hw.ucp[i].emit = vec_emit;
++
++ rmesa->hw.ptp.emit = ptp_emit;
++ }
++
+ rmesa->hw.mtl[0].cmd[MTL_CMD_0] =
+ cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
+ rmesa->hw.mtl[0].cmd[MTL_CMD_1] =
+@@ -567,7 +994,7 @@ void r200InitState( r200ContextPtr rmesa )
+ (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+ (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
+
+- if (rmesa->r200Screen->drmSupportsBlendColor) {
++ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
+ rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+ (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+@@ -578,18 +1005,17 @@ void r200InitState( r200ContextPtr rmesa )
+ }
+
+ rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+- rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation;
++ rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
+
+ rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
+- ((rmesa->r200Screen->depthPitch &
++ ((rmesa->radeon.radeonScreen->depthPitch &
+ R200_DEPTHPITCH_MASK) |
+ R200_DEPTH_ENDIAN_NO_SWAP);
+
+ if (rmesa->using_hyperz)
+ rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
+
+- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+- R200_Z_TEST_LESS |
++ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
+ R200_STENCIL_TEST_ALWAYS |
+ R200_STENCIL_FAIL_KEEP |
+ R200_STENCIL_ZPASS_KEEP |
+@@ -599,15 +1025,14 @@ void r200InitState( r200ContextPtr rmesa )
+ if (rmesa->using_hyperz) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
+ R200_Z_DECOMPRESSION_ENABLE;
+-/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
++/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+ }
+
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE
+ | R200_TEX_BLEND_0_ENABLE);
+
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
+- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
++ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+ case DRI_CONF_DITHER_XERRORDIFFRESET:
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
+ break;
+@@ -615,41 +1040,19 @@ void r200InitState( r200ContextPtr rmesa )
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
+ break;
+ }
+- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
++ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+ DRI_CONF_ROUND_ROUND )
+- rmesa->state.color.roundEnable = R200_ROUND_ENABLE;
++ rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
+ else
+- rmesa->state.color.roundEnable = 0;
+- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
++ rmesa->radeon.state.color.roundEnable = 0;
++ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+ DRI_CONF_COLOR_REDUCTION_DITHER )
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
+ else
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
+-
+-#if 000
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
+- rmesa->r200Screen->fbLocation)
+- & R200_COLOROFFSET_MASK);
+-
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
+- R200_COLORPITCH_MASK) |
+- R200_COLOR_ENDIAN_NO_SWAP);
+-#else
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
+- rmesa->r200Screen->fbLocation)
+- & R200_COLOROFFSET_MASK);
+-
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
+- R200_COLORPITCH_MASK) |
+- R200_COLOR_ENDIAN_NO_SWAP);
+-#endif
+- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+- }
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+
+ rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK *
+- driQueryOptionf (&rmesa->optionCache,"texture_blend_quality");
++ driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
+ rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
+
+ rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
+@@ -704,7 +1107,7 @@ void r200InitState( r200ContextPtr rmesa )
+ R200_VC_NO_SWAP;
+ #endif
+
+- if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
++ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ /* Bypass TCL */
+ rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
+ }
+@@ -743,28 +1146,28 @@ void r200InitState( r200ContextPtr rmesa )
+ rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
+ (/* R200_TEXCOORD_PROJ | */
+ 0x100000); /* Small default bias */
+- if (rmesa->r200Screen->drmSupportsFragShader) {
++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
+ rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
+ }
+ else {
+ rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ }
+
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
+- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+
+ rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
+ (R200_TXC_ARG_A_ZERO |
+@@ -967,5 +1370,7 @@ void r200InitState( r200ContextPtr rmesa )
+
+ r200LightingSpaceChange( ctx );
+
+- rmesa->hw.all_dirty = GL_TRUE;
++ rmesa->radeon.hw.all_dirty = GL_TRUE;
++
++ rcommonInitCmdBuf(&rmesa->radeon);
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
+index b25f028..b006409 100644
+--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
++++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
+@@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_tcl.h"
+
+
+-static void flush_last_swtcl_prim( r200ContextPtr rmesa );
+-
+-
+ /***********************************************************************
+ * Initialization
+ ***********************************************************************/
+
+ #define EMIT_ATTR( ATTR, STYLE, F0 ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ fmt_0 |= F0; \
+ } while (0)
+
+ #define EMIT_PAD( N ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ } while (0)
+
+ static void r200SetVertexFormat( GLcontext *ctx )
+@@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
+ }
+
+ assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+- rmesa->swtcl.vertex_attr_count = 0;
++ rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+ /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+ * build up a hardware vertex.
+@@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
+ rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
+ }
+
+- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
++ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+ (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
+ (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+ R200_NEWPRIM(rmesa);
+@@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx )
+ rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
+ rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
+
+- rmesa->swtcl.vertex_size =
++ rmesa->radeon.swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+- rmesa->swtcl.vertex_attrs,
+- rmesa->swtcl.vertex_attr_count,
++ rmesa->radeon.swtcl.vertex_attrs,
++ rmesa->radeon.swtcl.vertex_attr_count,
+ NULL, 0 );
+- rmesa->swtcl.vertex_size /= 4;
+- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
++ rmesa->radeon.swtcl.vertex_size /= 4;
++ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+ }
+ }
+
+
+ static void r200RenderStart( GLcontext *ctx )
+ {
+- r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+ r200SetVertexFormat( ctx );
+-
+- if (rmesa->dma.flush != 0 &&
+- rmesa->dma.flush != flush_last_swtcl_prim)
+- rmesa->dma.flush( rmesa );
+ }
+
+
+@@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx )
+ * rasterization fallback. As this function will be called again when we
+ * leave a rasterization fallback, we can just skip it for now.
+ */
+- if (rmesa->Fallback != 0)
++ if (rmesa->radeon.Fallback != 0)
+ return;
+
+ vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+@@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx )
+ }
+ }
+
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( r200ContextPtr rmesa )
+-{
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- rmesa->dma.flush = NULL;
+-
+- if (rmesa->dma.current.buf) {
+- struct r200_dma_region *current = &rmesa->dma.current;
+- GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset +
+- current->buf->buf->idx * RADEON_BUFFER_SIZE +
+- current->start);
+-
+- assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
+-
+- assert (current->start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- current->ptr);
+-
+- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+- r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
+- rmesa->hw.max_state_size + VBUF_BUFSZ );
+- r200EmitVertexAOS( rmesa,
+- rmesa->swtcl.vertex_size,
+- current_offset);
+-
+- r200EmitVbufPrim( rmesa,
+- rmesa->swtcl.hw_primitive,
+- rmesa->swtcl.numverts);
+- }
+-
+- rmesa->swtcl.numverts = 0;
+- current->start = current->ptr;
+- }
+-}
+-
+-
+-/* Alloc space in the current dma region.
+- */
+-static INLINE void *
+-r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize )
++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+ {
+- GLuint bytes = vsize * nverts;
+-
+- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+- r200RefillCurrentDmaRegion( rmesa );
+-
+- if (!rmesa->dma.flush) {
+- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+- rmesa->dma.flush = flush_last_swtcl_prim;
+- }
++ r200ContextPtr rmesa = R200_CONTEXT(ctx);
++ rcommonEnsureCmdBufSpace(&rmesa->radeon,
++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++ __FUNCTION__);
+
+- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+- ASSERT( rmesa->dma.current.start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- rmesa->dma.current.ptr );
+
++ radeonEmitState(&rmesa->radeon);
++ r200EmitVertexAOS( rmesa,
++ rmesa->radeon.swtcl.vertex_size,
++ rmesa->radeon.dma.current,
++ current_offset);
+
+- {
+- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+- rmesa->dma.current.ptr += bytes;
+- rmesa->swtcl.numverts += nverts;
+- return head;
+- }
++
++ r200EmitVbufPrim( rmesa,
++ rmesa->radeon.swtcl.hw_primitive,
++ rmesa->radeon.swtcl.numverts);
+
+ }
+
+-
+ /**************************************************************************/
+
+
+@@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx );
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+ #define CTX_ARG r200ContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
+ #define LOCAL_VARS \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- const char *r200verts = (char *)rmesa->swtcl.verts;
+-#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int)))
+-#define VERTEX r200Vertex
++ const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
++#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
++#define VERTEX radeonVertex
+ #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
+
+ #undef TAG
+@@ -456,11 +396,11 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+ #define VERT_SET_RGBA( v, c ) \
+ do { \
+- r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]); \
++ radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+@@ -472,7 +412,7 @@ do { \
+ #define VERT_SET_SPEC( v, c ) \
+ do { \
+ if (specoffset) { \
+- r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]); \
++ radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \
+@@ -481,8 +421,8 @@ do { \
+ #define VERT_COPY_SPEC( v0, v1 ) \
+ do { \
+ if (specoffset) { \
+- r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]); \
+- r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]); \
++ radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \
++ radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \
+ spec0->red = spec1->red; \
+ spec0->green = spec1->green; \
+ spec0->blue = spec1->blue; \
+@@ -513,7 +453,7 @@ do { \
+ ***********************************************************************/
+
+ #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -569,8 +509,8 @@ static void init_rast_tab( void )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS \
+ r200ContextPtr rmesa = R200_CONTEXT(ctx); \
+- const GLuint vertsize = rmesa->swtcl.vertex_size; \
+- const char *r200verts = (char *)rmesa->swtcl.verts; \
++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
++ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; \
+ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
+ const GLboolean stipple = ctx->Line.StippleFlag; \
+ (void) elt; (void) stipple;
+@@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx )
+ GLuint index = 0;
+ GLuint flags = ctx->_TriangleCaps;
+
+- if (!rmesa->TclFallback || rmesa->Fallback)
++ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
+ return;
+
+ if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
+ if (flags & DD_TRI_UNFILLED) index |= R200_UNFILLED_BIT;
+
+- if (index != rmesa->swtcl.RenderIndex) {
++ if (index != rmesa->radeon.swtcl.RenderIndex) {
+ tnl->Driver.Render.Points = rast_tab[index].points;
+ tnl->Driver.Render.Line = rast_tab[index].line;
+ tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx )
+ tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ }
+
+- rmesa->swtcl.RenderIndex = index;
++ rmesa->radeon.swtcl.RenderIndex = index;
+ }
+ }
+
+@@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+- if (rmesa->swtcl.hw_primitive != hwprim) {
++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ /* need to disable perspective-correct texturing for point sprites */
+ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
+ if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
+@@ -649,14 +589,14 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
+ }
+ R200_NEWPRIM( rmesa );
+- rmesa->swtcl.hw_primitive = hwprim;
++ rmesa->radeon.swtcl.hw_primitive = hwprim;
+ }
+ }
+
+ static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- rmesa->swtcl.render_primitive = prim;
++ rmesa->radeon.swtcl.render_primitive = prim;
+ if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
+ }
+@@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+- GLuint oldfallback = rmesa->Fallback;
++ GLuint oldfallback = rmesa->radeon.Fallback;
+
+ if (mode) {
+- rmesa->Fallback |= bit;
++ rmesa->radeon.Fallback |= bit;
+ if (oldfallback == 0) {
+- R200_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
+ _swsetup_Wakeup( ctx );
+- rmesa->swtcl.RenderIndex = ~0;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
+ if (R200_DEBUG & DEBUG_FALLBACKS) {
+ fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
+ bit, getFallbackString(bit));
+@@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ }
+ }
+ else {
+- rmesa->Fallback &= ~bit;
++ rmesa->radeon.Fallback &= ~bit;
+ if (oldfallback == bit) {
+
+ _swrast_flush( ctx );
+@@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+
+ tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
+- if (rmesa->TclFallback) {
+- /* These are already done if rmesa->TclFallback goes to
++ if (rmesa->radeon.TclFallback) {
++ /* These are already done if rmesa->radeon.TclFallback goes to
+ * zero above. But not if it doesn't (R200_NO_TCL for
+ * example?)
+ */
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
++ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+ r200ChooseVertexState( ctx );
+ r200ChooseRenderState( ctx );
+ }
+@@ -772,7 +712,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ const GLfloat *rc = ctx->Current.RasterColor;
+ GLint row, col;
+- r200Vertex vert;
++ radeonVertex vert;
+ GLuint orig_vte;
+ GLuint h;
+
+@@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ vte |= R200_VTX_W0_FMT;
+ vap &= ~R200_VAP_FORCE_W_TO_ONE;
+
+- rmesa->swtcl.vertex_size = 5;
++ rmesa->radeon.swtcl.vertex_size = 5;
+
+ if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
+ || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+@@ -871,10 +811,10 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+
+ /* Update window height
+ */
+- LOCK_HARDWARE( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
+- px += rmesa->dri.drawable->x;
++ LOCK_HARDWARE( &rmesa->radeon );
++ UNLOCK_HARDWARE( &rmesa->radeon );
++ h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y;
++ px += rmesa->radeon.dri.drawable->x;
+
+ /* Clipping handled by existing mechanisms in r200_ioctl.c?
+ */
+@@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+
+ /* Need to restore vertexformat?
+ */
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ r200ChooseVertexState( ctx );
+ }
+
+@@ -962,17 +902,13 @@ void r200InitSwtcl( GLcontext *ctx )
+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 36 * sizeof(GLfloat) );
+
+- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+- rmesa->swtcl.RenderIndex = ~0;
+- rmesa->swtcl.render_primitive = GL_TRIANGLES;
+- rmesa->swtcl.hw_primitive = 0;
++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++ rmesa->radeon.swtcl.hw_primitive = 0;
+ }
+
+
+ void r200DestroySwtcl( GLcontext *ctx )
+ {
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if (rmesa->swtcl.indexed_verts.buf)
+- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
+index 8c29fd0..a4051a4 100644
+--- a/src/mesa/drivers/dri/r200/r200_swtcl.h
++++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
+@@ -52,15 +52,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+ extern void r200PrintSetupFlags(char *msg, GLuint flags );
+
+
+-extern void r200_emit_indexed_verts( GLcontext *ctx,
+- GLuint start,
+- GLuint count );
+-
+ extern void r200_translate_vertex( GLcontext *ctx,
+- const r200Vertex *src,
++ const radeonVertex *src,
+ SWvertex *dst );
+
+-extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
++extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+ extern void r200_import_float_colors( GLcontext *ctx );
+ extern void r200_import_float_spec_colors( GLcontext *ctx );
+@@ -70,5 +66,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap );
+
+-
++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
+diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
+index 99aecfe..8e0fb14 100644
+--- a/src/mesa/drivers/dri/r200/r200_tcl.c
++++ b/src/mesa/drivers/dri/r200/r200_tcl.c
+@@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = {
+
+ #define RESET_STIPPLE() do { \
+ R200_STATECHANGE( rmesa, lin ); \
+- r200EmitState( rmesa ); \
++ radeonEmitState(&rmesa->radeon); \
+ } while (0)
+
+ #define AUTO_STIPPLE( mode ) do { \
+@@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = {
+ else \
+ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
+ ~R200_LINE_PATTERN_AUTO_RESET; \
+- r200EmitState( rmesa ); \
++ radeonEmitState(&rmesa->radeon); \
+ } while (0)
+
+
+@@ -142,25 +142,23 @@ static GLboolean discrete_prim[0x10] = {
+
+ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
+ {
+- if (rmesa->dma.flush == r200FlushElts &&
+- rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {
++ if (rmesa->radeon.dma.flush == r200FlushElts &&
++ rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
+
+- GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
+- rmesa->store.cmd_used);
++ GLushort *dest = (GLushort *)(rmesa->tcl.elt_dma_bo->ptr +
++ rmesa->tcl.elt_used);
+
+- rmesa->store.cmd_used += nr*2;
++ rmesa->tcl.elt_used += nr*2;
+
+ return dest;
+ }
+ else {
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
++ if (rmesa->radeon.dma.flush)
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+- r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+- rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );
++ rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
+
+ r200EmitAOS( rmesa,
+- rmesa->tcl.aos_components,
+ rmesa->tcl.nr_aos_components, 0 );
+
+ return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
+@@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx,
+ r200ContextPtr rmesa = R200_CONTEXT( ctx );
+ r200TclPrimitive( ctx, prim, hwprim );
+
+- r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+- rmesa->hw.max_state_size + VBUF_BUFSZ );
++ // fprintf(stderr,"Emit prim %d\n", rmesa->tcl.nr_aos_components);
++ rcommonEnsureCmdBufSpace( &rmesa->radeon,
++ AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
++ rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
+
+ r200EmitAOS( rmesa,
+- rmesa->tcl.aos_components,
+- rmesa->tcl.nr_aos_components,
+- start );
++ rmesa->tcl.nr_aos_components,
++ start );
+
+ /* Why couldn't this packet have taken an offset param?
+ */
+@@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
+
+ /* TODO: separate this from the swtnl pipeline
+ */
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ return GL_TRUE; /* fallback to software t&l */
+
+ if (R200_DEBUG & DEBUG_PRIMS)
+@@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
+
+ /* Validate state:
+ */
+- if (rmesa->NewGLState)
+- r200ValidateState( ctx );
++ if (rmesa->radeon.NewGLState)
++ if (!r200ValidateState( ctx ))
++ return GL_TRUE; /* fallback to sw t&l */
+
+ if (!ctx->VertexProgram._Enabled) {
+ /* NOTE: inputs != tnl->render_inputs - these are the untransformed
+@@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx )
+
+ tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
+
+- if ( rmesa->dma.flush )
+- rmesa->dma.flush( rmesa );
++ if ( rmesa->radeon.dma.flush )
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+- rmesa->dma.flush = NULL;
++ rmesa->radeon.dma.flush = NULL;
+
+- if (rmesa->swtcl.indexed_verts.buf)
+- r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+- __FUNCTION__ );
+-
+ R200_STATECHANGE( rmesa, vap );
+ rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
+ rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
+@@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit)
+ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- GLuint oldfallback = rmesa->TclFallback;
++ GLuint oldfallback = rmesa->radeon.TclFallback;
+
+ if (mode) {
+- rmesa->TclFallback |= bit;
++ rmesa->radeon.TclFallback |= bit;
+ if (oldfallback == 0) {
+ if (R200_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "R200 begin tcl fallback %s\n",
+@@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ }
+ }
+ else {
+- rmesa->TclFallback &= ~bit;
++ rmesa->radeon.TclFallback &= ~bit;
+ if (oldfallback == bit) {
+ if (R200_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "R200 end tcl fallback %s\n",
+diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
+index 5a4db33..19a6cad 100644
+--- a/src/mesa/drivers/dri/r200/r200_tex.c
++++ b/src/mesa/drivers/dri/r200/r200_tex.c
+@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/teximage.h"
+ #include "main/texobj.h"
+
+-#include "texmem.h"
+-
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+
+-static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
++static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
+ {
+ GLboolean is_clamp = GL_FALSE;
+ GLboolean is_clamp_to_border = GL_FALSE;
++ struct gl_texture_object *tObj = &t->base;
+
+ t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
+
+@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
+ _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+ }
+
+- if (t->base.tObj->Target != GL_TEXTURE_1D) {
++ if (tObj->Target != GL_TEXTURE_1D) {
+ switch ( twrap ) {
+ case GL_REPEAT:
+ t->pp_txfilter |= R200_CLAMP_T_WRAP;
+@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
+ t->border_fallback = (is_clamp && is_clamp_to_border);
+ }
+
+-static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
++static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
+ {
+ t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
+
+@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
+ * \param magf Texture magnification mode
+ */
+
+-static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
++static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+ {
+ GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
+
++ /* Force revalidation to account for switches from/to mipmapping. */
++ t->validated = GL_FALSE;
++
+ t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
+ t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
+
+@@ -267,693 +270,12 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
+ }
+ }
+
+-static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] )
+-{
+- t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] );
+-}
+-
+-
+-/**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
+-{
+- r200TexObjPtr t;
+-
+- t = CALLOC_STRUCT( r200_tex_obj );
+- texObj->DriverData = t;
+- if ( t != NULL ) {
+- if ( R200_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj,
+- (void *)t );
+- }
+-
+- /* Initialize non-image-dependent parts of the state:
+- */
+- t->base.tObj = texObj;
+- t->border_fallback = GL_FALSE;
+-
+- make_empty_list( & t->base );
+-
+- r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
+- r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+- r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+- r200SetTexBorderColor( t, texObj->_BorderChan );
+- }
+-
+- return t;
+-}
+-
+-/* try to find a format which will only need a memcopy */
+-static const struct gl_texture_format *
+-r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType )
+-{
+- const GLuint ui = 1;
+- const GLubyte littleEndian = *((const GLubyte *) &ui);
+-
+- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+- return &_mesa_texformat_rgba8888;
+- }
+- else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+- return &_mesa_texformat_rgba8888_rev;
+- }
+- else return _dri_texformat_argb8888;
+-}
+-
+-static const struct gl_texture_format *
+-r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+- GLenum format, GLenum type )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- const GLboolean do32bpt =
+- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+- const GLboolean force16bpt =
+- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+- (void) format;
+-
+- switch ( internalFormat ) {
+- case 4:
+- case GL_RGBA:
+- case GL_COMPRESSED_RGBA:
+- switch ( type ) {
+- case GL_UNSIGNED_INT_10_10_10_2:
+- case GL_UNSIGNED_INT_2_10_10_10_REV:
+- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- default:
+- return do32bpt ?
+- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
+- }
+-
+- case 3:
+- case GL_RGB:
+- case GL_COMPRESSED_RGB:
+- switch ( type ) {
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_5_6_5:
+- case GL_UNSIGNED_SHORT_5_6_5_REV:
+- return _dri_texformat_rgb565;
+- default:
+- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+- }
+-
+- case GL_RGBA8:
+- case GL_RGB10_A2:
+- case GL_RGBA12:
+- case GL_RGBA16:
+- return !force16bpt ?
+- r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
+-
+- case GL_RGBA4:
+- case GL_RGBA2:
+- return _dri_texformat_argb4444;
+-
+- case GL_RGB5_A1:
+- return _dri_texformat_argb1555;
+-
+- case GL_RGB8:
+- case GL_RGB10:
+- case GL_RGB12:
+- case GL_RGB16:
+- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-
+- case GL_RGB5:
+- case GL_RGB4:
+- case GL_R3_G3_B2:
+- return _dri_texformat_rgb565;
+-
+- case GL_ALPHA:
+- case GL_ALPHA4:
+- case GL_ALPHA8:
+- case GL_ALPHA12:
+- case GL_ALPHA16:
+- case GL_COMPRESSED_ALPHA:
+- /* can't use a8 format since interpreting hw I8 as a8 would result
+- in wrong rgb values (same as alpha value instead of 0). */
+- return _dri_texformat_al88;
+-
+- case 1:
+- case GL_LUMINANCE:
+- case GL_LUMINANCE4:
+- case GL_LUMINANCE8:
+- case GL_LUMINANCE12:
+- case GL_LUMINANCE16:
+- case GL_COMPRESSED_LUMINANCE:
+- return _dri_texformat_l8;
+-
+- case 2:
+- case GL_LUMINANCE_ALPHA:
+- case GL_LUMINANCE4_ALPHA4:
+- case GL_LUMINANCE6_ALPHA2:
+- case GL_LUMINANCE8_ALPHA8:
+- case GL_LUMINANCE12_ALPHA4:
+- case GL_LUMINANCE12_ALPHA12:
+- case GL_LUMINANCE16_ALPHA16:
+- case GL_COMPRESSED_LUMINANCE_ALPHA:
+- return _dri_texformat_al88;
+-
+- case GL_INTENSITY:
+- case GL_INTENSITY4:
+- case GL_INTENSITY8:
+- case GL_INTENSITY12:
+- case GL_INTENSITY16:
+- case GL_COMPRESSED_INTENSITY:
+- return _dri_texformat_i8;
+-
+- case GL_YCBCR_MESA:
+- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+- type == GL_UNSIGNED_BYTE)
+- return &_mesa_texformat_ycbcr;
+- else
+- return &_mesa_texformat_ycbcr_rev;
+-
+- case GL_RGB_S3TC:
+- case GL_RGB4_S3TC:
+- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgb_dxt1;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgba_dxt1;
+-
+- case GL_RGBA_S3TC:
+- case GL_RGBA4_S3TC:
+- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+- return &_mesa_texformat_rgba_dxt3;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+- return &_mesa_texformat_rgba_dxt5;
+-
+- default:
+- _mesa_problem(ctx,
+- "unexpected internalFormat 0x%x in r200ChooseTextureFormat",
+- (int) internalFormat);
+- return NULL;
+- }
+-
+- return NULL; /* never get here */
+-}
+-
+-
+-static GLboolean
+-r200ValidateClientStorage( GLcontext *ctx, GLenum target,
+- GLint internalFormat,
+- GLint srcWidth, GLint srcHeight,
+- GLenum format, GLenum type, const void *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf(stderr, "intformat %s format %s type %s\n",
+- _mesa_lookup_enum_by_nr( internalFormat ),
+- _mesa_lookup_enum_by_nr( format ),
+- _mesa_lookup_enum_by_nr( type ));
+-
+- if (!ctx->Unpack.ClientStorage)
+- return 0;
+-
+- if (ctx->_ImageTransferState ||
+- texImage->IsCompressed ||
+- texObj->GenerateMipmap)
+- return 0;
+-
+-
+- /* This list is incomplete, may be different on ppc???
+- */
+- switch ( internalFormat ) {
+- case GL_RGBA:
+- if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+- texImage->TexFormat = _dri_texformat_argb8888;
+- }
+- else
+- return 0;
+- break;
+-
+- case GL_RGB:
+- if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+- texImage->TexFormat = _dri_texformat_rgb565;
+- }
+- else
+- return 0;
+- break;
+-
+- case GL_YCBCR_MESA:
+- if ( format == GL_YCBCR_MESA &&
+- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
+- texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+- }
+- else if ( format == GL_YCBCR_MESA &&
+- (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+- type == GL_UNSIGNED_BYTE)) {
+- texImage->TexFormat = &_mesa_texformat_ycbcr;
+- }
+- else
+- return 0;
+- break;
+-
+- default:
+- return 0;
+- }
+-
+- /* Could deal with these packing issues, but currently don't:
+- */
+- if (packing->SkipPixels ||
+- packing->SkipRows ||
+- packing->SwapBytes ||
+- packing->LsbFirst) {
+- return 0;
+- }
+-
+- {
+- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+- format, type);
+-
+-
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf(stderr, "%s: srcRowStride %d/%x\n",
+- __FUNCTION__, srcRowStride, srcRowStride);
+-
+- /* Could check this later in upload, pitch restrictions could be
+- * relaxed, but would need to store the image pitch somewhere,
+- * as packing details might change before image is uploaded:
+- */
+- if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
+- (srcRowStride & 63))
+- return 0;
+-
+-
+- /* Have validated that _mesa_transfer_teximage would be a straight
+- * memcpy at this point. NOTE: future calls to TexSubImage will
+- * overwrite the client data. This is explicitly mentioned in the
+- * extension spec.
+- */
+- texImage->Data = (void *)pixels;
+- texImage->IsClientData = GL_TRUE;
+- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
+-
+- return 1;
+- }
+-}
+-
+-
+-static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint border,
+- GLenum format, GLenum type, const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+- return;
+- }
+- }
+-
+- /* Note, this will call ChooseTextureFormat */
+- _mesa_store_teximage1d(ctx, target, level, internalFormat,
+- width, border, format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset,
+- GLsizei width,
+- GLenum format, GLenum type,
+- const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+- format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLenum format, GLenum type, const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if ( t != NULL ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-
+- if (r200ValidateClientStorage( ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
+- }
+- else {
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r200ChooseTextureFormat.
+- */
+- _mesa_store_teximage2d(ctx, target, level, internalFormat,
+- width, height, border, format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+- }
+-}
+-
+-
+-static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset,
+- GLsizei width, GLsizei height,
+- GLenum format, GLenum type,
+- const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+- height, format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLsizei imageSize, const GLvoid *data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
++static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
+ {
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if ( t != NULL ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-/* can't call this, different parameters. Would never evaluate to true anyway currently
+- if (r200ValidateClientStorage( ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
+- }
+- else */{
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r200ChooseTextureFormat.
+- */
+- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
+- height, border, imageSize, data, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+- }
+-}
+-
+-
+-static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset,
+- GLsizei width, GLsizei height,
+- GLenum format,
+- GLsizei imageSize, const GLvoid *data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
+- return;
+- }
+- }
+-
+- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+- height, format, imageSize, data, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-#if ENABLE_HW_3D_TEXTURE
+-static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint depth,
+- GLint border,
+- GLenum format, GLenum type, const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-
+-#if 0
+- if (r200ValidateClientStorage( ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
+- }
+- else
+-#endif
+- {
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r200ChooseTextureFormat.
+- */
+- _mesa_store_teximage3d(ctx, target, level, internalFormat,
+- width, height, depth, border,
+- format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+- }
++ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+ }
+-#endif
+-
+
+-#if ENABLE_HW_3D_TEXTURE
+-static void
+-r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset, GLint zoffset,
+- GLsizei width, GLsizei height, GLsizei depth,
+- GLenum format, GLenum type,
+- const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-/* fprintf(stderr, "%s\n", __FUNCTION__); */
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) r200AllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+- return;
+- }
+- texObj->DriverData = t;
+- }
+
+- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+- width, height, depth,
+- format, type, pixels, packing, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-#endif
+
+
+
+@@ -978,7 +300,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
+ GLubyte c[4];
+ GLuint envColor;
+ UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+- envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
++ envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+ if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
+ R200_STATECHANGE( rmesa, tf );
+ rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
+@@ -997,7 +319,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
+ * NOTE: Add a small bias to the bias for conform mipsel.c test.
+ */
+ bias = *param + .01;
+- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
++ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
+ 0.0 : -16.0;
+ bias = CLAMP( bias, min, 16.0 );
+ b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
+@@ -1034,7 +356,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat *params )
+ {
+- r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
++ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+ fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+@@ -1068,59 +390,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
+ * we just have to rely on loading the right subset of mipmap levels
+ * to simulate a clamped LOD.
+ */
+- driSwapOutTextureObject( (driTextureObject *) t );
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
++ t->validated = GL_FALSE;
++ }
+ break;
+
+ default:
+ return;
+ }
+-
+- /* Mark this texobj as dirty (one bit per tex unit)
+- */
+- t->dirty_state = TEX_ALL;
+ }
+
+
+-
+-static void r200BindTexture( GLcontext *ctx, GLenum target,
+- struct gl_texture_object *texObj )
+-{
+- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
+- ctx->Texture.CurrentUnit );
+- }
+-
+- if ( (target == GL_TEXTURE_1D)
+- || (target == GL_TEXTURE_2D)
+-#if ENABLE_HW_3D_TEXTURE
+- || (target == GL_TEXTURE_3D)
+-#endif
+- || (target == GL_TEXTURE_CUBE_MAP)
+- || (target == GL_TEXTURE_RECTANGLE_NV) ) {
+- assert( texObj->DriverData != NULL );
+- }
+-}
+-
+-
+-static void r200DeleteTexture( GLcontext *ctx,
+- struct gl_texture_object *texObj )
++static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+- fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+- _mesa_lookup_enum_by_nr( texObj->Target ) );
++ radeonTexObj* t = radeon_tex_obj(texObj);
++
++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++ (void *)texObj,
++ _mesa_lookup_enum_by_nr(texObj->Target));
++ }
++
++ if (rmesa) {
++ int i;
++ radeon_firevertices(&rmesa->radeon);
++ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
++ if ( t == rmesa->state.texture.unit[i].texobj ) {
++ rmesa->state.texture.unit[i].texobj = NULL;
++ rmesa->hw.tex[i].dirty = GL_FALSE;
++ rmesa->hw.cube[i].dirty = GL_FALSE;
++ }
++ }
+ }
+-
+- if ( t != NULL ) {
+- if ( rmesa ) {
+- R200_FIREVERTICES( rmesa );
+- }
+-
+- driDestroyTextureObject( t );
++
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
+ }
+- /* Free mipmap images and the texture object itself */
+ _mesa_delete_texture_object(ctx, texObj);
+ }
+
+@@ -1150,46 +459,59 @@ static void r200TexGen( GLcontext *ctx,
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+- * Note: we could use containment here to 'derive' the driver-specific
+- * texture object from the core mesa gl_texture_object. Not done at this time.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+-static struct gl_texture_object *
+-r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
++static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
++ GLuint name,
++ GLenum target)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_object *obj;
+- obj = _mesa_new_texture_object(ctx, name, target);
+- if (!obj)
+- return NULL;
+- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+- r200AllocTexObj( obj );
+- return obj;
++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++
++
++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++ t, _mesa_lookup_enum_by_nr(target));
++ }
++
++ _mesa_initialize_texture_object(&t->base, name, target);
++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++ /* Initialize hardware state */
++ r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
++ r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
++ r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
++ r200SetTexBorderColor(t, t->base._BorderChan);
++
++ return &t->base;
+ }
+
+
++
+ void r200InitTextureFuncs( struct dd_function_table *functions )
+ {
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+- functions->ChooseTextureFormat = r200ChooseTextureFormat;
+- functions->TexImage1D = r200TexImage1D;
+- functions->TexImage2D = r200TexImage2D;
++ functions->ChooseTextureFormat = radeonChooseTextureFormat;
++ functions->TexImage1D = radeonTexImage1D;
++ functions->TexImage2D = radeonTexImage2D;
+ #if ENABLE_HW_3D_TEXTURE
+- functions->TexImage3D = r200TexImage3D;
++ functions->TexImage3D = radeonTexImage3D;
+ #else
+ functions->TexImage3D = _mesa_store_teximage3d;
+ #endif
+- functions->TexSubImage1D = r200TexSubImage1D;
+- functions->TexSubImage2D = r200TexSubImage2D;
++ functions->TexSubImage1D = radeonTexSubImage1D;
++ functions->TexSubImage2D = radeonTexSubImage2D;
+ #if ENABLE_HW_3D_TEXTURE
+- functions->TexSubImage3D = r200TexSubImage3D;
++ functions->TexSubImage3D = radeonTexSubImage3D;
+ #else
+ functions->TexSubImage3D = _mesa_store_texsubimage3d;
+ #endif
++ functions->GetTexImage = radeonGetTexImage;
++ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ functions->NewTextureObject = r200NewTextureObject;
+- functions->BindTexture = r200BindTexture;
++ // functions->BindTexture = r200BindTexture;
+ functions->DeleteTexture = r200DeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+@@ -1197,22 +519,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
+ functions->TexParameter = r200TexParameter;
+ functions->TexGen = r200TexGen;
+
+- functions->CompressedTexImage2D = r200CompressedTexImage2D;
+- functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D;
++ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
++ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
++
++ functions->GenerateMipmap = radeonGenerateMipmap;
++
++ functions->NewTextureImage = radeonNewTextureImage;
++ functions->FreeTexImageData = radeonFreeTexImageData;
++ functions->MapTexture = radeonMapTexture;
++ functions->UnmapTexture = radeonUnmapTexture;
+
+ driInitTextureFormats();
+
+-#if 000
+- /* moved or obsolete code */
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- driInitTextureObjects( ctx, & rmesa->swapped,
+- DRI_TEXMGR_DO_TEXTURE_1D
+- | DRI_TEXMGR_DO_TEXTURE_2D );
+-
+- /* Hack: r200NewTextureObject is not yet installed when the
+- * default textures are created. Therefore set MaxAnisotropy of the
+- * default 2D texture now. */
+- ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache,
+- "def_max_anisotropy");
+-#endif
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
+index 10ff8e8..55592ed 100644
+--- a/src/mesa/drivers/dri/r200/r200_tex.h
++++ b/src/mesa/drivers/dri/r200/r200_tex.h
+@@ -41,9 +41,9 @@ extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+
+ extern void r200UpdateTextureState( GLcontext *ctx );
+
+-extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
++extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
+
+-extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
++extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
+
+ extern void r200InitTextureFuncs( struct dd_function_table *functions );
+
+diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
+deleted file mode 100644
+index 3b81ac0..0000000
+--- a/src/mesa/drivers/dri/r200/r200_texmem.c
++++ /dev/null
+@@ -1,530 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
+-The Weather Channel, Inc. funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86
+-license. This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Kevin E. Martin <martin@valinux.com>
+- * Gareth Hughes <gareth@valinux.com>
+- *
+- */
+-
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/colormac.h"
+-#include "main/macros.h"
+-#include "r200_context.h"
+-#include "r200_ioctl.h"
+-#include "r200_tex.h"
+-#include "radeon_reg.h"
+-
+-#include <unistd.h> /* for usleep() */
+-
+-
+-/**
+- * Destroy any device-dependent state associated with the texture. This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void
+-r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
+-{
+- if ( R200_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__,
+- (void *)t, (void *)t->base.tObj );
+- }
+-
+- if ( rmesa != NULL ) {
+- unsigned i;
+-
+-
+- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
+- if ( t == rmesa->state.texture.unit[i].texobj ) {
+- rmesa->state.texture.unit[i].texobj = NULL;
+- rmesa->hw.tex[i].dirty = GL_FALSE;
+- rmesa->hw.cube[i].dirty = GL_FALSE;
+- }
+- }
+- }
+-}
+-
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-
+-static void r200UploadGARTClientSubImage( r200ContextPtr rmesa,
+- r200TexObjPtr t,
+- struct gl_texture_image *texImage,
+- GLint hwlevel,
+- GLint x, GLint y,
+- GLint width, GLint height )
+-{
+- const struct gl_texture_format *texFormat = texImage->TexFormat;
+- GLuint srcPitch, dstPitch;
+- int blit_format;
+- int srcOffset;
+-
+- /*
+- * XXX it appears that we always upload the full image, not a subimage.
+- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever
+- * changed, the src pitch will have to change.
+- */
+- switch ( texFormat->TexelBytes ) {
+- case 1:
+- blit_format = R200_CP_COLOR_FORMAT_CI8;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- case 2:
+- blit_format = R200_CP_COLOR_FORMAT_RGB565;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- case 4:
+- blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- default:
+- return;
+- }
+-
+- t->image[0][hwlevel].data = texImage->Data;
+- srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
+-
+- assert( srcOffset != ~0 );
+-
+- /* Don't currently need to cope with small pitches?
+- */
+- width = texImage->Width;
+- height = texImage->Height;
+-
+- r200EmitWait( rmesa, RADEON_WAIT_3D );
+-
+- r200EmitBlit( rmesa, blit_format,
+- srcPitch,
+- srcOffset,
+- dstPitch,
+- t->bufAddr,
+- x,
+- y,
+- t->image[0][hwlevel].x + x,
+- t->image[0][hwlevel].y + y,
+- width,
+- height );
+-
+- r200EmitWait( rmesa, RADEON_WAIT_2D );
+-}
+-
+-static void r200UploadRectSubImage( r200ContextPtr rmesa,
+- r200TexObjPtr t,
+- struct gl_texture_image *texImage,
+- GLint x, GLint y,
+- GLint width, GLint height )
+-{
+- const struct gl_texture_format *texFormat = texImage->TexFormat;
+- int blit_format, dstPitch, done;
+-
+- switch ( texFormat->TexelBytes ) {
+- case 1:
+- blit_format = R200_CP_COLOR_FORMAT_CI8;
+- break;
+- case 2:
+- blit_format = R200_CP_COLOR_FORMAT_RGB565;
+- break;
+- case 4:
+- blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+- break;
+- default:
+- return;
+- }
+-
+- t->image[0][0].data = texImage->Data;
+-
+- /* Currently don't need to cope with small pitches.
+- */
+- width = texImage->Width;
+- height = texImage->Height;
+- dstPitch = t->pp_txpitch + 32;
+-
+- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+- /* In this case, could also use GART texturing. This is
+- * currently disabled, but has been tested & works.
+- */
+- if ( !t->image_override )
+- t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
+- t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
+-
+- if (R200_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr,
+- "Using GART texturing for rectangular client texture\n");
+-
+- /* Release FB memory allocated for this image:
+- */
+- /* FIXME This may not be correct as driSwapOutTextureObject sets
+- * FIXME dirty_images. It may be fine, though.
+- */
+- if ( t->base.memBlock ) {
+- driSwapOutTextureObject( (driTextureObject *) t );
+- }
+- }
+- else if (texImage->IsClientData) {
+- /* Data already in GART memory, with usable pitch.
+- */
+- GLuint srcPitch;
+- srcPitch = texImage->RowStride * texFormat->TexelBytes;
+- r200EmitBlit( rmesa,
+- blit_format,
+- srcPitch,
+- r200GartOffsetFromVirtual( rmesa, texImage->Data ),
+- dstPitch, t->bufAddr,
+- 0, 0,
+- 0, 0,
+- width, height );
+- }
+- else {
+- /* Data not in GART memory, or bad pitch.
+- */
+- for (done = 0; done < height ; ) {
+- struct r200_dma_region region;
+- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
+- int src_pitch;
+- char *tex;
+-
+- src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+- tex = (char *)texImage->Data + done * src_pitch;
+-
+- memset(&region, 0, sizeof(region));
+- r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
+-
+- /* Copy texdata to dma:
+- */
+- if (0)
+- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+- __FUNCTION__, src_pitch, dstPitch);
+-
+- if (src_pitch == dstPitch) {
+- memcpy( region.address + region.start, tex, lines * src_pitch );
+- }
+- else {
+- char *buf = region.address + region.start;
+- int i;
+- for (i = 0 ; i < lines ; i++) {
+- memcpy( buf, tex, src_pitch );
+- buf += dstPitch;
+- tex += src_pitch;
+- }
+- }
+-
+- r200EmitWait( rmesa, RADEON_WAIT_3D );
+-
+- /* Blit to framebuffer
+- */
+- r200EmitBlit( rmesa,
+- blit_format,
+- dstPitch, GET_START( &region ),
+- dstPitch | (t->tile_bits >> 16),
+- t->bufAddr,
+- 0, 0,
+- 0, done,
+- width, lines );
+-
+- r200EmitWait( rmesa, RADEON_WAIT_2D );
+-
+- r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+- done += lines;
+- }
+- }
+-}
+-
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
+- GLint hwlevel,
+- GLint x, GLint y, GLint width, GLint height,
+- GLuint face )
+-{
+- struct gl_texture_image *texImage = NULL;
+- GLuint offset;
+- GLint imageWidth, imageHeight;
+- GLint ret;
+- drm_radeon_texture_t tex;
+- drm_radeon_tex_image_t tmp;
+- const int level = hwlevel + t->base.firstLevel;
+-
+- if ( R200_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+- __FUNCTION__, (void *)t, (void *)t->base.tObj,
+- level, width, height, face );
+- }
+-
+- ASSERT(face < 6);
+-
+- /* Ensure we have a valid texture to upload */
+- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+- return;
+- }
+-
+- texImage = t->base.tObj->Image[face][level];
+-
+- if ( !texImage ) {
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+- return;
+- }
+- if ( !texImage->Data ) {
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+- return;
+- }
+-
+-
+- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- assert(level == 0);
+- assert(hwlevel == 0);
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+- r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+- return;
+- }
+- else if (texImage->IsClientData) {
+- if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is in GART client storage\n",
+- __FUNCTION__);
+- r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
+- x, y, width, height );
+- return;
+- }
+- else if ( R200_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is in normal memory\n",
+- __FUNCTION__);
+-
+-
+- imageWidth = texImage->Width;
+- imageHeight = texImage->Height;
+-
+- offset = t->bufAddr + t->base.totalSize / 6 * face;
+-
+- if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+- GLint imageX = 0;
+- GLint imageY = 0;
+- GLint blitX = t->image[face][hwlevel].x;
+- GLint blitY = t->image[face][hwlevel].y;
+- GLint blitWidth = t->image[face][hwlevel].width;
+- GLint blitHeight = t->image[face][hwlevel].height;
+- fprintf( stderr, " upload image: %d,%d at %d,%d\n",
+- imageWidth, imageHeight, imageX, imageY );
+- fprintf( stderr, " upload blit: %d,%d at %d,%d\n",
+- blitWidth, blitHeight, blitX, blitY );
+- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n",
+- (GLuint)offset, hwlevel, level );
+- }
+-
+- t->image[face][hwlevel].data = texImage->Data;
+-
+- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+- * We used to use 1, 2 and 4-byte texels and used to use the texture
+- * width to dictate the blit width - but that won't work for compressed
+- * textures. (Brian)
+- * NOTE: can't do that with texture tiling. (sroland)
+- */
+- tex.offset = offset;
+- tex.image = &tmp;
+- /* copy (x,y,width,height,data) */
+- memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
+-
+- if (texImage->TexFormat->TexelBytes) {
+- /* use multi-byte upload scheme */
+- tex.height = imageHeight;
+- tex.width = imageWidth;
+- tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
+- if (tex.format == R200_TXFORMAT_ABGR8888) {
+- /* drm will refuse abgr8888 textures. */
+- tex.format = R200_TXFORMAT_ARGB8888;
+- }
+- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+- tex.offset += tmp.x & ~1023;
+- tmp.x = tmp.x % 1024;
+- if (t->tile_bits & R200_TXO_MICRO_TILE) {
+- /* need something like "tiled coordinates" ? */
+- tmp.y = tmp.x / (tex.pitch * 128) * 2;
+- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+- }
+- else {
+- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+- }
+- if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
+- (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
+- ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
+- (texImage->Height >= 16))) {
+- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+- OR if height is smaller than 8 automatically, but if micro tiling is active
+- the limit is height 16 instead ? */
+- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+- }
+- }
+- else {
+- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
+- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
+- so the kernel module reads the right amount of data. */
+- tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+- tex.pitch = (BLIT_WIDTH_BYTES / 64);
+- tex.height = (imageHeight + 3) / 4;
+- tex.width = (imageWidth + 3) / 4;
+- switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
+- case R200_TXFORMAT_DXT1:
+- tex.width *= 8;
+- break;
+- case R200_TXFORMAT_DXT23:
+- case R200_TXFORMAT_DXT45:
+- tex.width *= 16;
+- break;
+- default:
+- fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
+- }
+- }
+-
+- LOCK_HARDWARE( rmesa );
+- do {
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+- &tex, sizeof(drm_radeon_texture_t) );
+- if (ret) {
+- if (R200_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n");
+- usleep(1);
+- }
+- } while ( ret == -EAGAIN );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+- fprintf( stderr, " offset=0x%08x\n",
+- offset );
+- fprintf( stderr, " image width=%d height=%d\n",
+- imageWidth, imageHeight );
+- fprintf( stderr, " blit width=%d height=%d data=%p\n",
+- t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+- t->image[face][hwlevel].data );
+- exit( 1 );
+- }
+-}
+-
+-
+-/**
+- * Upload the texture images associated with texture \a t. This might
+- * require the allocation of texture memory.
+- *
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded. Zero for non-cube maps.
+- */
+-
+-int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
+-{
+- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+- if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
+- t->base.firstLevel, t->base.lastLevel );
+- }
+-
+- if ( !t || t->base.totalSize == 0 || t->image_override )
+- return 0;
+-
+- if (R200_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+- r200Finish( rmesa->glCtx );
+- }
+-
+- LOCK_HARDWARE( rmesa );
+-
+- if ( t->base.memBlock == NULL ) {
+- int heap;
+-
+- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+- (driTextureObject *) t );
+- if ( heap == -1 ) {
+- UNLOCK_HARDWARE( rmesa );
+- return -1;
+- }
+-
+- /* Set the base offset of the texture image */
+- t->bufAddr = rmesa->r200Screen->texOffset[heap]
+- + t->base.memBlock->ofs;
+- t->pp_txoffset = t->bufAddr;
+-
+- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+- /* hope it's safe to add that here... */
+- t->pp_txoffset |= t->tile_bits;
+- }
+-
+- /* Mark this texobj as dirty on all units:
+- */
+- t->dirty_state = TEX_ALL;
+- }
+-
+- /* Let the world know we've used this memory recently.
+- */
+- driUpdateTextureLRU( (driTextureObject *) t );
+- UNLOCK_HARDWARE( rmesa );
+-
+- /* Upload any images that are new */
+- if (t->base.dirty_images[face]) {
+- int i;
+- for ( i = 0 ; i < numLevels ; i++ ) {
+- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+- t->image[face][i].height, face );
+- }
+- }
+- t->base.dirty_images[face] = 0;
+- }
+-
+-
+- if (R200_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+- r200Finish( rmesa->glCtx );
+- }
+-
+- return 0;
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
+index 3f9a2f4..6432068 100644
+--- a/src/mesa/drivers/dri/r200/r200_texstate.c
++++ b/src/mesa/drivers/dri/r200/r200_texstate.c
+@@ -40,6 +40,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/texobj.h"
+ #include "main/enums.h"
+
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -139,257 +141,6 @@ static const struct tx_table tx_table_le[] =
+ #undef _ALPHA
+ #undef _INVALID
+
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
+- * too.
+- *
+- * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- * hardware state.
+- */
+-static void r200SetTexImages( r200ContextPtr rmesa,
+- struct gl_texture_object *tObj )
+-{
+- r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
+- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+- GLint curOffset, blitWidth;
+- GLint i, texelBytes;
+- GLint numLevels;
+- GLint log2Width, log2Height, log2Depth;
+-
+- /* Set the hardware texture format
+- */
+- if ( !t->image_override ) {
+- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+- const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
+- tx_table_be;
+-
+- t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+- R200_TXFORMAT_ALPHA_IN_MAP);
+- t->pp_txfilter &= ~R200_YUV_TO_RGB;
+-
+- t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
+- t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
+- }
+- else {
+- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+- return;
+- }
+- }
+-
+- texelBytes = baseImage->TexFormat->TexelBytes;
+-
+- /* Compute which mipmap levels we really want to send to the hardware.
+- */
+-
+- driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+-
+- /* Calculate mipmap offsets and dimensions for blitting (uploading)
+- * The idea is that we lay out the mipmap levels within a block of
+- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+- */
+- curOffset = 0;
+- blitWidth = BLIT_WIDTH_BYTES;
+- t->tile_bits = 0;
+-
+- /* figure out if this texture is suitable for tiling. */
+- if (texelBytes) {
+- if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+- /* texrect might be able to use micro tiling too in theory? */
+- (baseImage->Height > 1)) {
+- /* allow 32 (bytes) x 1 mip (which will use two times the space
+- the non-tiled version would use) max if base texture is large enough */
+- if ((numLevels == 1) ||
+- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+- (baseImage->Width * texelBytes > 64)) ||
+- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+- t->tile_bits |= R200_TXO_MICRO_TILE;
+- }
+- }
+- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+- /* we can set macro tiling even for small textures, they will be untiled anyway */
+- t->tile_bits |= R200_TXO_MACRO_TILE;
+- }
+- }
+-
+- for (i = 0; i < numLevels; i++) {
+- const struct gl_texture_image *texImage;
+- GLuint size;
+-
+- texImage = tObj->Image[0][i + t->base.firstLevel];
+- if ( !texImage )
+- break;
+-
+- /* find image size in bytes */
+- if (texImage->IsCompressed) {
+- /* need to calculate the size AFTER padding even though the texture is
+- submitted without padding.
+- Only handle pot textures currently - don't know if npot is even possible,
+- size calculation would certainly need (trivial) adjustments.
+- Align (and later pad) to 32byte, not sure what that 64byte blit width is
+- good for? */
+- if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
+- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+- if ((texImage->Width + 3) < 8) /* width one block */
+- size = texImage->CompressedSize * 4;
+- else if ((texImage->Width + 3) < 16)
+- size = texImage->CompressedSize * 2;
+- else size = texImage->CompressedSize;
+- }
+- else /* DXT3/5, 16 bytes per block */
+- if ((texImage->Width + 3) < 8)
+- size = texImage->CompressedSize * 2;
+- else size = texImage->CompressedSize;
+- }
+- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+- }
+- else if (t->tile_bits & R200_TXO_MICRO_TILE) {
+- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+- though the actual offset may be different (if texture is less than
+- 32 bytes width) to the untiled case */
+- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- }
+- else {
+- int w = (texImage->Width * texelBytes + 31) & ~31;
+- size = w * texImage->Height * texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- }
+- assert(size > 0);
+-
+- /* Align to 32-byte offset. It is faster to do this unconditionally
+- * (no branch penalty).
+- */
+-
+- curOffset = (curOffset + 0x1f) & ~0x1f;
+-
+- if (texelBytes) {
+- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+- t->image[0][i].y = 0;
+- t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+- }
+- else {
+- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+- t->image[0][i].height = size / t->image[0][i].width;
+- }
+-
+-#if 0
+- /* for debugging only and only applicable to non-rectangle targets */
+- assert(size % t->image[0][i].width == 0);
+- assert(t->image[0][i].x == 0
+- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+-#endif
+-
+- if (0)
+- fprintf(stderr,
+- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+- i, texImage->Width, texImage->Height,
+- t->image[0][i].x, t->image[0][i].y,
+- t->image[0][i].width, t->image[0][i].height, size, curOffset);
+-
+- curOffset += size;
+-
+- }
+-
+- /* Align the total size of texture memory block.
+- */
+- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+- /* Setup remaining cube face blits, if needed */
+- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- const GLuint faceSize = t->base.totalSize;
+- GLuint face;
+- /* reuse face 0 x/y/width/height - just update the offset when uploading */
+- for (face = 1; face < 6; face++) {
+- for (i = 0; i < numLevels; i++) {
+- t->image[face][i].x = t->image[0][i].x;
+- t->image[face][i].y = t->image[0][i].y;
+- t->image[face][i].width = t->image[0][i].width;
+- t->image[face][i].height = t->image[0][i].height;
+- }
+- }
+- t->base.totalSize = 6 * faceSize; /* total texmem needed */
+- }
+-
+-
+- /* Hardware state:
+- */
+- t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+- t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
+-
+- t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+- R200_TXFORMAT_HEIGHT_MASK |
+- R200_TXFORMAT_CUBIC_MAP_ENABLE |
+- R200_TXFORMAT_F5_WIDTH_MASK |
+- R200_TXFORMAT_F5_HEIGHT_MASK);
+- t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
+- (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
+-
+- t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
+- if (tObj->Target == GL_TEXTURE_3D) {
+- t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+- t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+- }
+- else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- ASSERT(log2Width == log2Height);
+- t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+- (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+-/* don't think we need this bit, if it exists at all - fglrx does not set it */
+- (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+- t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+- t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+- (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+- (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+- (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+- (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+- (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+- (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+- (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+- }
+- else {
+- /* If we don't in fact send enough texture coordinates, q will be 1,
+- * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
+- */
+- t->pp_txformat_x |= R200_TEXCOORD_PROJ;
+- }
+-
+- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
+- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
+-
+- /* Only need to round to nearest 32 for textures, but the blitter
+- * requires 64-byte aligned pitches, and we may/may not need the
+- * blitter. NPOT only!
+- */
+- if ( !t->image_override ) {
+- if (baseImage->IsCompressed)
+- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+- else
+- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
+- t->pp_txpitch -= 32;
+- }
+-
+- t->dirty_state = TEX_ALL;
+-
+- /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
+-}
+-
+-
+-
+ /* ================================================================
+ * Texture combine functions
+ */
+@@ -981,20 +732,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ {
+ r200ContextPtr rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+- _mesa_lookup_texture(rmesa->glCtx, texname);
+- r200TexObjPtr t;
++ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
++ radeonTexObjPtr t = radeon_tex_obj(tObj);
+
+ if (!tObj)
+ return;
+
+- t = (r200TexObjPtr) tObj->DriverData;
+-
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+- t->pp_txoffset = offset;
++ t->bo = NULL;
++ t->override_offset = offset;
+ t->pp_txpitch = pitch - 32;
+
+ switch (depth) {
+@@ -1207,12 +957,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
+ R200_VOLUME_FILTER_MASK)
+
+
++static void disable_tex_obj_state( r200ContextPtr rmesa,
++ int unit )
++{
++
++ R200_STATECHANGE( rmesa, vtx );
++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
++
++ if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
++ TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
++ }
++
++ /* Actually want to keep all units less than max active texture
++ * enabled, right? Fix this for >2 texunits.
++ */
++
++ {
++ GLuint tmp = rmesa->TexGenEnabled;
++
++ rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
++ rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
++ rmesa->TexGenNeedNormals[unit] = GL_FALSE;
++ rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
++
++ if (tmp != rmesa->TexGenEnabled) {
++ rmesa->recheck_texgen[unit] = GL_TRUE;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
++ }
++ }
++}
+ static void import_tex_obj_state( r200ContextPtr rmesa,
+ int unit,
+- r200TexObjPtr texobj )
++ radeonTexObjPtr texobj )
+ {
+ /* do not use RADEON_DB_STATE to avoid stale texture caches */
+- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++ GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+
+ R200_STATECHANGE( rmesa, tex[unit] );
+
+@@ -1225,36 +1004,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
+ cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
+ cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
+ cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+- if (rmesa->r200Screen->drmSupportsFragShader) {
+- cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
+- }
+- else {
+- cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
+- }
+
+- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+- GLuint bytesPerFace = texobj->base.totalSize / 6;
+- ASSERT(texobj->base.totalSize % 6 == 0);
++ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
++ GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+
+ R200_STATECHANGE( rmesa, cube[unit] );
+ cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+- if (rmesa->r200Screen->drmSupportsFragShader) {
++ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ /* that value is submitted twice. could change cube atom
+ to not include that command when new drm is used */
+ cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+ }
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
+ }
+
+- texobj->dirty_state &= ~(1<<unit);
+ }
+
+-
+ static void set_texgen_matrix( r200ContextPtr rmesa,
+ GLuint unit,
+ const GLfloat *s_plane,
+@@ -1377,7 +1141,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+ } else {
+ tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
+ }
+-
+ if (texUnit->TexGenEnabled & R_BIT) {
+ if (texUnit->GenModeR != mode)
+ mixed_fallback = GL_TRUE;
+@@ -1513,52 +1276,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+ return GL_TRUE;
+ }
+
+-
+-static void disable_tex( GLcontext *ctx, int unit )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
+- /* Texture unit disabled */
+- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+- /* The old texture is no longer bound to this texture unit.
+- * Mark it as such.
+- */
+-
+- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+- rmesa->state.texture.unit[unit].texobj = NULL;
+- }
+-
+- R200_STATECHANGE( rmesa, ctx );
+- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
+-
+- R200_STATECHANGE( rmesa, vtx );
+- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+-
+- if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
+- TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+- }
+-
+- /* Actually want to keep all units less than max active texture
+- * enabled, right? Fix this for >2 texunits.
+- */
+-
+- {
+- GLuint tmp = rmesa->TexGenEnabled;
+-
+- rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
+- rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
+- rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+- rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+-
+- if (tmp != rmesa->TexGenEnabled) {
+- rmesa->recheck_texgen[unit] = GL_TRUE;
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+- }
+- }
+- }
+-}
+-
+ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+@@ -1575,237 +1292,165 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+ }
+ }
+
+-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+- /* Need to load the 2d images associated with this unit.
+- */
+- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+- t->base.dirty_images[0] = ~0;
+- }
+-
+- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+-
+- if ( t->base.dirty_images[0] ) {
+- R200_FIREVERTICES( rmesa );
+- r200SetTexImages( rmesa, tObj );
+- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+- if ( !t->base.memBlock && !t->image_override )
+- return GL_FALSE;
+- }
+-
+- set_re_cntl_d3d( ctx, unit, GL_FALSE );
+-
+- return GL_TRUE;
+-}
+-
+-#if ENABLE_HW_3D_TEXTURE
+-static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
++/**
++ * Compute the cached hardware register values for the given texture object.
++ *
++ * \param rmesa Context pointer
++ * \param t the r300 texture object
++ */
++static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
+ {
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+- /* Need to load the 3d images associated with this unit.
+- */
+- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+- t->base.dirty_images[0] = ~0;
++ const struct gl_texture_image *firstImage =
++ t->base.Image[0][t->mt->firstLevel];
++ GLint log2Width, log2Height, log2Depth, texelBytes;
++
++ log2Width = firstImage->WidthLog2;
++ log2Height = firstImage->HeightLog2;
++ log2Depth = firstImage->DepthLog2;
++ texelBytes = firstImage->TexFormat->TexelBytes;
++
++
++ if (!t->image_override) {
++ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++ const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
++ tx_table_be;
++
++ t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
++ R200_TXFORMAT_ALPHA_IN_MAP);
++ t->pp_txfilter &= ~R200_YUV_TO_RGB;
++
++ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
++ t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
++ } else {
++ _mesa_problem(NULL, "unexpected texture format in %s",
++ __FUNCTION__);
++ return;
++ }
+ }
++
++ t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
++ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
++
++ t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
++ R200_TXFORMAT_HEIGHT_MASK |
++ R200_TXFORMAT_CUBIC_MAP_ENABLE |
++ R200_TXFORMAT_F5_WIDTH_MASK |
++ R200_TXFORMAT_F5_HEIGHT_MASK);
++ t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
++ (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
++
++ t->tile_bits = 0;
++
++ t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
++ if (t->base.Target == GL_TEXTURE_3D) {
++ t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
++ t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+
+- ASSERT(tObj->Target == GL_TEXTURE_3D);
+-
+- /* R100 & R200 do not support mipmaps for 3D textures.
+- */
+- if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
+- return GL_FALSE;
+ }
+-
+- if ( t->base.dirty_images[0] ) {
+- R200_FIREVERTICES( rmesa );
+- r200SetTexImages( rmesa, tObj );
+- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+- if ( !t->base.memBlock )
+- return GL_FALSE;
++ else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
++ ASSERT(log2Width == log2Height);
++ t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
++ (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
++ /* don't think we need this bit, if it exists at all - fglrx does not set it */
++ (R200_TXFORMAT_CUBIC_MAP_ENABLE));
++ t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
++ t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
++ (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
++ (log2Width << R200_FACE_WIDTH_2_SHIFT) |
++ (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
++ (log2Width << R200_FACE_WIDTH_3_SHIFT) |
++ (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
++ (log2Width << R200_FACE_WIDTH_4_SHIFT) |
++ (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+ }
+-
+- set_re_cntl_d3d( ctx, unit, GL_TRUE );
+-
+- return GL_TRUE;
+-}
+-#endif
+-
+-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+- GLuint face;
+-
+- /* Need to load the 2d images associated with this unit.
+- */
+- if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+- t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+- for (face = 0; face < 6; face++)
+- t->base.dirty_images[face] = ~0;
++ else {
++ /* If we don't in fact send enough texture coordinates, q will be 1,
++ * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
++ */
++ t->pp_txformat_x |= R200_TEXCOORD_PROJ;
+ }
+
+- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+-
+- if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
+- t->base.dirty_images[2] || t->base.dirty_images[3] ||
+- t->base.dirty_images[4] || t->base.dirty_images[5] ) {
+- /* flush */
+- R200_FIREVERTICES( rmesa );
+- /* layout memory space, once for all faces */
+- r200SetTexImages( rmesa, tObj );
+- }
++ t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
++ | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
+
+- /* upload (per face) */
+- for (face = 0; face < 6; face++) {
+- if (t->base.dirty_images[face]) {
+- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
+- }
+- }
+-
+- if ( !t->base.memBlock ) {
+- /* texmem alloc failed, use s/w fallback */
+- return GL_FALSE;
++ if ( !t->image_override ) {
++ if (firstImage->IsCompressed)
++ t->pp_txpitch = (firstImage->Width + 63) & ~(63);
++ else
++ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
++ t->pp_txpitch -= 32;
+ }
+
+- set_re_cntl_d3d( ctx, unit, GL_TRUE );
+-
+- return GL_TRUE;
+-}
+-
+-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+-{
+- r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+- if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+ t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+- t->base.dirty_images[0] = ~0;
+ }
+
+- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+- if ( t->base.dirty_images[0] ) {
+- R200_FIREVERTICES( rmesa );
+- r200SetTexImages( rmesa, tObj );
+- r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+- if ( !t->base.memBlock &&
+- !t->image_override &&
+- !rmesa->prefer_gart_client_texturing )
+- return GL_FALSE;
+- }
+-
+- set_re_cntl_d3d( ctx, unit, GL_FALSE );
+-
+- return GL_TRUE;
+ }
+
+-
+-static GLboolean update_tex_common( GLcontext *ctx, int unit )
++static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+- /* Fallback if there's a texture border */
+- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
+- return GL_FALSE;
+-
+- /* Update state if this is a different texture object to last
+- * time.
+- */
+- if ( rmesa->state.texture.unit[unit].texobj != t ) {
+- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+- /* The old texture is no longer bound to this texture unit.
+- * Mark it as such.
+- */
+-
+- rmesa->state.texture.unit[unit].texobj->base.bound &=
+- ~(1UL << unit);
+- }
++ radeonTexObj *t = radeon_tex_obj(texObj);
+
+- rmesa->state.texture.unit[unit].texobj = t;
+- t->base.bound |= (1UL << unit);
+- t->dirty_state |= 1<<unit;
+- driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+- }
+-
+-
+- /* Newly enabled?
+- */
+- if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
+- R200_STATECHANGE( rmesa, ctx );
+- rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
++ if (!radeon_validate_texture_miptree(ctx, texObj))
++ return GL_FALSE;
+
+- R200_STATECHANGE( rmesa, vtx );
+- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+- rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
++ r200_validate_texgen(ctx, unit);
++ /* Configure the hardware registers (more precisely, the cached version
++ * of the hardware registers). */
++ setup_hardware_state(rmesa, t);
++
++ if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
++ texObj->Target == GL_TEXTURE_2D ||
++ texObj->Target == GL_TEXTURE_1D)
++ set_re_cntl_d3d( ctx, unit, GL_FALSE );
++ else
++ set_re_cntl_d3d( ctx, unit, GL_TRUE );
++ R200_STATECHANGE( rmesa, ctx );
++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
++
++ R200_STATECHANGE( rmesa, vtx );
++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
++ rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
+
+- rmesa->recheck_texgen[unit] = GL_TRUE;
+- }
+-
+- if (t->dirty_state & (1<<unit)) {
+- import_tex_obj_state( rmesa, unit, t );
+- }
++ rmesa->recheck_texgen[unit] = GL_TRUE;
++ import_tex_obj_state( rmesa, unit, t );
+
+ if (rmesa->recheck_texgen[unit]) {
+ GLboolean fallback = !r200_validate_texgen( ctx, unit );
+ TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+ rmesa->recheck_texgen[unit] = 0;
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+
+- FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
+- return !t->border_fallback;
+-}
++ t->validated = GL_TRUE;
+
++ FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+
++ return !t->border_fallback;
++}
+
+-static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
++static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
+ {
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
+
+- if ( unitneeded & (TEXTURE_RECT_BIT) ) {
+- return (enable_tex_rect( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+- else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+- return (enable_tex_2d( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+-#if ENABLE_HW_3D_TEXTURE
+- else if ( unitneeded & (TEXTURE_3D_BIT) ) {
+- return (enable_tex_3d( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+-#endif
+- else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
+- return (enable_tex_cube( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+- else if ( unitneeded ) {
+- return GL_FALSE;
+- }
+- else {
+- disable_tex( ctx, unit );
+- return GL_TRUE;
++ if (!unitneeded) {
++ /* disable the unit */
++ disable_tex_obj_state(rmesa, unit);
++ return GL_TRUE;
+ }
++
++ if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
++ _mesa_warning(ctx,
++ "failed to validate texture for unit %d.\n",
++ unit);
++ rmesa->state.texture.unit[unit].texobj = NULL;
++ return GL_FALSE;
++ }
++
++ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
++ return GL_TRUE;
+ }
+
+
+@@ -1846,11 +1491,11 @@ void r200UpdateTextureState( GLcontext *ctx )
+
+ FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
+
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ r200ChooseVertexState( ctx );
+
+
+- if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+
+ /*
+ * T0 hang workaround -------------
+@@ -1863,7 +1508,7 @@ void r200UpdateTextureState( GLcontext *ctx )
+ R200_STATECHANGE(rmesa, tex[1]);
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+ if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
++ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
+ }
+ else if (!ctx->ATIFragmentShader._Enabled) {
+diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
+index 562992f..888f91d 100644
+--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
++++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
+@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) {
+ }
+ /* could optimize setting up vertex progs away for non-tcl hw */
+ fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
+- rmesa->r200Screen->drmSupportsVertexProgram);
++ rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
+ TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
+- if (rmesa->TclFallback) return;
++ if (rmesa->radeon.TclFallback) return;
+
+ R200_STATECHANGE( rmesa, vap );
+ /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
+diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
+index 6ca9342..497b1ec 100644
+--- a/src/mesa/drivers/dri/r300/Makefile
++++ b/src/mesa/drivers/dri/r300/Makefile
+@@ -3,6 +3,8 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = r300_dri.so
+
+ MINIGLX_SOURCES = server/radeon_dri.c
+@@ -20,20 +22,24 @@ COMMON_SOURCES = \
+ ../common/xmlconfig.c \
+ ../common/dri_util.c
+
++RADEON_COMMON_SOURCES = \
++ radeon_texture.c \
++ radeon_common_context.c \
++ radeon_common.c \
++ radeon_dma.c \
++ radeon_lock.c \
++ radeon_bo_legacy.c \
++ radeon_cs_legacy.c \
++ radeon_mipmap_tree.c \
++ radeon_span.c
++
+ DRIVER_SOURCES = \
+ radeon_screen.c \
+- radeon_context.c \
+- radeon_ioctl.c \
+- radeon_lock.c \
+- radeon_span.c \
+- radeon_state.c \
+- r300_mem.c \
+ r300_context.c \
+ r300_ioctl.c \
+ r300_cmdbuf.c \
+ r300_state.c \
+ r300_render.c \
+- r300_texmem.c \
+ r300_tex.c \
+ r300_texstate.c \
+ radeon_program.c \
+@@ -49,12 +55,15 @@ DRIVER_SOURCES = \
+ r300_shader.c \
+ r300_emit.c \
+ r300_swtcl.c \
++ $(RADEON_COMMON_SOURCES) \
+ $(EGL_SOURCES)
+
+ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
+- -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
++ -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
++# -DRADEON_BO_TRACK \
++ -Wall
+
+ SYMLINKS = \
+ server/radeon_dri.c \
+@@ -68,7 +77,28 @@ COMMON_SYMLINKS = \
+ radeon_chipset.h \
+ radeon_screen.c \
+ radeon_screen.h \
+- radeon_span.h
++ radeon_span.h \
++ radeon_span.c \
++ radeon_bo_legacy.c \
++ radeon_cs_legacy.c \
++ radeon_bo_legacy.h \
++ radeon_cs_legacy.h \
++ radeon_bocs_wrapper.h \
++ radeon_lock.c \
++ radeon_lock.h \
++ radeon_common.c \
++ radeon_common.h \
++ radeon_common_context.c \
++ radeon_common_context.h \
++ radeon_cmdbuf.h \
++ radeon_dma.c \
++ radeon_dma.h \
++ radeon_mipmap_tree.c \
++ radeon_mipmap_tree.h \
++ radeon_texture.c \
++ radeon_texture.h
++
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+ ##### TARGETS #####
+
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+index c9e1dfe..cfc9785 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+@@ -44,235 +44,288 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "radeon_drm.h"
+
+-#include "radeon_ioctl.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "radeon_reg.h"
+ #include "r300_reg.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_emit.h"
++#include "radeon_bocs_wrapper.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_state.h"
++#include "radeon_reg.h"
+
+-// Set this to 1 for extremely verbose debugging of command buffers
+-#define DEBUG_CMDBUF 0
++#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
++# define RADEON_ONE_REG_WR (1 << 15)
+
+-/**
+- * Send the current command buffer via ioctl to the hardware.
++/** # of dwords reserved for additional instructions that may need to be written
++ * during flushing.
+ */
+-int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
++#define SPACE_FOR_FLUSHING 4
++
++static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
+ {
+- int ret;
+- int i;
+- drm_radeon_cmd_buffer_t cmd;
+- int start;
+-
+- if (r300->radeon.lost_context) {
+- start = 0;
+- r300->radeon.lost_context = GL_FALSE;
+- } else
+- start = r300->cmdbuf.count_reemit;
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL) {
+- fprintf(stderr, "%s from %s - %i cliprects\n",
+- __FUNCTION__, caller, r300->radeon.numClipRects);
+-
+- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
+- for (i = start; i < r300->cmdbuf.count_used; ++i)
+- fprintf(stderr, "%d: %08x\n", i,
+- r300->cmdbuf.cmd_buf[i]);
+- }
++ if (r300->radeon.radeonScreen->kernel_mm) {
++ return ((((*pkt) >> 16) & 0x3FFF) + 1);
++ } else {
++ drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
++ return t->packet0.count;
++ }
++ return 0;
++}
+
+- cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
+- cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
++#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
++#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+
+- if (r300->radeon.state.scissor.enabled) {
+- cmd.nbox = r300->radeon.state.scissor.numClipRects;
+- cmd.boxes =
+- (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
+- } else {
+- cmd.nbox = r300->radeon.numClipRects;
+- cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
++{
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ drm_r300_cmd_header_t cmd;
++ uint32_t addr, ndw, i;
++
++ if (!r300->radeon.radeonScreen->kernel_mm) {
++ uint32_t dwords;
++ dwords = (*atom->check) (ctx, atom);
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_BATCH_TABLE(atom->cmd, dwords);
++ END_BATCH();
++ return;
+ }
+-
+- ret = drmCommandWrite(r300->radeon.dri.fd,
+- DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "Syncing in %s (from %s)\n\n",
+- __FUNCTION__, caller);
+- radeonWaitForIdleLocked(&r300->radeon);
++
++ cmd.u = atom->cmd[0];
++ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
++ ndw = cmd.vpu.count * 4;
++ if (ndw) {
++
++ if (r300->vap_flush_needed) {
++ BEGIN_BATCH_NO_AUTOSTATE(15 + ndw);
++
++ /* flush processing vertices */
++ OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
++ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++ OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
++ OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++ r300->vap_flush_needed = GL_FALSE;
++ } else {
++ BEGIN_BATCH_NO_AUTOSTATE(5 + ndw);
++ }
++ OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
++ OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
++ for (i = 0; i < ndw; i++) {
++ OUT_BATCH(atom->cmd[i+1]);
++ }
++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++ END_BATCH();
+ }
+-
+- r300->dma.nr_released_bufs = 0;
+- r300->cmdbuf.count_used = 0;
+- r300->cmdbuf.count_reemit = 0;
+-
+- return ret;
+ }
+
+-int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+- int ret;
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ drm_r300_cmd_header_t cmd;
++ uint32_t addr, ndw, i, sz;
++ int type, clamp, stride;
++
++ if (!r300->radeon.radeonScreen->kernel_mm) {
++ uint32_t dwords;
++ dwords = (*atom->check) (ctx, atom);
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_BATCH_TABLE(atom->cmd, dwords);
++ END_BATCH();
++ return;
++ }
+
+- LOCK_HARDWARE(&r300->radeon);
++ cmd.u = atom->cmd[0];
++ sz = cmd.r500fp.count;
++ addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
++ type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
++ clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+
+- ret = r300FlushCmdBufLocked(r300, caller);
++ addr |= (type << 16);
++ addr |= (clamp << 17);
+
+- UNLOCK_HARDWARE(&r300->radeon);
++ stride = type ? 4 : 6;
+
+- if (ret) {
+- fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
+- _mesa_exit(ret);
+- }
++ ndw = sz * stride;
++ if (ndw) {
+
+- return ret;
++ BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
++ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
++ OUT_BATCH(addr);
++ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
++ for (i = 0; i < ndw; i++) {
++ OUT_BATCH(atom->cmd[i+1]);
++ }
++ END_BATCH();
++ }
+ }
+
+-static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
++static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+- int i;
+- int dwords = (*state->check) (r300, state);
+-
+- fprintf(stderr, " emit %s %d/%d\n", state->name, dwords,
+- state->cmd_size);
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
++ int notexture = 0;
++
++ if (numtmus) {
++ int i;
++
++ for(i = 0; i < numtmus; ++i) {
++ radeonTexObj *t = r300->hw.textures[i];
++
++ if (!t)
++ notexture = 1;
++ }
+
+- if (RADEON_DEBUG & DEBUG_VERBOSE) {
+- for (i = 0; i < dwords; i++) {
+- fprintf(stderr, " %s[%d]: %08x\n",
+- state->name, i, state->cmd[i]);
++ if (r300->radeon.radeonScreen->kernel_mm && notexture) {
++ return;
+ }
++ BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus);
++ for(i = 0; i < numtmus; ++i) {
++ radeonTexObj *t = r300->hw.textures[i];
++ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
++ if (t && !t->image_override) {
++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
++ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ } else if (!t) {
++ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
++ } else { /* override cases */
++ if (t->bo) {
++ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
++ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ } else if (!r300->radeon.radeonScreen->kernel_mm) {
++ OUT_BATCH(t->override_offset);
++ }
++ else
++ OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
++ }
++ }
++ END_BATCH();
+ }
+ }
+
+-/**
+- * Emit all atoms with a dirty field equal to dirty.
+- *
+- * The caller must have ensured that there is enough space in the command
+- * buffer.
+- */
+-static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
++static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+- struct r300_state_atom *atom;
+- uint32_t *dest;
+- int dwords;
+-
+- dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
+-
+- /* Emit WAIT */
+- *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+- dest++;
+- r300->cmdbuf.count_used++;
+-
+- /* Emit cache flush */
+- *dest = cmdpacket0(R300_TX_INVALTAGS, 1);
+- dest++;
+- r300->cmdbuf.count_used++;
+-
+- *dest = R300_TX_FLUSH;
+- dest++;
+- r300->cmdbuf.count_used++;
+-
+- /* Emit END3D */
+- *dest = cmdpacify();
+- dest++;
+- r300->cmdbuf.count_used++;
+-
+- /* Emit actual atoms */
+-
+- foreach(atom, &r300->hw.atomlist) {
+- if ((atom->dirty || r300->hw.all_dirty) == dirty) {
+- dwords = (*atom->check) (r300, atom);
+- if (dwords) {
+- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+- r300PrintStateAtom(r300, atom);
+- }
+- memcpy(dest, atom->cmd, dwords * 4);
+- dest += dwords;
+- r300->cmdbuf.count_used += dwords;
+- atom->dirty = GL_FALSE;
+- } else {
+- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+- fprintf(stderr, " skip state %s\n",
+- atom->name);
+- }
+- }
+- }
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ struct radeon_renderbuffer *rrb;
++ uint32_t cbpitch;
++
++ rrb = radeon_get_colorbuffer(&r300->radeon);
++ if (!rrb || !rrb->bo) {
++ fprintf(stderr, "no rrb\n");
++ return;
+ }
++
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->cpp == 4)
++ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++ else
++ cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++ cbpitch |= R300_COLOR_TILE_ENABLE;
++
++ BEGIN_BATCH_NO_AUTOSTATE(6);
++ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
++ OUT_BATCH(cbpitch);
++ END_BATCH();
+ }
+
+-/**
+- * Copy dirty hardware state atoms into the command buffer.
+- *
+- * We also copy out clean state if we're at the start of a buffer. That makes
+- * it easy to recover from lost contexts.
+- */
+-void r300EmitState(r300ContextPtr r300)
++static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
+- fprintf(stderr, "%s\n", __FUNCTION__);
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ struct radeon_renderbuffer *rrb;
++ uint32_t zbpitch;
+
+- if (r300->cmdbuf.count_used && !r300->hw.is_dirty
+- && !r300->hw.all_dirty)
++ rrb = radeon_get_depthbuffer(&r300->radeon);
++ if (!rrb)
+ return;
+
+- /* To avoid going across the entire set of states multiple times, just check
+- * for enough space for the case of emitting all state, and inline the
+- * r300AllocCmdBuf code here without all the checks.
+- */
+- r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
+-
+- if (!r300->cmdbuf.count_used) {
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Begin reemit state\n");
+-
+- r300EmitAtoms(r300, GL_FALSE);
+- r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
++ zbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ }
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
++ zbpitch |= R300_DEPTHMICROTILE_TILED;
++ }
++
++ BEGIN_BATCH_NO_AUTOSTATE(6);
++ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
++ END_BATCH();
++}
+
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Begin dirty state\n");
+-
+- r300EmitAtoms(r300, GL_TRUE);
+-
+- assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
++static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
++{
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ BATCH_LOCALS(&r300->radeon);
++ struct radeon_renderbuffer *rrb;
++ uint32_t zbpitch;
++ uint32_t format;
++
++ rrb = radeon_get_depthbuffer(&r300->radeon);
++ if (!rrb)
++ format = 0;
++ else {
++ if (rrb->cpp == 2)
++ format = R300_DEPTHFORMAT_16BIT_INT_Z;
++ else if (rrb->cpp == 4)
++ format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
++ }
+
+- r300->hw.is_dirty = GL_FALSE;
+- r300->hw.all_dirty = GL_FALSE;
++ OUT_BATCH(atom->cmd[0]);
++ atom->cmd[1] &= ~(3 << 0);
++ atom->cmd[1] |= format;
++ OUT_BATCH(atom->cmd[1]);
++ OUT_BATCH(atom->cmd[2]);
++ OUT_BATCH(atom->cmd[3]);
++ OUT_BATCH(atom->cmd[4]);
+ }
+
+-#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
+-#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+-#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+-
+-static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
++static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ return atom->cmd_size;
+ }
+
+-static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
++static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
++ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+- cnt = packet0_count(atom->cmd);
++ if (atom->cmd[0] == CP_PACKET2) {
++ return 0;
++ }
++ cnt = packet0_count(r300, atom->cmd);
+ return cnt ? cnt + 1 : 0;
+ }
+
+-static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ int cnt;
++
+ cnt = vpu_count(atom->cmd);
+ return cnt ? (cnt * 4) + 1 : 0;
+ }
+
+-static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ int cnt;
++
+ cnt = r500fp_count(atom->cmd);
+ return cnt ? (cnt * 6) + 1 : 0;
+ }
+
+-static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ int cnt;
++
+ cnt = r500fp_count(atom->cmd);
+ return cnt ? (cnt * 4) + 1 : 0;
+ }
+@@ -285,8 +338,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
+ r300->hw.ATOM.idx = (IDX); \
+ r300->hw.ATOM.check = check_##CHK; \
+ r300->hw.ATOM.dirty = GL_FALSE; \
+- r300->hw.max_state_size += (SZ); \
+- insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \
++ r300->radeon.hw.max_state_size += (SZ); \
++ insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \
+ } while (0)
+ /**
+ * Allocate memory for the command buffer and initialize the state atom
+@@ -294,7 +347,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
+ */
+ void r300InitCmdBuf(r300ContextPtr r300)
+ {
+- int size, mtu;
++ int mtu;
+ int has_tcl = 1;
+ int is_r500 = 0;
+ int i;
+@@ -305,7 +358,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ is_r500 = 1;
+
+- r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
++ r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
+
+ mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ if (RADEON_DEBUG & DEBUG_TEXTURE) {
+@@ -313,97 +366,97 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ }
+
+ /* Setup the atom linked list */
+- make_empty_list(&r300->hw.atomlist);
+- r300->hw.atomlist.name = "atom-list";
++ make_empty_list(&r300->radeon.hw.atomlist);
++ r300->radeon.hw.atomlist.name = "atom-list";
+
+ /* Initialize state atoms */
+ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
+- r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
++ r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
+ ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
+- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
++ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
+- r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
++ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
+ if (is_r500) {
+ ALLOC_STATE(vap_index_offset, always, 2, 0);
+- r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
++ r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
+ r300->hw.vap_index_offset.cmd[1] = 0;
+ }
+ ALLOC_STATE(vte, always, 3, 0);
+- r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
++ r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
+ ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
+- r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
++ r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
+ ALLOC_STATE(vap_cntl_status, always, 2, 0);
+- r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
++ r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
+ ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
+ r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
+- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
+ ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
+ r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
+- cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
+ ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
+- r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
++ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
+ ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
+- r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
++ r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
+
+ if (has_tcl) {
+ ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+- r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
++ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
+ ALLOC_STATE(vap_clip, always, 5, 0);
+- r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
++ r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
+- r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
++ r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
+ }
+
+ ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
+ r300->hw.vof.cmd[R300_VOF_CMD_0] =
+- cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
+
+ if (has_tcl) {
+ ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
+ r300->hw.pvs.cmd[R300_PVS_CMD_0] =
+- cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
++ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
+ }
+
+ ALLOC_STATE(gb_enable, always, 2, 0);
+- r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
++ r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
+ ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+- r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
++ r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
+ ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
+- r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
++ r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
+ ALLOC_STATE(ga_point_s0, always, 5, 0);
+- r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
++ r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
+ ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
+- r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
++ r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
+ ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
+- r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
++ r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
+ ALLOC_STATE(ga_point_minmax, always, 4, 0);
+- r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
++ r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
+ ALLOC_STATE(lcntl, always, 2, 0);
+- r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
++ r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
+ ALLOC_STATE(ga_line_stipple, always, 4, 0);
+- r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
++ r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
+ ALLOC_STATE(shade, always, 5, 0);
+- r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
++ r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
+ ALLOC_STATE(polygon_mode, always, 4, 0);
+- r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
++ r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
+ ALLOC_STATE(fogp, always, 3, 0);
+- r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
++ r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
+ ALLOC_STATE(zbias_cntl, always, 2, 0);
+- r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
++ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
+ ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
+ r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
+- cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
++ cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ ALLOC_STATE(occlusion_cntl, always, 2, 0);
+- r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
++ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
+ ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
+- r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
++ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
+ ALLOC_STATE(su_depth_scale, always, 3, 0);
+- r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
++ r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
+ ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
+- r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
++ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
+ if (is_r500) {
+ ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
+- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
++ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
+ for (i = 0; i < 8; i++) {
+ r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+@@ -412,133 +465,149 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
+ }
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
+ } else {
+ ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
+- r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
++ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
+ }
+ ALLOC_STATE(sc_hyperz, always, 3, 0);
+- r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
++ r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
+ ALLOC_STATE(sc_screendoor, always, 2, 0);
+- r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
++ r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ ALLOC_STATE(us_out_fmt, always, 6, 0);
+- r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
++ r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
+
+ if (is_r500) {
+ ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
+- r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
++ r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
+ r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
+- r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
+- r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
++ r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
++ r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
+ r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
+
+ ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
+- r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
++ r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
++ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
++ r300->hw.r500fp.emit = emit_r500fp;
+ ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
+- r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
++ r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
++ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
++ r300->hw.r500fp_const.emit = emit_r500fp;
+ } else {
+ ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+- r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
+- r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
++ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
++ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
++
+ ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+- r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
++ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
+
+ ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+- r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
++ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
+ ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+- r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
++ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
+ ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+- r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
++ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
+ ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+- r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
++ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
+ ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+- r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
++ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
+ }
+ ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
+- r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
++ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
+ ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
+- r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
++ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
+ ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
+- r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
++ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
+ ALLOC_STATE(fg_depth_src, always, 2, 0);
+- r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
++ r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
+ ALLOC_STATE(rb3d_cctl, always, 2, 0);
+- r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
++ r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
+ ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
+- r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
++ r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
+ ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
+- r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
++ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
+ if (is_r500) {
+ ALLOC_STATE(blend_color, always, 3, 0);
+- r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
++ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
+ } else {
+ ALLOC_STATE(blend_color, always, 2, 0);
+- r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
++ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
+ }
+ ALLOC_STATE(rop, always, 2, 0);
+- r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
++ r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
+ ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
+- r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
+- r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
++ r300->hw.cb.emit = &emit_cb_offset;
+ ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+- r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
++ r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
+ ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+- r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
++ r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
++ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
+ ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
+ r300->hw.zs.cmd[R300_ZS_CMD_0] =
+- cmdpacket0(R300_ZB_CNTL, 3);
++ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
++
+ ALLOC_STATE(zstencil_format, always, 5, 0);
+ r300->hw.zstencil_format.cmd[0] =
+- cmdpacket0(R300_ZB_FORMAT, 4);
++ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
++ r300->hw.zstencil_format.emit = emit_zstencil_format;
++
+ ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
+- r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
++ r300->hw.zb.emit = emit_zb_offset;
+ ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+- r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
++ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
+ ALLOC_STATE(unk4F30, always, 3, 0);
+- r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
++ r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
+ ALLOC_STATE(zb_hiz_offset, always, 2, 0);
+- r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
++ r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
+ ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
+- r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
++ r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
+
+ /* VPU only on TCL */
+ if (has_tcl) {
+ int i;
+ ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
+- r300->hw.vpi.cmd[R300_VPI_CMD_0] =
+- cmdvpu(R300_PVS_CODE_START, 0);
++ r300->hw.vpi.cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
++ r300->hw.vpi.emit = emit_vpu;
+
+ if (is_r500) {
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+- r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+- cmdvpu(R500_PVS_CONST_START, 0);
++ r300->hw.vpp.cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
++ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+- r300->hw.vps.cmd[R300_VPS_CMD_0] =
+- cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
++ r300->hw.vps.cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
++ r300->hw.vps.emit = emit_vpu;
+
+ for (i = 0; i < 6; i++) {
+- ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+- cmdvpu(R500_PVS_UCP_START + i, 1);
++ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
++ r300->hw.vpucp[i].cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen,
++ R500_PVS_UCP_START + i, 1);
++ r300->hw.vpucp[i].emit = emit_vpu;
+ }
+ } else {
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+- r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+- cmdvpu(R300_PVS_CONST_START, 0);
++ r300->hw.vpp.cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
++ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+- r300->hw.vps.cmd[R300_VPS_CMD_0] =
+- cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
++ r300->hw.vps.cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
++ r300->hw.vps.emit = emit_vpu;
+
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+- r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+- cmdvpu(R300_PVS_UCP_START + i, 1);
++ r300->hw.vpucp[i].cmd[0] =
++ cmdvpu(r300->radeon.radeonScreen,
++ R300_PVS_UCP_START + i, 1);
++ r300->hw.vpucp[i].emit = emit_vpu;
+ }
+ }
+ }
+@@ -546,61 +615,39 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ /* Textures */
+ ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FILTER0_0, 0);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
+
+ ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FILTER1_0, 0);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
+
+ ALLOC_STATE(tex.size, variable, mtu + 1, 0);
+- r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
++ r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
+
+ ALLOC_STATE(tex.format, variable, mtu + 1, 0);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FORMAT_0, 0);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
+
+ ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+- r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
++ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
+
+- ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
++ ALLOC_STATE(tex.offset, variable, 1, 0);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_OFFSET_0, 0);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
++ r300->hw.tex.offset.emit = &emit_tex_offsets;
+
+ ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
+
+ ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
+-
+- r300->hw.is_dirty = GL_TRUE;
+- r300->hw.all_dirty = GL_TRUE;
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
+
+- /* Initialize command buffer */
+- size =
+- 256 * driQueryOptioni(&r300->radeon.optionCache,
+- "command_buffer_size");
+- if (size < 2 * r300->hw.max_state_size) {
+- size = 2 * r300->hw.max_state_size + 65535;
+- }
+- if (size > 64 * 256)
+- size = 64 * 256;
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
+- fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
+- sizeof(drm_r300_cmd_header_t));
+- fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
+- sizeof(drm_radeon_cmd_buffer_t));
+- fprintf(stderr,
+- "Allocating %d bytes command buffer (max state is %d bytes)\n",
+- size * 4, r300->hw.max_state_size * 4);
+- }
++ r300->radeon.hw.is_dirty = GL_TRUE;
++ r300->radeon.hw.all_dirty = GL_TRUE;
+
+- r300->cmdbuf.size = size;
+- r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
+- r300->cmdbuf.count_used = 0;
+- r300->cmdbuf.count_reemit = 0;
++ rcommonInitCmdBuf(&r300->radeon);
+ }
+
+ /**
+@@ -608,68 +655,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ */
+ void r300DestroyCmdBuf(r300ContextPtr r300)
+ {
+- struct r300_state_atom *atom;
++ struct radeon_state_atom *atom;
+
+- FREE(r300->cmdbuf.cmd_buf);
+-
+- foreach(atom, &r300->hw.atomlist) {
++ foreach(atom, &r300->radeon.hw.atomlist) {
+ FREE(atom->cmd);
+ }
+-}
+-
+-void r300EmitBlit(r300ContextPtr rmesa,
+- GLuint color_fmt,
+- GLuint src_pitch,
+- GLuint src_offset,
+- GLuint dst_pitch,
+- GLuint dst_offset,
+- GLint srcx, GLint srcy,
+- GLint dstx, GLint dsty, GLuint w, GLuint h)
+-{
+- drm_r300_cmd_header_t *cmd;
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr,
+- "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+- __FUNCTION__, src_pitch, src_offset, srcx, srcy,
+- dst_pitch, dst_offset, dstx, dsty, w, h);
+-
+- assert((src_pitch & 63) == 0);
+- assert((dst_pitch & 63) == 0);
+- assert((src_offset & 1023) == 0);
+- assert((dst_offset & 1023) == 0);
+- assert(w < (1 << 16));
+- assert(h < (1 << 16));
+-
+- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
+-
+- cmd[0].header.cmd_type = R300_CMD_PACKET3;
+- cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
+- cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
+- cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+- RADEON_GMC_BRUSH_NONE |
+- (color_fmt << 8) |
+- RADEON_GMC_SRC_DATATYPE_COLOR |
+- RADEON_ROP3_S |
+- RADEON_DP_SRC_SOURCE_MEMORY |
+- RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+-
+- cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
+- cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
+- cmd[5].u = (srcx << 16) | srcy;
+- cmd[6].u = (dstx << 16) | dsty; /* dst */
+- cmd[7].u = (w << 16) | h;
+-}
+-
+-void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
+-{
+- drm_r300_cmd_header_t *cmd;
+-
+- assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
+
+- cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+- cmd[0].u = 0;
+- cmd[0].wait.cmd_type = R300_CMD_WAIT;
+- cmd[0].wait.flags = flags;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+index a8eaa58..b7798eb 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+@@ -38,79 +38,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "r300_context.h"
+
+-extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
+-extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
+-
+-extern void r300EmitState(r300ContextPtr r300);
+-
+ extern void r300InitCmdBuf(r300ContextPtr r300);
+ extern void r300DestroyCmdBuf(r300ContextPtr r300);
+
+-/**
+- * Make sure that enough space is available in the command buffer
+- * by flushing if necessary.
+- *
+- * \param dwords The number of dwords we need to be free on the command buffer
+- */
+-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
+- int dwords, const char *caller)
+-{
+- assert(dwords < r300->cmdbuf.size);
+-
+- if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
+- r300FlushCmdBuf(r300, caller);
+-}
+-
+-/**
+- * Allocate the given number of dwords in the command buffer and return
+- * a pointer to the allocated area.
+- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
+- * causes state reemission after a flush. This is necessary to ensure
+- * correct hardware state after an unlock.
+- */
+-static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+- int dwords, const char *caller)
+-{
+- uint32_t *ptr;
+-
+- r300EnsureCmdBufSpace(r300, dwords, caller);
+-
+- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+- r300->cmdbuf.count_used += dwords;
+- return ptr;
+-}
+-
+-static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
+- int dwords, const char *caller)
+-{
+- uint32_t *ptr;
+-
+- r300EnsureCmdBufSpace(r300, dwords, caller);
+-
+- if (!r300->cmdbuf.count_used) {
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr,
+- "Reemit state after flush (from %s)\n", caller);
+- r300EmitState(r300);
+- }
+-
+- ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+- r300->cmdbuf.count_used += dwords;
+- return ptr;
+-}
+
+-extern void r300EmitBlit(r300ContextPtr rmesa,
+- GLuint color_fmt,
+- GLuint src_pitch,
+- GLuint src_offset,
+- GLuint dst_pitch,
+- GLuint dst_offset,
+- GLint srcx, GLint srcy,
+- GLint dstx, GLint dsty, GLuint w, GLuint h);
++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
+
+-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
+-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
+-extern void r300EmitVertexShader(r300ContextPtr rmesa);
+-extern void r300EmitPixelShader(r300ContextPtr rmesa);
++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
+
+ #endif /* __R300_CMDBUF_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
+index 3743627..5d497ef 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.c
++++ b/src/mesa/drivers/dri/r300/r300_context.c
+@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/extensions.h"
+ #include "main/state.h"
+ #include "main/bufferobj.h"
++#include "main/texobj.h"
+
+ #include "swrast/swrast.h"
+ #include "swrast_setup/swrast_setup.h"
+@@ -55,19 +56,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "drivers/common/driverfuncs.h"
+
+-#include "radeon_ioctl.h"
+-#include "radeon_span.h"
+ #include "r300_context.h"
++#include "radeon_context.h"
++#include "radeon_span.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "r300_tex.h"
+ #include "r300_emit.h"
+ #include "r300_swtcl.h"
++#include "radeon_bocs_wrapper.h"
+
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+
+ #include "vblank.h"
+ #include "utils.h"
+@@ -77,19 +76,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ int future_hw_tcl_on = 1;
+ int hw_tcl_on = 1;
+
+-#define need_GL_EXT_stencil_two_side
+-#define need_GL_ARB_multisample
++#define need_GL_VERSION_2_0
+ #define need_GL_ARB_point_parameters
+-#define need_GL_ARB_texture_compression
+-#define need_GL_ARB_vertex_buffer_object
+ #define need_GL_ARB_vertex_program
+-#define need_GL_EXT_blend_minmax
+-//#define need_GL_EXT_fog_coord
+-#define need_GL_EXT_multi_draw_arrays
+-#define need_GL_EXT_secondary_color
+ #define need_GL_EXT_blend_equation_separate
+ #define need_GL_EXT_blend_func_separate
++#define need_GL_EXT_blend_minmax
++//#define need_GL_EXT_fog_coord
+ #define need_GL_EXT_gpu_program_parameters
++#define need_GL_EXT_secondary_color
++#define need_GL_EXT_stencil_two_side
++#define need_GL_ATI_separate_stencil
+ #define need_GL_NV_vertex_program
+ #include "extension_helper.h"
+
+@@ -97,27 +94,23 @@ const struct dri_extension card_extensions[] = {
+ /* *INDENT-OFF* */
+ {"GL_ARB_depth_texture", NULL},
+ {"GL_ARB_fragment_program", NULL},
+- {"GL_ARB_multisample", GL_ARB_multisample_functions},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_shadow", NULL},
+ {"GL_ARB_shadow_ambient", NULL},
+ {"GL_ARB_texture_border_clamp", NULL},
+- {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_crossbar", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+- {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+ // {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+- {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
+ {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_shadow_funcs", NULL},
+@@ -130,6 +123,7 @@ const struct dri_extension card_extensions[] = {
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_EXT_texture_mirror_clamp", NULL},
+ {"GL_EXT_texture_rectangle", NULL},
++ {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
+ {"GL_ATI_texture_env_combine3", NULL},
+ {"GL_ATI_texture_mirror_once", NULL},
+ {"GL_MESA_pack_invert", NULL},
+@@ -142,6 +136,16 @@ const struct dri_extension card_extensions[] = {
+ /* *INDENT-ON* */
+ };
+
++
++/**
++ * The GL 2.0 functions are needed to make display lists work with
++ * functions added by GL_ATI_separate_stencil.
++ */
++const struct dri_extension gl_20_extension[] = {
++ {"GL_VERSION_2_0", GL_VERSION_2_0_functions },
++};
++
++
+ extern struct tnl_pipeline_stage _r300_render_stage;
+ extern const struct tnl_pipeline_stage _r300_tcl_stage;
+
+@@ -178,6 +182,82 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
+ 0,
+ };
+
++static void r300RunPipeline(GLcontext * ctx)
++{
++ _mesa_lock_context_textures(ctx);
++
++ if (ctx->NewState)
++ _mesa_update_state_locked(ctx);
++
++ _tnl_run_pipeline(ctx);
++ _mesa_unlock_context_textures(ctx);
++}
++
++static void r300_get_lock(radeonContextPtr rmesa)
++{
++ drm_radeon_sarea_t *sarea = rmesa->sarea;
++
++ if (sarea->ctx_owner != rmesa->dri.hwContext) {
++ sarea->ctx_owner = rmesa->dri.hwContext;
++ if (!rmesa->radeonScreen->kernel_mm)
++ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
++ }
++}
++
++static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++ /* please flush pipe do all pending work */
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_SC_SCREENDOOR, 1));
++ radeon_cs_write_dword(cs, 0x0);
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_SC_SCREENDOOR, 1));
++ radeon_cs_write_dword(cs, 0x00FFFFFF);
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_SC_HYPERZ, 1));
++ radeon_cs_write_dword(cs, 0x0);
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_US_CONFIG, 1));
++ radeon_cs_write_dword(cs, 0x0);
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_ZB_CNTL, 1));
++ radeon_cs_write_dword(cs, 0x0);
++ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_RB3D_DSTCACHE_CTLSTAT, 1));
++ radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++ R300_ZB_ZCACHE_CTLSTAT, 1));
++ radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
++ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
++ R300_WAIT_3D | R300_WAIT_3D_CLEAN));
++}
++
++static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
++{
++ r300ContextPtr r300 = (r300ContextPtr)radeon;
++ BATCH_LOCALS(radeon);
++
++ r300->vap_flush_needed = GL_TRUE;
++
++ cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++ BEGIN_BATCH_NO_AUTOSTATE(2);
++ OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
++ END_BATCH();
++ end_3d(radeon);
++}
++
++static void r300_init_vtbl(radeonContextPtr radeon)
++{
++ radeon->vtbl.get_lock = r300_get_lock;
++ radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
++ radeon->vtbl.update_draw_buffer = r300UpdateDrawBuffer;
++ radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
++ radeon->vtbl.swtcl_flush = r300_swtcl_flush;
++ radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
++}
++
++
+ /* Create the device specific rendering context.
+ */
+ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+@@ -189,7 +269,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ struct dd_function_table functions;
+ r300ContextPtr r300;
+ GLcontext *ctx;
+- int tcl_mode, i;
++ int tcl_mode;
+
+ assert(glVisual);
+ assert(driContextPriv);
+@@ -203,13 +283,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ hw_tcl_on = future_hw_tcl_on = 0;
+
++ r300_init_vtbl(&r300->radeon);
+ /* Parse configuration files.
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+ driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r300");
+- r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
++ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
+ "def_max_anisotropy");
+
+ /* Init default driver functions then plug in our R300-specific functions
+@@ -221,10 +302,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ r300InitTextureFuncs(&functions);
+ r300InitShaderFuncs(&functions);
+
+-#ifdef USER_BUFFERS
+- r300_mem_init(r300);
+-#endif
+-
+ if (!radeonInitContext(&r300->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+@@ -233,37 +310,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ }
+
+ /* Init r300 context data */
+- r300->dma.buf0_address =
+- r300->radeon.radeonScreen->buffers->list[0].address;
+-
+- (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
+- make_empty_list(&r300->swapped);
+-
+- r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
+- assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
+- for (i = 0; i < r300->nr_heaps; i++) {
+- /* *INDENT-OFF* */
+- r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
+- screen->
+- texSize[i], 12,
+- RADEON_NR_TEX_REGIONS,
+- (drmTextureRegionPtr)
+- r300->radeon.sarea->
+- tex_list[i],
+- &r300->radeon.sarea->
+- tex_age[i],
+- &r300->swapped,
+- sizeof
+- (r300TexObj),
+- (destroy_texture_object_t
+- *)
+- r300DestroyTexObj);
+- /* *INDENT-ON* */
+- }
+- r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
++ r300->radeon.texture_depth = driQueryOptioni(&r300->radeon.optionCache,
+ "texture_depth");
+- if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+- r300->texture_depth = (screen->cpp == 4) ?
++ if (r300->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++ r300->radeon.texture_depth = (screen->cpp == 4) ?
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+ /* Set the maximum texture size small enough that we can guarentee that
+@@ -298,13 +348,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+ ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+
+-#ifdef USER_BUFFERS
+ /* Needs further modifications */
+ #if 0
+ ctx->Const.MaxArrayLockSize =
+ ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
+ #endif
+-#endif
++
++ ctx->Const.MaxDrawBuffers = 1;
+
+ /* Initialize the software rasterizer and helper modules.
+ */
+@@ -377,13 +427,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ driQueryOptionb(&r300->radeon.optionCache,
+ "disable_lowimpact_fallback");
+
+- radeonInitSpanFuncs(ctx);
++ radeonInitSpanFuncs( ctx );
+ r300InitCmdBuf(r300);
+ r300InitState(r300);
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ r300InitSwtcl(ctx);
+
+- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
++ TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline;
+
+ tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
+ if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
+@@ -406,72 +456,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ return GL_TRUE;
+ }
+
+-static void r300FreeGartAllocations(r300ContextPtr r300)
+-{
+- int i, ret, tries = 0, done_age, in_use = 0;
+- drm_radeon_mem_free_t memfree;
+-
+- memfree.region = RADEON_MEM_REGION_GART;
+-
+-#ifdef USER_BUFFERS
+- for (i = r300->rmm->u_last; i > 0; i--) {
+- if (r300->rmm->u_list[i].ptr == NULL) {
+- continue;
+- }
+-
+- /* check whether this buffer is still in use */
+- if (r300->rmm->u_list[i].pending) {
+- in_use++;
+- }
+- }
+- /* Cannot flush/lock if no context exists. */
+- if (in_use)
+- r300FlushCmdBuf(r300, __FUNCTION__);
+-
+- done_age = radeonGetAge((radeonContextPtr) r300);
+-
+- for (i = r300->rmm->u_last; i > 0; i--) {
+- if (r300->rmm->u_list[i].ptr == NULL) {
+- continue;
+- }
+-
+- /* check whether this buffer is still in use */
+- if (!r300->rmm->u_list[i].pending) {
+- continue;
+- }
+-
+- assert(r300->rmm->u_list[i].h_pending == 0);
+-
+- tries = 0;
+- while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
+- usleep(10);
+- done_age = radeonGetAge((radeonContextPtr) r300);
+- }
+- if (tries >= 1000) {
+- WARN_ONCE("Failed to idle region!");
+- }
+-
+- memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
+- (char *)r300->radeon.radeonScreen->gartTextures.map;
+-
+- ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
+- DRM_RADEON_FREE, &memfree,
+- sizeof(memfree));
+- if (ret) {
+- fprintf(stderr, "Failed to free at %p\nret = %s\n",
+- r300->rmm->u_list[i].ptr, strerror(-ret));
+- } else {
+- if (i == r300->rmm->u_last)
+- r300->rmm->u_last--;
+-
+- r300->rmm->u_list[i].pending = 0;
+- r300->rmm->u_list[i].ptr = NULL;
+- }
+- }
+- r300->rmm->u_head = i;
+-#endif /* USER_BUFFERS */
+-}
+-
+ /* Destroy the device specific context.
+ */
+ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+@@ -495,55 +479,27 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+ assert(r300); /* should never be null */
+
+ if (r300) {
+- GLboolean release_texture_heaps;
+-
+- release_texture_heaps =
+- (r300->radeon.glCtx->Shared->RefCount == 1);
+ _swsetup_DestroyContext(r300->radeon.glCtx);
+ _tnl_DestroyContext(r300->radeon.glCtx);
+ _vbo_DestroyContext(r300->radeon.glCtx);
+ _swrast_DestroyContext(r300->radeon.glCtx);
+
+- if (r300->dma.current.buf) {
+- r300ReleaseDmaRegion(r300, &r300->dma.current,
+- __FUNCTION__);
+-#ifndef USER_BUFFERS
+- r300FlushCmdBuf(r300, __FUNCTION__);
+-#endif
+- }
+- r300FreeGartAllocations(r300);
+- r300DestroyCmdBuf(r300);
++ rcommonFlushCmdBuf(&r300->radeon, __FUNCTION__);
+
+ if (radeon->state.scissor.pClipRects) {
+ FREE(radeon->state.scissor.pClipRects);
+ radeon->state.scissor.pClipRects = NULL;
+ }
+
+- if (release_texture_heaps) {
+- /* This share group is about to go away, free our private
+- * texture object data.
+- */
+- int i;
+-
+- for (i = 0; i < r300->nr_heaps; i++) {
+- driDestroyTextureHeap(r300->texture_heaps[i]);
+- r300->texture_heaps[i] = NULL;
+- }
+-
+- assert(is_empty_list(&r300->swapped));
+- }
++ r300DestroyCmdBuf(r300);
+
+ radeonCleanupContext(&r300->radeon);
+
+-#ifdef USER_BUFFERS
++
+ /* the memory manager might be accessed when Mesa frees the shared
+ * state, so don't destroy it earlier
+ */
+- r300_mem_destroy(r300);
+-#endif
+
+- /* free the option cache */
+- driDestroyOptionCache(&r300->radeon.optionCache);
+
+ FREE(r300);
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
+index c15e9fa..37718f5 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.h
++++ b/src/mesa/drivers/dri/r300/r300_context.h
+@@ -42,21 +42,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_drm.h"
+ #include "dri_util.h"
+ #include "texmem.h"
++#include "radeon_common.h"
+
+ #include "main/macros.h"
+ #include "main/mtypes.h"
+ #include "main/colormac.h"
+
+-#define USER_BUFFERS
+-
+ struct r300_context;
+ typedef struct r300_context r300ContextRec;
+ typedef struct r300_context *r300ContextPtr;
+
+-#include "radeon_lock.h"
++
+ #include "main/mm.h"
+
+-/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
++/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+ I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
+ with other compilers ... GLUE!
+ */
+@@ -75,174 +74,19 @@ typedef struct r300_context *r300ContextPtr;
+ #include "r300_vertprog.h"
+ #include "r500_fragprog.h"
+
+-/**
+- * This function takes a float and packs it into a uint32_t
+- */
+-static INLINE uint32_t r300PackFloat32(float fl)
+-{
+- union {
+- float fl;
+- uint32_t u;
+- } u;
+-
+- u.fl = fl;
+- return u.u;
+-}
+-
+-/* This is probably wrong for some values, I need to test this
+- * some more. Range checking would be a good idea also..
+- *
+- * But it works for most things. I'll fix it later if someone
+- * else with a better clue doesn't
+- */
+-static INLINE uint32_t r300PackFloat24(float f)
+-{
+- float mantissa;
+- int exponent;
+- uint32_t float24 = 0;
+-
+- if (f == 0.0)
+- return 0;
+
+- mantissa = frexpf(f, &exponent);
+-
+- /* Handle -ve */
+- if (mantissa < 0) {
+- float24 |= (1 << 23);
+- mantissa = mantissa * -1.0;
+- }
+- /* Handle exponent, bias of 63 */
+- exponent += 62;
+- float24 |= (exponent << 16);
+- /* Kill 7 LSB of mantissa */
+- float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
+-
+- return float24;
+-}
+
+ /************ DMA BUFFERS **************/
+
+-/* Need refcounting on dma buffers:
+- */
+-struct r300_dma_buffer {
+- int refcount; /**< the number of retained regions in buf */
+- drmBufPtr buf;
+- int id;
+-};
+-#undef GET_START
+-#ifdef USER_BUFFERS
+-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
+-#else
+-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \
+- (rvb)->address - rmesa->dma.buf0_address + \
+- (rvb)->start)
+-#endif
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct r300_dma_region {
+- struct r300_dma_buffer *buf;
+- char *address; /* == buf->address */
+- int start, end, ptr; /* offsets from start of buf */
+-
+- int aos_offset; /* address in GART memory */
+- int aos_stride; /* distance between elements, in dwords */
+- int aos_size; /* number of components (1-4) */
+-};
+-
+-struct r300_dma {
+- /* Active dma region. Allocations for vertices and retained
+- * regions come from here. Also used for emitting random vertices,
+- * these may be flushed by calling flush_current();
+- */
+- struct r300_dma_region current;
+-
+- void (*flush) (r300ContextPtr);
+-
+- char *buf0_address; /* start of buf[0], for index calcs */
+-
+- /* Number of "in-flight" DMA buffers, i.e. the number of buffers
+- * for which a DISCARD command is currently queued in the command buffer.
+- */
+- GLuint nr_released_bufs;
+-};
+-
+- /* Texture related */
+-
+-typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct r300_tex_obj {
+- driTextureObject base;
+-
+- GLuint bufAddr; /* Offset to start of locally
+- shared texture block */
+-
+- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+- /* Six, for the cube faces */
+-
+- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+-
+- GLuint pitch; /* this isn't sent to hardware just used in calculations */
+- /* hardware register values */
+- /* Note that R200 has 8 registers per texture and R300 only 7 */
+- GLuint filter;
+- GLuint filter_1;
+- GLuint pitch_reg;
+- GLuint size; /* npot only */
+- GLuint format;
+- GLuint offset; /* Image location in the card's address space.
+- All cube faces follow. */
+- GLuint unknown4;
+- GLuint unknown5;
+- /* end hardware registers */
+-
+- /* registers computed by r200 code - keep them here to
+- compare against what is actually written.
+-
+- to be removed later.. */
+- GLuint pp_border_color;
+- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
+- GLuint format_x;
+-
+- GLboolean border_fallback;
+-
+- GLuint tile_bits; /* hw texture tile bits used on this texture */
+-};
+-
+-struct r300_texture_env_state {
+- r300TexObjPtr texobj;
+- GLenum format;
+- GLenum envMode;
+-};
+-
+ /* The blit width for texture uploads
+ */
+ #define R300_BLIT_WIDTH_BYTES 1024
+ #define R300_MAX_TEXTURE_UNITS 8
+
+ struct r300_texture_state {
+- struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
+ int tc_count; /* number of incoming texture coordinates from VAP */
+ };
+
+-/**
+- * A block of hardware state.
+- *
+- * When check returns non-zero, the returned number of dwords must be
+- * copied verbatim into the command buffer in order to update a state atom
+- * when it is dirty.
+- */
+-struct r300_state_atom {
+- struct r300_state_atom *next, *prev;
+- const char *name; /* for debug */
+- int cmd_size; /* maximum size in dwords */
+- GLuint idx; /* index in an array (e.g. textures) */
+- uint32_t *cmd;
+- GLboolean dirty;
+-
+- int (*check) (r300ContextPtr, struct r300_state_atom * atom);
+-};
+
+ #define R300_VPT_CMD_0 0
+ #define R300_VPT_XSCALE 1
+@@ -459,124 +303,98 @@ struct r300_state_atom {
+ * Cache for hardware register state.
+ */
+ struct r300_hw_state {
+- struct r300_state_atom atomlist;
+-
+- GLboolean is_dirty;
+- GLboolean all_dirty;
+- int max_state_size; /* in dwords */
+-
+- struct r300_state_atom vpt; /* viewport (1D98) */
+- struct r300_state_atom vap_cntl;
+- struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
+- struct r300_state_atom vof; /* VAP output format register 0x2090 */
+- struct r300_state_atom vte; /* (20B0) */
+- struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
+- struct r300_state_atom vap_cntl_status;
+- struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */
+- struct r300_state_atom vic; /* vap input control (2180) */
+- struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
+- struct r300_state_atom vap_clip_cntl;
+- struct r300_state_atom vap_clip;
+- struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
+- struct r300_state_atom pvs; /* pvs_cntl (22D0) */
+- struct r300_state_atom gb_enable; /* (4008) */
+- struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
+- struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+- struct r300_state_atom ga_triangle_stipple; /* (4214) */
+- struct r300_state_atom ps; /* pointsize (421C) */
+- struct r300_state_atom ga_point_minmax; /* (4230) */
+- struct r300_state_atom lcntl; /* line control */
+- struct r300_state_atom ga_line_stipple; /* (4260) */
+- struct r300_state_atom shade;
+- struct r300_state_atom polygon_mode;
+- struct r300_state_atom fogp; /* fog parameters (4294) */
+- struct r300_state_atom ga_soft_reset; /* (429C) */
+- struct r300_state_atom zbias_cntl;
+- struct r300_state_atom zbs; /* zbias (42A4) */
+- struct r300_state_atom occlusion_cntl;
+- struct r300_state_atom cul; /* cull cntl (42B8) */
+- struct r300_state_atom su_depth_scale; /* (42C0) */
+- struct r300_state_atom rc; /* rs control (4300) */
+- struct r300_state_atom ri; /* rs interpolators (4310) */
+- struct r300_state_atom rr; /* rs route (4330) */
+- struct r300_state_atom sc_hyperz; /* (43A4) */
+- struct r300_state_atom sc_screendoor; /* (43E8) */
+- struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */
+- struct r300_state_atom fpt; /* texi - (4620) */
+- struct r300_state_atom us_out_fmt; /* (46A4) */
+- struct r300_state_atom r500fp; /* r500 fp instructions */
+- struct r300_state_atom r500fp_const; /* r500 fp constants */
+- struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
+- struct r300_state_atom fogs; /* fog state (4BC0) */
+- struct r300_state_atom fogc; /* fog color (4BC8) */
+- struct r300_state_atom at; /* alpha test (4BD4) */
+- struct r300_state_atom fg_depth_src; /* (4BD8) */
+- struct r300_state_atom fpp; /* 0x4C00 and following */
+- struct r300_state_atom rb3d_cctl; /* (4E00) */
+- struct r300_state_atom bld; /* blending (4E04) */
+- struct r300_state_atom cmk; /* colormask (4E0C) */
+- struct r300_state_atom blend_color; /* constant blend color */
+- struct r300_state_atom rop; /* ropcntl */
+- struct r300_state_atom cb; /* colorbuffer (4E28) */
+- struct r300_state_atom rb3d_dither_ctl; /* (4E50) */
+- struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */
+- struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
+- struct r300_state_atom zs; /* zstencil control (4F00) */
+- struct r300_state_atom zstencil_format;
+- struct r300_state_atom zb; /* z buffer (4F20) */
+- struct r300_state_atom zb_depthclearvalue; /* (4F28) */
+- struct r300_state_atom unk4F30; /* (4F30) */
+- struct r300_state_atom zb_hiz_offset; /* (4F44) */
+- struct r300_state_atom zb_hiz_pitch; /* (4F54) */
+-
+- struct r300_state_atom vpi; /* vp instructions */
+- struct r300_state_atom vpp; /* vp parameters */
+- struct r300_state_atom vps; /* vertex point size (?) */
+- struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */
++ struct radeon_state_atom vpt; /* viewport (1D98) */
++ struct radeon_state_atom vap_cntl;
++ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
++ struct radeon_state_atom vof; /* VAP output format register 0x2090 */
++ struct radeon_state_atom vte; /* (20B0) */
++ struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
++ struct radeon_state_atom vap_cntl_status;
++ struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */
++ struct radeon_state_atom vic; /* vap input control (2180) */
++ struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
++ struct radeon_state_atom vap_clip_cntl;
++ struct radeon_state_atom vap_clip;
++ struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
++ struct radeon_state_atom pvs; /* pvs_cntl (22D0) */
++ struct radeon_state_atom gb_enable; /* (4008) */
++ struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
++ struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
++ struct radeon_state_atom ga_triangle_stipple; /* (4214) */
++ struct radeon_state_atom ps; /* pointsize (421C) */
++ struct radeon_state_atom ga_point_minmax; /* (4230) */
++ struct radeon_state_atom lcntl; /* line control */
++ struct radeon_state_atom ga_line_stipple; /* (4260) */
++ struct radeon_state_atom shade;
++ struct radeon_state_atom polygon_mode;
++ struct radeon_state_atom fogp; /* fog parameters (4294) */
++ struct radeon_state_atom ga_soft_reset; /* (429C) */
++ struct radeon_state_atom zbias_cntl;
++ struct radeon_state_atom zbs; /* zbias (42A4) */
++ struct radeon_state_atom occlusion_cntl;
++ struct radeon_state_atom cul; /* cull cntl (42B8) */
++ struct radeon_state_atom su_depth_scale; /* (42C0) */
++ struct radeon_state_atom rc; /* rs control (4300) */
++ struct radeon_state_atom ri; /* rs interpolators (4310) */
++ struct radeon_state_atom rr; /* rs route (4330) */
++ struct radeon_state_atom sc_hyperz; /* (43A4) */
++ struct radeon_state_atom sc_screendoor; /* (43E8) */
++ struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */
++ struct radeon_state_atom fpt; /* texi - (4620) */
++ struct radeon_state_atom us_out_fmt; /* (46A4) */
++ struct radeon_state_atom r500fp; /* r500 fp instructions */
++ struct radeon_state_atom r500fp_const; /* r500 fp constants */
++ struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
++ struct radeon_state_atom fogs; /* fog state (4BC0) */
++ struct radeon_state_atom fogc; /* fog color (4BC8) */
++ struct radeon_state_atom at; /* alpha test (4BD4) */
++ struct radeon_state_atom fg_depth_src; /* (4BD8) */
++ struct radeon_state_atom fpp; /* 0x4C00 and following */
++ struct radeon_state_atom rb3d_cctl; /* (4E00) */
++ struct radeon_state_atom bld; /* blending (4E04) */
++ struct radeon_state_atom cmk; /* colormask (4E0C) */
++ struct radeon_state_atom blend_color; /* constant blend color */
++ struct radeon_state_atom rop; /* ropcntl */
++ struct radeon_state_atom cb; /* colorbuffer (4E28) */
++ struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */
++ struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
++ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
++ struct radeon_state_atom zs; /* zstencil control (4F00) */
++ struct radeon_state_atom zstencil_format;
++ struct radeon_state_atom zb; /* z buffer (4F20) */
++ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
++ struct radeon_state_atom unk4F30; /* (4F30) */
++ struct radeon_state_atom zb_hiz_offset; /* (4F44) */
++ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
++
++ struct radeon_state_atom vpi; /* vp instructions */
++ struct radeon_state_atom vpp; /* vp parameters */
++ struct radeon_state_atom vps; /* vertex point size (?) */
++ struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */
+ /* 8 texture units */
+ /* the state is grouped by function and not by
+ texture unit. This makes single unit updates
+ really awkward - we are much better off
+ updating the whole thing at once */
+ struct {
+- struct r300_state_atom filter;
+- struct r300_state_atom filter_1;
+- struct r300_state_atom size;
+- struct r300_state_atom format;
+- struct r300_state_atom pitch;
+- struct r300_state_atom offset;
+- struct r300_state_atom chroma_key;
+- struct r300_state_atom border_color;
++ struct radeon_state_atom filter;
++ struct radeon_state_atom filter_1;
++ struct radeon_state_atom size;
++ struct radeon_state_atom format;
++ struct radeon_state_atom pitch;
++ struct radeon_state_atom offset;
++ struct radeon_state_atom chroma_key;
++ struct radeon_state_atom border_color;
+ } tex;
+- struct r300_state_atom txe; /* tex enable (4104) */
+-};
++ struct radeon_state_atom txe; /* tex enable (4104) */
+
+-/**
+- * This structure holds the command buffer while it is being constructed.
+- *
+- * The first batch of commands in the buffer is always the state that needs
+- * to be re-emitted when the context is lost. This batch can be skipped
+- * otherwise.
+- */
+-struct r300_cmdbuf {
+- int size; /* DWORDs allocated for buffer */
+- uint32_t *cmd_buf;
+- int count_used; /* DWORDs filled so far */
+- int count_reemit; /* size of re-emission batch */
++ radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
+ };
+
+ /**
+ * State cache
+ */
+
+-struct r300_depthbuffer_state {
+- GLfloat scale;
+-};
+-
+-struct r300_stencilbuffer_state {
+- GLboolean hw_stencil;
+-};
+-
+ /* Vertex shader state */
+
+ /* Perhaps more if we store programs in vmem? */
+@@ -812,22 +630,18 @@ struct r500_fragment_program {
+ #define REG_TEX0 2
+
+ struct r300_state {
+- struct r300_depthbuffer_state depth;
+ struct r300_texture_state texture;
+ int sw_tcl_inputs[VERT_ATTRIB_MAX];
+ struct r300_vertex_shader_state vertex_shader;
+- struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
++ struct radeon_aos aos[R300_MAX_AOS_ARRAYS];
+ int aos_count;
+
+- GLuint *Elts;
+- struct r300_dma_region elt_dma;
++ struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
++ int elt_dma_offset; /** Offset into this buffer object, in bytes */
+
+- struct r300_dma_region swtcl_dma;
+ DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
+ They are the same as tnl->render_inputs for fixed pipeline */
+
+- struct r300_stencilbuffer_state stencil;
+-
+ };
+
+ #define R300_FALLBACK_NONE 0
+@@ -837,41 +651,7 @@ struct r300_state {
+ /* r300_swtcl.c
+ */
+ struct r300_swtcl_info {
+- GLuint RenderIndex;
+-
+- /**
+- * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is
+- * installed in the Mesa state vector.
+- */
+- GLuint vertex_size;
+-
+- /**
+- * Attributes instructing the Mesa TCL pipeline where / how to put vertex
+- * data in the hardware buffer.
+- */
+- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+-
+- /**
+- * Number of elements of \c ::vertex_attrs that are actually used.
+- */
+- GLuint vertex_attr_count;
+-
+- /**
+- * Cached pointer to the buffer where Mesa will store vertex data.
+- */
+- GLubyte *verts;
+-
+- /* Fallback rasterization functions
+- */
+- // r200_point_func draw_point;
+- // r200_line_func draw_line;
+- // r200_tri_func draw_tri;
+-
+- GLuint hw_primitive;
+- GLenum render_primitive;
+- GLuint numverts;
+-
+- /**
++ /*
+ * Offset of the 4UB color data within a hardware (swtcl) vertex.
+ */
+ GLuint coloroffset;
+@@ -880,13 +660,6 @@ struct r300_swtcl_info {
+ * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+ */
+ GLuint specoffset;
+-
+- /**
+- * Should Mesa project vertex data or will the hardware do it?
+- */
+- GLboolean needproj;
+-
+- struct r300_dma_region indexed_verts;
+ };
+
+
+@@ -897,40 +670,22 @@ struct r300_context {
+ struct radeon_context radeon; /* parent class, must be first */
+
+ struct r300_hw_state hw;
+- struct r300_cmdbuf cmdbuf;
++
+ struct r300_state state;
+ struct gl_vertex_program *curr_vp;
+ struct r300_vertex_program *selected_vp;
+
+ /* Vertex buffers
+ */
+- struct r300_dma dma;
+- GLboolean save_on_next_unlock;
+- GLuint NewGLState;
+-
+- /* Texture object bookkeeping
+- */
+- unsigned nr_heaps;
+- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+- driTextureObject swapped;
+- int texture_depth;
+- float initialMaxAnisotropy;
+-
+- /* Clientdata textures;
+- */
+- GLuint prefer_gart_client_texturing;
+-
+-#ifdef USER_BUFFERS
+- struct r300_memory_manager *rmm;
+-#endif
+-
+ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+
+ GLboolean disable_lowimpact_fallback;
+
+ DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
++
+ struct r300_swtcl_info swtcl;
++ GLboolean vap_flush_needed;
+ };
+
+ struct r300_buffer_object {
+@@ -956,4 +711,7 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx,
+ #define RADEON_D_PLAYBACK_RAW 2
+ #define RADEON_D_T 3
+
++#define r300PackFloat32 radeonPackFloat32
++#define r300PackFloat24 radeonPackFloat24
++
+ #endif /* __R300_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
+index 80bd338..1512e90 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.c
++++ b/src/mesa/drivers/dri/r300/r300_emit.c
+@@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_context.h"
+
+ #include "r300_context.h"
+-#include "radeon_ioctl.h"
+ #include "r300_state.h"
+ #include "r300_emit.h"
+ #include "r300_ioctl.h"
+
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+
+ #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+ SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+@@ -66,147 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #define DEBUG_ALL DEBUG_VERTS
+
+-#if defined(USE_X86_ASM)
+-#define COPY_DWORDS( dst, src, nr ) \
+-do { \
+- int __tmp; \
+- __asm__ __volatile__( "rep ; movsl" \
+- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+- : "0" (nr), \
+- "D" ((long)dst), \
+- "S" ((long)src) ); \
+-} while (0)
+-#else
+-#define COPY_DWORDS( dst, src, nr ) \
+-do { \
+- int j; \
+- for ( j = 0 ; j < nr ; j++ ) \
+- dst[j] = ((int *)src)[j]; \
+- dst += nr; \
+-} while (0)
+-#endif
+-
+-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
+- GLvoid * data, int stride, int count)
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 4)
+- COPY_DWORDS(out, data, count);
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out++;
+- data += stride;
+- }
+-}
+-
+-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
+- GLvoid * data, int stride, int count)
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 8)
+- COPY_DWORDS(out, data, count * 2);
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data + 4);
+- out += 2;
+- data += stride;
+- }
+-}
+-
+-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
+- GLvoid * data, int stride, int count)
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 12)
+- COPY_DWORDS(out, data, count * 3);
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data + 4);
+- out[2] = *(int *)(data + 8);
+- out += 3;
+- data += stride;
+- }
+-}
+-
+-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
+- GLvoid * data, int stride, int count)
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 16)
+- COPY_DWORDS(out, data, count * 4);
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data + 4);
+- out[2] = *(int *)(data + 8);
+- out[3] = *(int *)(data + 12);
+- out += 4;
+- data += stride;
+- }
+-}
+-
+-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
+- GLvoid * data, int size, int stride, int count)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+- if (stride == 0) {
+- r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
+- count = 1;
+- rvb->aos_offset = GET_START(rvb);
+- rvb->aos_stride = 0;
+- } else {
+- r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
+- rvb->aos_offset = GET_START(rvb);
+- rvb->aos_stride = size;
+- }
+-
+- switch (size) {
+- case 1:
+- r300EmitVec4(ctx, rvb, data, stride, count);
+- break;
+- case 2:
+- r300EmitVec8(ctx, rvb, data, stride, count);
+- break;
+- case 3:
+- r300EmitVec12(ctx, rvb, data, stride, count);
+- break;
+- case 4:
+- r300EmitVec16(ctx, rvb, data, stride, count);
+- break;
+- default:
+- assert(0);
+- break;
+- }
+-}
+-
+ #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
+ (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
+
+@@ -314,10 +169,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+
+-#if 0
+- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
+-#endif
+-
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+
+@@ -371,7 +222,6 @@ int r300EmitArrays(GLcontext * ctx)
+
+ assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
+ assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
+- //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
+
+ if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+@@ -433,7 +283,7 @@ int r300EmitArrays(GLcontext * ctx)
+ }
+
+ for (i = 0; i < nr; i++) {
+- int ci, fix, found = 0;
++ int ci;
+
+ swizzle[i][0] = SWIZZLE_ZERO;
+ swizzle[i][1] = SWIZZLE_ZERO;
+@@ -443,61 +293,35 @@ int r300EmitArrays(GLcontext * ctx)
+ for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
+ swizzle[i][ci] = ci;
+ }
+-
+- if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
+- if (vb->AttribPtr[tab[i]]->stride % 4) {
+- return R300_FALLBACK_TCL;
+- }
+- rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
+- rmesa->state.aos[i].start = 0;
+- rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
+- rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
+- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+- } else {
+- r300EmitVec(ctx, &rmesa->state.aos[i],
++ rcommon_emit_vector(ctx, &rmesa->state.aos[i],
+ vb->AttribPtr[tab[i]]->data,
+ vb->AttribPtr[tab[i]]->size,
+ vb->AttribPtr[tab[i]]->stride, count);
+- }
+-
+- rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+-
+- for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
+- if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
+- continue;
+- }
+- found = 1;
+- break;
+- }
+-
+- if (found) {
+- if (fix > 0) {
+- WARN_ONCE("Feeling lucky?\n");
+- }
+- rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
+- for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
+- swizzle[i][ci] += fix;
+- }
+- } else {
+- WARN_ONCE
+- ("Cannot handle offset %x with stride %d, comp %d\n",
+- rmesa->state.aos[i].aos_offset,
+- rmesa->state.aos[i].aos_stride,
+- vb->AttribPtr[tab[i]]->size);
+- return R300_FALLBACK_TCL;
+- }
+ }
+
+ /* Setup INPUT_ROUTE. */
+- R300_STATECHANGE(rmesa, vir[0]);
+- ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+- r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+- vb->AttribPtr, inputs, tab, nr);
+- R300_STATECHANGE(rmesa, vir[1]);
+- ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+- r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+- nr);
+-
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ R300_STATECHANGE(rmesa, vir[0]);
++ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
++ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
++ rmesa->hw.vir[0].cmd[0] |=
++ (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
++ vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
++ R300_STATECHANGE(rmesa, vir[1]);
++ rmesa->hw.vir[1].cmd[0] |=
++ (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
++ nr) & 0x3FFF) << 16;
++ } else {
++ R300_STATECHANGE(rmesa, vir[0]);
++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
++ r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
++ vb->AttribPtr, inputs, tab, nr);
++ R300_STATECHANGE(rmesa, vir[1]);
++ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
++ r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
++ nr);
++ }
++
+ /* Setup INPUT_CNTL. */
+ R300_STATECHANGE(rmesa, vic);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+@@ -515,45 +339,34 @@ int r300EmitArrays(GLcontext * ctx)
+ return R300_FALLBACK_NONE;
+ }
+
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- int i;
+-
+- if (rmesa->state.elt_dma.buf)
+- r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
+-
+- for (i = 0; i < rmesa->state.aos_count; i++) {
+- if (rmesa->state.aos[i].buf)
+- r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
+- }
+-}
+-#endif
+-
+ void r300ReleaseArrays(GLcontext * ctx)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ int i;
+
+- r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
++ if (rmesa->state.elt_dma_bo) {
++ radeon_bo_unref(rmesa->state.elt_dma_bo);
++ rmesa->state.elt_dma_bo = NULL;
++ }
+ for (i = 0; i < rmesa->state.aos_count; i++) {
+- r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
++ if (rmesa->state.aos[i].bo) {
++ radeon_bo_unref(rmesa->state.aos[i].bo);
++ rmesa->state.aos[i].bo = NULL;
++ }
+ }
+ }
+
+ void r300EmitCacheFlush(r300ContextPtr rmesa)
+ {
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+-
+- drm_radeon_cmd_header_t *cmd = NULL;
+-
+- reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+- e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+-
+- reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
+- e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++ BATCH_LOCALS(&rmesa->radeon);
++
++ BEGIN_BATCH(4);
++ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
++ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
++ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
++ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++ END_BATCH();
++ COMMIT_BATCH();
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
+index 89d7383..6bc8f8e 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.h
++++ b/src/mesa/drivers/dri/r300/r300_emit.h
+@@ -44,28 +44,31 @@
+ #include "r300_cmdbuf.h"
+ #include "radeon_reg.h"
+
+-/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
+- * with DRM */
+-#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+-#define CP_PACKET3( pkt, n ) \
+- (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+-
+-static INLINE uint32_t cmdpacket0(int reg, int count)
++static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
++ int reg, int count)
+ {
+- drm_r300_cmd_header_t cmd;
+-
+- cmd.packet0.cmd_type = R300_CMD_PACKET0;
+- cmd.packet0.count = count;
+- cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+- cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
+-
+- return cmd.u;
++ if (!rscrn->kernel_mm) {
++ drm_r300_cmd_header_t cmd;
++
++ cmd.u = 0;
++ cmd.packet0.cmd_type = R300_CMD_PACKET0;
++ cmd.packet0.count = count;
++ cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
++ cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
++
++ return cmd.u;
++ }
++ if (count) {
++ return CP_PACKET0(reg, count - 1);
++ }
++ return CP_PACKET2;
+ }
+
+-static INLINE uint32_t cmdvpu(int addr, int count)
++static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
+ cmd.vpu.cmd_type = R300_CMD_VPU;
+ cmd.vpu.count = count;
+ cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
+@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count)
+ return cmd.u;
+ }
+
+-static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
++static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
++ int addr, int count, int type, int clamp)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
+ cmd.r500fp.cmd_type = R300_CMD_R500FP;
+ cmd.r500fp.count = count;
+ cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
+@@ -88,169 +93,131 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
+ return cmd.u;
+ }
+
+-static INLINE uint32_t cmdpacket3(int packet)
++static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
+ cmd.packet3.cmd_type = R300_CMD_PACKET3;
+ cmd.packet3.packet = packet;
+
+ return cmd.u;
+ }
+
+-static INLINE uint32_t cmdcpdelay(unsigned short count)
++static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
++ unsigned short count)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
++
+ cmd.delay.cmd_type = R300_CMD_CP_DELAY;
+ cmd.delay.count = count;
+
+ return cmd.u;
+ }
+
+-static INLINE uint32_t cmdwait(unsigned char flags)
++static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
++ unsigned char flags)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
+ cmd.wait.cmd_type = R300_CMD_WAIT;
+ cmd.wait.flags = flags;
+
+ return cmd.u;
+ }
+
+-static INLINE uint32_t cmdpacify(void)
++static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
+ {
+ drm_r300_cmd_header_t cmd;
+
++ cmd.u = 0;
+ cmd.header.cmd_type = R300_CMD_END3D;
+
+ return cmd.u;
+ }
+
+ /**
+- * Prepare to write a register value to register at address reg.
+- * If num_extra > 0 then the following extra values are written
+- * to registers with address +4, +8 and so on..
+- */
+-#define reg_start(reg, num_extra) \
+- do { \
+- int _n; \
+- _n=(num_extra); \
+- cmd = (drm_radeon_cmd_header_t*) \
+- r300AllocCmdBuf(rmesa, \
+- (_n+2), \
+- __FUNCTION__); \
+- cmd_reserved=_n+2; \
+- cmd_written=1; \
+- cmd[0].i=cmdpacket0((reg), _n+1); \
+- } while (0);
+-
+-/**
+- * Emit GLuint freestyle
++ * Write the header of a packet3 to the command buffer.
++ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
+ */
+-#define e32(dword) \
+- do { \
+- if(cmd_written<cmd_reserved) { \
+- cmd[cmd_written].i=(dword); \
+- cmd_written++; \
+- } else { \
+- fprintf(stderr, \
+- "e32 but no previous packet " \
+- "declaration.\n" \
+- "Aborting! in %s::%s at line %d, " \
+- "cmd_written=%d cmd_reserved=%d\n", \
+- __FILE__, __FUNCTION__, __LINE__, \
+- cmd_written, cmd_reserved); \
+- _mesa_exit(-1); \
+- } \
++#define OUT_BATCH_PACKET3(packet, num_extra) do {\
++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
++ OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
++ R300_CMD_PACKET3_RAW)); \
++ } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } while(0)
+
+-#define efloat(f) e32(r300PackFloat32(f))
+-
+-#define vsf_start_fragment(dest, length) \
+- do { \
+- int _n; \
+- _n = (length); \
+- cmd = (drm_radeon_cmd_header_t*) \
+- r300AllocCmdBuf(rmesa, \
+- (_n+1), \
+- __FUNCTION__); \
+- cmd_reserved = _n+2; \
+- cmd_written =1; \
+- cmd[0].i = cmdvpu((dest), _n/4); \
+- } while (0);
+-
+-#define r500fp_start_fragment(dest, length) \
+- do { \
+- int _n; \
+- _n = (length); \
+- cmd = (drm_radeon_cmd_header_t*) \
+- r300AllocCmdBuf(rmesa, \
+- (_n+1), \
+- __FUNCTION__); \
+- cmd_reserved = _n+1; \
+- cmd_written =1; \
+- cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \
+- } while (0);
+-
+-#define start_packet3(packet, count) \
+- { \
+- int _n; \
+- GLuint _p; \
+- _n = (count); \
+- _p = (packet); \
+- cmd = (drm_radeon_cmd_header_t*) \
+- r300AllocCmdBuf(rmesa, \
+- (_n+3), \
+- __FUNCTION__); \
+- cmd_reserved = _n+3; \
+- cmd_written = 2; \
+- if(_n > 0x3fff) { \
+- fprintf(stderr,"Too big packet3 %08x: cannot " \
+- "store %d dwords\n", \
+- _p, _n); \
+- _mesa_exit(-1); \
+- } \
+- cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \
+- cmd[1].i = _p | ((_n & 0x3fff)<<16); \
+- }
+-
+ /**
+ * Must be sent to switch to 2d commands
+ */
+-void static INLINE end_3d(r300ContextPtr rmesa)
++void static INLINE end_3d(radeonContextPtr radeon)
+ {
+- drm_radeon_cmd_header_t *cmd = NULL;
++ BATCH_LOCALS(radeon);
+
+- cmd =
+- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+- cmd[0].header.cmd_type = R300_CMD_END3D;
++ if (!radeon->radeonScreen->kernel_mm) {
++ BEGIN_BATCH_NO_AUTOSTATE(1);
++ OUT_BATCH(cmdpacify(radeon->radeonScreen));
++ END_BATCH();
++ }
+ }
+
+ void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
+ {
+- drm_radeon_cmd_header_t *cmd = NULL;
++ BATCH_LOCALS(&rmesa->radeon);
+
+- cmd =
+- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+- cmd[0].i = cmdcpdelay(count);
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ BEGIN_BATCH_NO_AUTOSTATE(1);
++ OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
++ END_BATCH();
++ }
+ }
+
+-void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
++void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
+ {
+- drm_radeon_cmd_header_t *cmd = NULL;
+-
+- cmd =
+- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+- cmd[0].i = cmdwait(flags);
++ BATCH_LOCALS(radeon);
++ uint32_t wait_until;
++
++ if (!radeon->radeonScreen->kernel_mm) {
++ BEGIN_BATCH_NO_AUTOSTATE(1);
++ OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
++ END_BATCH();
++ } else {
++ switch(flags) {
++ case R300_WAIT_2D:
++ wait_until = (1 << 14);
++ break;
++ case R300_WAIT_3D:
++ wait_until = (1 << 15);
++ break;
++ case R300_NEW_WAIT_2D_3D:
++ wait_until = (1 << 14) | (1 << 15);
++ break;
++ case R300_NEW_WAIT_2D_2D_CLEAN:
++ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
++ break;
++ case R300_NEW_WAIT_3D_3D_CLEAN:
++ wait_until = (1 << 15) | (1 << 17) | (1 << 18);
++ break;
++ case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
++ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
++ wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
++ break;
++ default:
++ return;
++ }
++ BEGIN_BATCH_NO_AUTOSTATE(2);
++ OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
++ OUT_BATCH(wait_until);
++ END_BATCH();
++ }
+ }
+
+ extern int r300EmitArrays(GLcontext * ctx);
+
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx);
+-#endif
+-
+ extern void r300ReleaseArrays(GLcontext * ctx);
+ extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+ extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
+index 4ef7f2b..8d030c6 100644
+--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
++++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
+@@ -163,6 +163,19 @@ static GLboolean transform_TEX(
+ }
+ }
+
++ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
++ int tmpreg = radeonFindFreeTemporary(t);
++ tgt = radeonAppendInstructions(t->Program, 1);
++ tgt->Opcode = OPCODE_MOV;
++ tgt->DstReg.File = PROGRAM_TEMPORARY;
++ tgt->DstReg.Index = tmpreg;
++ tgt->SrcReg[0] = inst.SrcReg[0];
++
++ reset_srcreg(&inst.SrcReg[0]);
++ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
++ inst.SrcReg[0].Index = tmpreg;
++ }
++
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
+index ee85e22..619d268 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
+@@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "swrast/swrast.h"
+
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "r300_context.h"
+-#include "radeon_ioctl.h"
+ #include "r300_ioctl.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_state.h"
+@@ -55,71 +56,83 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_reg.h"
+ #include "r300_emit.h"
+ #include "r300_fragprog.h"
++#include "r300_context.h"
+
+ #include "vblank.h"
+
++#define R200_3D_DRAW_IMMD_2 0xC0003500
++
+ #define CLEARBUFFER_COLOR 0x1
+ #define CLEARBUFFER_DEPTH 0x2
+ #define CLEARBUFFER_STENCIL 0x4
+
+-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
++static void r300ClearBuffer(r300ContextPtr r300, int flags,
++ struct radeon_renderbuffer *rrb,
++ struct radeon_renderbuffer *rrbd)
+ {
++ BATCH_LOCALS(&r300->radeon);
+ GLcontext *ctx = r300->radeon.glCtx;
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+- GLuint cboffset, cbpitch;
+- drm_r300_cmd_header_t *cmd2;
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+- drm_radeon_cmd_header_t *cmd = NULL;
++ GLuint cbpitch = 0;
+ r300ContextPtr rmesa = r300;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
+- __FUNCTION__, buffer ? "back" : "front",
+- dPriv->x, dPriv->y, dPriv->w, dPriv->h);
+-
+- if (buffer) {
+- cboffset = r300->radeon.radeonScreen->backOffset;
+- cbpitch = r300->radeon.radeonScreen->backPitch;
+- } else {
+- cboffset = r300->radeon.radeonScreen->frontOffset;
+- cbpitch = r300->radeon.radeonScreen->frontPitch;
++ fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
++ __FUNCTION__, rrb, dPriv->x, dPriv->y,
++ dPriv->w, dPriv->h);
++
++ if (rrb) {
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->cpp == 4)
++ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++ else
++ cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
++ cbpitch |= R300_COLOR_TILE_ENABLE;
++ }
+ }
+
+- cboffset += r300->radeon.radeonScreen->fbLocation;
+-
+- cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+- end_3d(rmesa);
+-
+- R300_STATECHANGE(r300, cb);
+- reg_start(R300_RB3D_COLOROFFSET0, 0);
+- e32(cboffset);
+-
+- if (r300->radeon.radeonScreen->cpp == 4)
+- cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+- else
+- cbpitch |= R300_COLOR_FORMAT_RGB565;
+-
+- if (r300->radeon.sarea->tiling_enabled)
+- cbpitch |= R300_COLOR_TILE_ENABLE;
+-
+- reg_start(R300_RB3D_COLORPITCH0, 0);
+- e32(cbpitch);
+-
+- R300_STATECHANGE(r300, cmk);
+- reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
++ /* TODO in bufmgr */
++ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++ end_3d(&rmesa->radeon);
+
+ if (flags & CLEARBUFFER_COLOR) {
+- e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+- (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+- (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+- (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
++ assert(rrb != 0);
++ BEGIN_BATCH_NO_AUTOSTATE(6);
++ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
++ END_BATCH();
++ }
++#if 1
++ if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
++ assert(rrbd != 0);
++ cbpitch = (rrbd->pitch / rrbd->cpp);
++ if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
++ cbpitch |= R300_DEPTHMACROTILE_ENABLE;
++ }
++ if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
++ cbpitch |= R300_DEPTHMICROTILE_TILED;
++ }
++ BEGIN_BATCH_NO_AUTOSTATE(6);
++ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
++ OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
++ END_BATCH();
++ }
++#endif
++ BEGIN_BATCH_NO_AUTOSTATE(6);
++ OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
++ if (flags & CLEARBUFFER_COLOR) {
++ OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
++ (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
++ (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
++ (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
+ } else {
+- e32(0x0);
++ OUT_BATCH(0);
+ }
+
+- R300_STATECHANGE(r300, zs);
+- reg_start(R300_ZB_CNTL, 2);
+
+ {
+ uint32_t t1, t2;
+@@ -146,37 +159,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
+ R300_S_FRONT_ZFAIL_OP_SHIFT);
+ }
+
+- e32(t1);
+- e32(t2);
+- e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
+- (ctx->Stencil.Clear & R300_STENCILREF_MASK));
++ OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
++ OUT_BATCH(t1);
++ OUT_BATCH(t2);
++ OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
++ R300_STENCILWRITEMASK_SHIFT) |
++ (ctx->Stencil.Clear & R300_STENCILREF_MASK));
++ END_BATCH();
+ }
+
+- cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
+- cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
+- cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
+- cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
+- cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
+- cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
+- cmd2[4].u = r300PackFloat32(1.0);
+- cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
+- cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
+- cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
+- cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
+-
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ BEGIN_BATCH_NO_AUTOSTATE(9);
++ OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
++ OUT_BATCH_FLOAT32(dPriv->w / 2.0);
++ OUT_BATCH_FLOAT32(dPriv->h / 2.0);
++ OUT_BATCH_FLOAT32(ctx->Depth.Clear);
++ OUT_BATCH_FLOAT32(1.0);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
++ END_BATCH();
++ } else {
++ OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
++ OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
++ (1 << R300_PRIM_NUM_VERTICES_SHIFT));
++ OUT_BATCH_FLOAT32(dPriv->w / 2.0);
++ OUT_BATCH_FLOAT32(dPriv->h / 2.0);
++ OUT_BATCH_FLOAT32(ctx->Depth.Clear);
++ OUT_BATCH_FLOAT32(1.0);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
++ OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
++ }
++
+ r300EmitCacheFlush(rmesa);
+- cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++ cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++
++ R300_STATECHANGE(r300, cb);
++ R300_STATECHANGE(r300, cmk);
++ R300_STATECHANGE(r300, zs);
+ }
+
+ static void r300EmitClearState(GLcontext * ctx)
+ {
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+- r300ContextPtr rmesa = r300;
++ BATCH_LOCALS(&r300->radeon);
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ int i;
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+- drm_radeon_cmd_header_t *cmd = NULL;
+ int has_tcl = 1;
+ int is_r500 = 0;
+ GLuint vap_cntl;
+@@ -184,35 +215,37 @@ static void r300EmitClearState(GLcontext * ctx)
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ has_tcl = 0;
+
+- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+- is_r500 = 1;
++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++ is_r500 = 1;
+
+-
+- /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
+- * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
+- * quite complex; see the functions in r300_emit.c.
++ /* State atom dirty tracking is a little subtle here.
++ *
++ * On the one hand, we need to make sure base state is emitted
++ * here if we start with an empty batch buffer, otherwise clear
++ * works incorrectly with multiple processes. Therefore, the first
++ * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
++ *
++ * On the other hand, implicit state emission clears the state atom
++ * dirty bits, so we have to call R300_STATECHANGE later than the
++ * first BEGIN_BATCH.
+ *
+- * I believe it would be a good idea to extend the functions in
+- * r300_emit.c so that they can be used to setup the default values for
+- * these registers, as well as the actual values used for rendering.
++ * The final trickiness is that, because we change state, we need
++ * to ensure that any stored swtcl primitives are flushed properly
++ * before we start changing state. See the R300_NEWPRIM in r300Clear
++ * for this.
+ */
+- R300_STATECHANGE(r300, vir[0]);
+- reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
++ BEGIN_BATCH(31);
++ OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
+ if (!has_tcl)
+- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+ else
+- e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++ OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+
+- /* disable fog */
+- R300_STATECHANGE(r300, fogs);
+- reg_start(R300_FG_FOG_BLEND, 0);
+- e32(0x0);
+-
+- R300_STATECHANGE(r300, vir[1]);
+- reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
+- e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
++ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
++ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
++ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
+@@ -226,238 +259,278 @@ static void r300EmitClearState(GLcontext * ctx)
+ << R300_SWIZZLE1_SHIFT)));
+
+ /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
+- R300_STATECHANGE(r300, vic);
+- reg_start(R300_VAP_VTX_STATE_CNTL, 1);
+- e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
+- e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
++ OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
++ OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
++ OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+
+- R300_STATECHANGE(r300, vte);
+ /* comes from fglrx startup of clear */
+- reg_start(R300_SE_VTE_CNTL, 1);
+- e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+- R300_VPORT_Z_OFFSET_ENA);
+- e32(0x8);
++ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
++ OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
++ R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
++ R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
++ R300_VPORT_Z_OFFSET_ENA);
++ OUT_BATCH(0x8);
+
+- reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+- e32(0xaaaaaaaa);
++ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
+
+- R300_STATECHANGE(r300, vof);
+- reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
+- e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+- e32(0x0); /* no textures */
++ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
++ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
++ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
++ OUT_BATCH(0); /* no textures */
+
+- R300_STATECHANGE(r300, txe);
+- reg_start(R300_TX_ENABLE, 0);
+- e32(0x0);
++ OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
+
+- R300_STATECHANGE(r300, vpt);
+- reg_start(R300_SE_VPORT_XSCALE, 5);
+- efloat(1.0);
+- efloat(dPriv->x);
+- efloat(1.0);
+- efloat(dPriv->y);
+- efloat(1.0);
+- efloat(0.0);
++ OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
++ OUT_BATCH_FLOAT32(1.0);
++ OUT_BATCH_FLOAT32(dPriv->x);
++ OUT_BATCH_FLOAT32(1.0);
++ OUT_BATCH_FLOAT32(dPriv->y);
++ OUT_BATCH_FLOAT32(1.0);
++ OUT_BATCH_FLOAT32(0.0);
+
+- R300_STATECHANGE(r300, at);
+- reg_start(R300_FG_ALPHA_FUNC, 0);
+- e32(0x0);
++ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
++
++ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
++ OUT_BATCH(0x0);
++ OUT_BATCH(0x0);
++ END_BATCH();
+
++ R300_STATECHANGE(r300, vir[0]);
++ R300_STATECHANGE(r300, fogs);
++ R300_STATECHANGE(r300, vir[1]);
++ R300_STATECHANGE(r300, vic);
++ R300_STATECHANGE(r300, vte);
++ R300_STATECHANGE(r300, vof);
++ R300_STATECHANGE(r300, txe);
++ R300_STATECHANGE(r300, vpt);
++ R300_STATECHANGE(r300, at);
+ R300_STATECHANGE(r300, bld);
+- reg_start(R300_RB3D_CBLEND, 1);
+- e32(0x0);
+- e32(0x0);
++ R300_STATECHANGE(r300, ps);
+
+ if (has_tcl) {
+- R300_STATECHANGE(r300, vap_clip_cntl);
+- reg_start(R300_VAP_CLIP_CNTL, 0);
+- e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++ R300_STATECHANGE(r300, vap_clip_cntl);
++
++ BEGIN_BATCH_NO_AUTOSTATE(2);
++ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++ END_BATCH();
+ }
+
+- R300_STATECHANGE(r300, ps);
+- reg_start(R300_GA_POINT_SIZE, 0);
+- e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+- ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++ BEGIN_BATCH_NO_AUTOSTATE(2);
++ OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
++ ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
++ ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++ END_BATCH();
+
+ if (!is_r500) {
+ R300_STATECHANGE(r300, ri);
+- reg_start(R300_RS_IP_0, 7);
+- for (i = 0; i < 8; ++i) {
+- e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+- }
+-
+ R300_STATECHANGE(r300, rc);
+- /* The second constant is needed to get glxgears display anything .. */
+- reg_start(R300_RS_COUNT, 1);
+- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+- e32(0x0);
+-
+ R300_STATECHANGE(r300, rr);
+- reg_start(R300_RS_INST_0, 0);
+- e32(R300_RS_INST_COL_CN_WRITE);
++
++ BEGIN_BATCH(14);
++ OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
++ for (i = 0; i < 8; ++i)
++ OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
++
++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++ OUT_BATCH(0x0);
++
++ OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
++ END_BATCH();
+ } else {
+ R300_STATECHANGE(r300, ri);
+- reg_start(R500_RS_IP_0, 7);
++ R300_STATECHANGE(r300, rc);
++ R300_STATECHANGE(r300, rr);
++
++ BEGIN_BATCH(14);
++ OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
+ for (i = 0; i < 8; ++i) {
+- e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
++ OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
++ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
++ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ }
+
+- R300_STATECHANGE(r300, rc);
+- /* The second constant is needed to get glxgears display anything .. */
+- reg_start(R300_RS_COUNT, 1);
+- e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+- e32(0x0);
+-
+- R300_STATECHANGE(r300, rr);
+- reg_start(R500_RS_INST_0, 0);
+- e32(R500_RS_INST_COL_CN_WRITE);
++ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++ OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++ OUT_BATCH(0x0);
+
++ OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
++ END_BATCH();
+ }
+
+ if (!is_r500) {
+ R300_STATECHANGE(r300, fp);
+- reg_start(R300_US_CONFIG, 2);
+- e32(0x0);
+- e32(0x0);
+- e32(0x0);
+- reg_start(R300_US_CODE_ADDR_0, 3);
+- e32(0x0);
+- e32(0x0);
+- e32(0x0);
+- e32(R300_RGBA_OUT);
+-
+ R300_STATECHANGE(r300, fpi[0]);
+ R300_STATECHANGE(r300, fpi[1]);
+ R300_STATECHANGE(r300, fpi[2]);
+ R300_STATECHANGE(r300, fpi[3]);
+
+- reg_start(R300_US_ALU_RGB_INST_0, 0);
+- e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+-
+- reg_start(R300_US_ALU_RGB_ADDR_0, 0);
+- e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+-
+- reg_start(R300_US_ALU_ALPHA_INST_0, 0);
+- e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+-
+- reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
+- e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++ BEGIN_BATCH(17);
++ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
++ OUT_BATCH(0x0);
++ OUT_BATCH(0x0);
++ OUT_BATCH(0x0);
++ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
++ OUT_BATCH(0x0);
++ OUT_BATCH(0x0);
++ OUT_BATCH(0x0);
++ OUT_BATCH(R300_RGBA_OUT);
++
++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
++ FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
++ OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
++ FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
++ FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
++ OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
++ FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++ END_BATCH();
+ } else {
+- R300_STATECHANGE(r300, fp);
+- reg_start(R500_US_CONFIG, 1);
+- e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+- e32(0x0);
+- reg_start(R500_US_CODE_ADDR, 2);
+- e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+- e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+- e32(R500_US_CODE_OFFSET_ADDR(0));
++ struct radeon_state_atom r500fp;
++ uint32_t _cmd[10];
+
++ R300_STATECHANGE(r300, fp);
+ R300_STATECHANGE(r300, r500fp);
+- r500fp_start_fragment(0, 6);
+-
+- e32(R500_INST_TYPE_OUT |
+- R500_INST_TEX_SEM_WAIT |
+- R500_INST_LAST |
+- R500_INST_RGB_OMASK_R |
+- R500_INST_RGB_OMASK_G |
+- R500_INST_RGB_OMASK_B |
+- R500_INST_ALPHA_OMASK |
+- R500_INST_RGB_CLAMP |
+- R500_INST_ALPHA_CLAMP);
+-
+- e32(R500_RGB_ADDR0(0) |
+- R500_RGB_ADDR1(0) |
+- R500_RGB_ADDR1_CONST |
+- R500_RGB_ADDR2(0) |
+- R500_RGB_ADDR2_CONST);
+-
+- e32(R500_ALPHA_ADDR0(0) |
+- R500_ALPHA_ADDR1(0) |
+- R500_ALPHA_ADDR1_CONST |
+- R500_ALPHA_ADDR2(0) |
+- R500_ALPHA_ADDR2_CONST);
+-
+- e32(R500_ALU_RGB_SEL_A_SRC0 |
+- R500_ALU_RGB_R_SWIZ_A_R |
+- R500_ALU_RGB_G_SWIZ_A_G |
+- R500_ALU_RGB_B_SWIZ_A_B |
+- R500_ALU_RGB_SEL_B_SRC0 |
+- R500_ALU_RGB_R_SWIZ_B_R |
+- R500_ALU_RGB_B_SWIZ_B_G |
+- R500_ALU_RGB_G_SWIZ_B_B);
+-
+- e32(R500_ALPHA_OP_CMP |
+- R500_ALPHA_SWIZ_A_A |
+- R500_ALPHA_SWIZ_B_A);
+-
+- e32(R500_ALU_RGBA_OP_CMP |
+- R500_ALU_RGBA_R_SWIZ_0 |
+- R500_ALU_RGBA_G_SWIZ_0 |
+- R500_ALU_RGBA_B_SWIZ_0 |
+- R500_ALU_RGBA_A_SWIZ_0);
++
++ BEGIN_BATCH(7);
++ OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
++ OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
++ OUT_BATCH(0x0);
++ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
++ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
++ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
++ OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
++ END_BATCH();
++
++ r500fp.check = check_r500fp;
++ r500fp.cmd = _cmd;
++ r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
++ r500fp.cmd[1] = R500_INST_TYPE_OUT |
++ R500_INST_TEX_SEM_WAIT |
++ R500_INST_LAST |
++ R500_INST_RGB_OMASK_R |
++ R500_INST_RGB_OMASK_G |
++ R500_INST_RGB_OMASK_B |
++ R500_INST_ALPHA_OMASK |
++ R500_INST_RGB_CLAMP |
++ R500_INST_ALPHA_CLAMP;
++ r500fp.cmd[2] = R500_RGB_ADDR0(0) |
++ R500_RGB_ADDR1(0) |
++ R500_RGB_ADDR1_CONST |
++ R500_RGB_ADDR2(0) |
++ R500_RGB_ADDR2_CONST;
++ r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
++ R500_ALPHA_ADDR1(0) |
++ R500_ALPHA_ADDR1_CONST |
++ R500_ALPHA_ADDR2(0) |
++ R500_ALPHA_ADDR2_CONST;
++ r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
++ R500_ALU_RGB_R_SWIZ_A_R |
++ R500_ALU_RGB_G_SWIZ_A_G |
++ R500_ALU_RGB_B_SWIZ_A_B |
++ R500_ALU_RGB_SEL_B_SRC0 |
++ R500_ALU_RGB_R_SWIZ_B_R |
++ R500_ALU_RGB_B_SWIZ_B_G |
++ R500_ALU_RGB_G_SWIZ_B_B;
++ r500fp.cmd[5] = R500_ALPHA_OP_CMP |
++ R500_ALPHA_SWIZ_A_A |
++ R500_ALPHA_SWIZ_B_A;
++ r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
++ R500_ALU_RGBA_R_SWIZ_0 |
++ R500_ALU_RGBA_G_SWIZ_0 |
++ R500_ALU_RGBA_B_SWIZ_0 |
++ R500_ALU_RGBA_A_SWIZ_0;
++
++ r500fp.cmd[7] = 0;
++ emit_r500fp(ctx, &r500fp);
+ }
+
+- reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+- e32(0x00000000);
++ BEGIN_BATCH(2);
++ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++ END_BATCH();
++
+ if (has_tcl) {
+- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (12 << R300_VF_MAX_VTX_NUM_SHIFT));
+- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+- vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+- } else
+- vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++ vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
++ } else {
++ vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (5 << R300_VF_MAX_VTX_NUM_SHIFT));
++ }
+
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+- vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
++ vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+- vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
++ vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+- vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
++ vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+- vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
++ vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ else
+- vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++ vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++
++ R300_STATECHANGE(r300, vap_cntl);
+
+- R300_STATECHANGE(rmesa, vap_cntl);
+- reg_start(R300_VAP_CNTL, 0);
+- e32(vap_cntl);
++ BEGIN_BATCH(2);
++ OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
++ END_BATCH();
+
+ if (has_tcl) {
++ struct radeon_state_atom vpu;
++ uint32_t _cmd[10];
+ R300_STATECHANGE(r300, pvs);
+- reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
+-
+- e32((0 << R300_PVS_FIRST_INST_SHIFT) |
+- (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+- (1 << R300_PVS_LAST_INST_SHIFT));
+- e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+- (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+- e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+-
+ R300_STATECHANGE(r300, vpi);
+- vsf_start_fragment(0x0, 8);
+-
+- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
+- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+- e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+- e32(0x0);
+
+- e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
+- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+- e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+- e32(0x0);
++ BEGIN_BATCH(4);
++ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
++ OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
++ (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
++ (1 << R300_PVS_LAST_INST_SHIFT));
++ OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
++ (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
++ OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
++ END_BATCH();
++
++ vpu.check = check_vpu;
++ vpu.cmd = _cmd;
++ vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
++
++ vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
++ 0, 0xf, PVS_DST_REG_OUT);
++ vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
++ PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++ vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++ vpu.cmd[4] = 0x0;
++
++ vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
++ PVS_DST_REG_OUT);
++ vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
++ PVS_SRC_SELECT_Y,
++ PVS_SRC_SELECT_Z,
++ PVS_SRC_SELECT_W,
++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++ vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_SELECT_FORCE_0,
++ PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++ vpu.cmd[8] = 0x0;
++
++ r300->vap_flush_needed = GL_TRUE;
++ emit_vpu(ctx, &vpu);
+ }
+ }
+
+@@ -468,9 +541,11 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ {
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
++ GLframebuffer *fb = dPriv->driverPrivate;
++ struct radeon_renderbuffer *rrb;
++ struct radeon_renderbuffer *rrbd;
+ int flags = 0;
+ int bits = 0;
+- int swapped;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "r300Clear\n");
+@@ -482,6 +557,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ return;
+ }
+
++ /* Flush swtcl vertices if necessary, because we will change hardware
++ * state during clear. See also the state-related comment in
++ * r300EmitClearState.
++ */
++ R300_NEWPRIM(r300);
++
+ if (mask & BUFFER_BIT_FRONT_LEFT) {
+ flags |= BUFFER_BIT_FRONT_LEFT;
+ mask &= ~BUFFER_BIT_FRONT_LEFT;
+@@ -497,7 +578,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ mask &= ~BUFFER_BIT_DEPTH;
+ }
+
+- if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
++ if ((mask & BUFFER_BIT_STENCIL) && r300->radeon.state.stencil.hwBuffer) {
+ bits |= CLEARBUFFER_STENCIL;
+ mask &= ~BUFFER_BIT_STENCIL;
+ }
+@@ -509,336 +590,33 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ _swrast_Clear(ctx, mask);
+ }
+
+- swapped = r300->radeon.sarea->pfCurrentPage == 1;
+-
+ /* Make sure it fits there. */
+- r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
++ rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
+ if (flags || bits)
+ r300EmitClearState(ctx);
++ rrbd = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+
+ if (flags & BUFFER_BIT_FRONT_LEFT) {
+- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
++ rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
+ bits = 0;
+ }
+
+ if (flags & BUFFER_BIT_BACK_LEFT) {
+- r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
++ rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
+ bits = 0;
+ }
+
+ if (bits)
+- r300ClearBuffer(r300, bits, 0);
++ r300ClearBuffer(r300, bits, NULL, rrbd);
+
+-}
+-
+-void r300Flush(GLcontext * ctx)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+-}
+-
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-
+-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+-{
+- struct r300_dma_buffer *dmabuf;
+- size = MAX2(size, RADEON_BUFFER_SIZE * 16);
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush) {
+- rmesa->dma.flush(rmesa);
+- }
+-
+- if (rmesa->dma.current.buf) {
+-#ifdef USER_BUFFERS
+- r300_mem_use(rmesa, rmesa->dma.current.buf->id);
+-#endif
+- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+- }
+- if (rmesa->dma.nr_released_bufs > 4)
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+- dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+- dmabuf->buf = (void *)1; /* hack */
+- dmabuf->refcount = 1;
+-
+- dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+- if (dmabuf->id == 0) {
+- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
+-
+- r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+- radeonWaitForIdleLocked(&rmesa->radeon);
+-
+- dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+-
+- UNLOCK_HARDWARE(&rmesa->radeon);
+-
+- if (dmabuf->id == 0) {
+- fprintf(stderr,
+- "Error: Could not get dma buffer... exiting\n");
+- _mesa_exit(-1);
+- }
+- }
+-
+- rmesa->dma.current.buf = dmabuf;
+- rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
+- rmesa->dma.current.end = size;
+- rmesa->dma.current.start = 0;
+- rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region, const char *caller)
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (!region->buf)
+- return;
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush(rmesa);
+-
+- if (--region->buf->refcount == 0) {
+- r300_mem_free(rmesa, region->buf->id);
+- FREE(region->buf);
+- rmesa->dma.nr_released_bufs++;
+- }
+-
+- region->buf = 0;
+- region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current. If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r300AllocDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region,
+- int bytes, int alignment)
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush(rmesa);
+-
+- if (region->buf)
+- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+-
+- alignment--;
+- rmesa->dma.current.start = rmesa->dma.current.ptr =
+- (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+- r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
+-
+- region->start = rmesa->dma.current.start;
+- region->ptr = rmesa->dma.current.start;
+- region->end = rmesa->dma.current.start + bytes;
+- region->address = rmesa->dma.current.address;
+- region->buf = rmesa->dma.current.buf;
+- region->buf->refcount++;
+-
+- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+- rmesa->dma.current.start =
+- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
+-
+-#else
+-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
+-{
+- struct r300_dma_buffer *dmabuf;
+- int fd = rmesa->radeon.dri.fd;
+- int index = 0;
+- int size = 0;
+- drmDMAReq dma;
+- int ret;
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush) {
+- rmesa->dma.flush(rmesa);
+- }
+-
+- if (rmesa->dma.current.buf)
+- r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+-
+- if (rmesa->dma.nr_released_bufs > 4)
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+- dma.context = rmesa->radeon.dri.hwContext;
+- dma.send_count = 0;
+- dma.send_list = NULL;
+- dma.send_sizes = NULL;
+- dma.flags = 0;
+- dma.request_count = 1;
+- dma.request_size = RADEON_BUFFER_SIZE;
+- dma.request_list = &index;
+- dma.request_sizes = &size;
+- dma.granted_count = 0;
+-
+- LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */
+-
+- ret = drmDMA(fd, &dma);
+-
+- if (ret != 0) {
+- /* Try to release some buffers and wait until we can't get any more */
+- if (rmesa->dma.nr_released_bufs) {
+- r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+- }
+-
+- if (RADEON_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "Waiting for buffers\n");
+-
+- radeonWaitForIdleLocked(&rmesa->radeon);
+- ret = drmDMA(fd, &dma);
+-
+- if (ret != 0) {
+- UNLOCK_HARDWARE(&rmesa->radeon);
+- fprintf(stderr,
+- "Error: Could not get dma buffer... exiting\n");
+- _mesa_exit(-1);
+- }
+- }
+-
+- UNLOCK_HARDWARE(&rmesa->radeon);
+-
+- if (RADEON_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "Allocated buffer %d\n", index);
+-
+- dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+- dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
+- dmabuf->refcount = 1;
+-
+- rmesa->dma.current.buf = dmabuf;
+- rmesa->dma.current.address = dmabuf->buf->address;
+- rmesa->dma.current.end = dmabuf->buf->total;
+- rmesa->dma.current.start = 0;
+- rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region, const char *caller)
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (!region->buf)
+- return;
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush(rmesa);
+-
+- if (--region->buf->refcount == 0) {
+- drm_radeon_cmd_header_t *cmd;
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+- fprintf(stderr, "%s -- DISCARD BUF %d\n",
+- __FUNCTION__, region->buf->buf->idx);
+- cmd =
+- (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
+- sizeof
+- (*cmd) / 4,
+- __FUNCTION__);
+- cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
+- cmd->dma.buf_idx = region->buf->buf->idx;
+-
+- FREE(region->buf);
+- rmesa->dma.nr_released_bufs++;
+- }
+-
+- region->buf = 0;
+- region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current. If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r300AllocDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region,
+- int bytes, int alignment)
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush(rmesa);
+-
+- if (region->buf)
+- r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+-
+- alignment--;
+- rmesa->dma.current.start = rmesa->dma.current.ptr =
+- (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+- if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+- r300RefillCurrentDmaRegion(rmesa);
+-
+- region->start = rmesa->dma.current.start;
+- region->ptr = rmesa->dma.current.start;
+- region->end = rmesa->dma.current.start + bytes;
+- region->address = rmesa->dma.current.address;
+- region->buf = rmesa->dma.current.buf;
+- region->buf->refcount++;
+-
+- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+- rmesa->dma.current.start =
+- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+- assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
+-
+-#endif
+-
+-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
+- GLint size)
+-{
+- int offset =
+- (char *)pointer -
+- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+- int valid = (size >= 0 && offset >= 0
+- && offset + size <
+- rmesa->radeon.radeonScreen->gartTextures.size);
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
+- valid);
+-
+- return valid;
+-}
+-
+-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
+-{
+- int offset =
+- (char *)pointer -
+- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-
+- //fprintf(stderr, "offset=%08x\n", offset);
+-
+- if (offset < 0
+- || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+- return ~0;
+- else
+- return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
++ COMMIT_BATCH();
+ }
+
+ void r300InitIoctlFuncs(struct dd_function_table *functions)
+ {
+ functions->Clear = r300Clear;
+ functions->Finish = radeonFinish;
+- functions->Flush = r300Flush;
++ functions->Flush = radeonFlush;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
+index e1143fb..3abfa71 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.h
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
+@@ -39,22 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "radeon_drm.h"
+
+-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
+- const GLvoid * pointer, GLint size);
+-
+-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
+- const GLvoid * pointer);
+-
+-extern void r300Flush(GLcontext * ctx);
+-
+-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region,
+- const char *caller);
+-extern void r300AllocDmaRegion(r300ContextPtr rmesa,
+- struct r300_dma_region *region, int bytes,
+- int alignment);
+-
+ extern void r300InitIoctlFuncs(struct dd_function_table *functions);
+
+-extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size);
+ #endif /* __R300_IOCTL_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
+deleted file mode 100644
+index f8f9d4f..0000000
+--- a/src/mesa/drivers/dri/r300/r300_mem.c
++++ /dev/null
+@@ -1,385 +0,0 @@
+-/*
+- * Copyright (C) 2005 Aapo Tahkola.
+- *
+- * All Rights Reserved.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining
+- * a copy of this software and associated documentation files (the
+- * "Software"), to deal in the Software without restriction, including
+- * without limitation the rights to use, copy, modify, merge, publish,
+- * distribute, sublicense, and/or sell copies of the Software, and to
+- * permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice (including the
+- * next paragraph) shall be included in all copies or substantial
+- * portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+- *
+- */
+-
+-/**
+- * \file
+- *
+- * \author Aapo Tahkola <aet@rasterburn.org>
+- */
+-
+-#include <unistd.h>
+-
+-#include "r300_context.h"
+-#include "r300_cmdbuf.h"
+-#include "r300_ioctl.h"
+-#include "r300_mem.h"
+-#include "radeon_ioctl.h"
+-
+-#ifdef USER_BUFFERS
+-
+-static void resize_u_list(r300ContextPtr rmesa)
+-{
+- void *temp;
+- int nsize;
+-
+- temp = rmesa->rmm->u_list;
+- nsize = rmesa->rmm->u_size * 2;
+-
+- rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
+- _mesa_memset(rmesa->rmm->u_list, 0,
+- nsize * sizeof(*rmesa->rmm->u_list));
+-
+- if (temp) {
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+- _mesa_memcpy(rmesa->rmm->u_list, temp,
+- rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
+- _mesa_free(temp);
+- }
+-
+- rmesa->rmm->u_size = nsize;
+-}
+-
+-void r300_mem_init(r300ContextPtr rmesa)
+-{
+- rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
+- memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
+-
+- rmesa->rmm->u_size = 128;
+- resize_u_list(rmesa);
+-}
+-
+-void r300_mem_destroy(r300ContextPtr rmesa)
+-{
+- _mesa_free(rmesa->rmm->u_list);
+- rmesa->rmm->u_list = NULL;
+-
+- _mesa_free(rmesa->rmm);
+- rmesa->rmm = NULL;
+-}
+-
+-void *r300_mem_ptr(r300ContextPtr rmesa, int id)
+-{
+- assert(id <= rmesa->rmm->u_last);
+- return rmesa->rmm->u_list[id].ptr;
+-}
+-
+-int r300_mem_find(r300ContextPtr rmesa, void *ptr)
+-{
+- int i;
+-
+- for (i = 1; i < rmesa->rmm->u_size + 1; i++)
+- if (rmesa->rmm->u_list[i].ptr &&
+- ptr >= rmesa->rmm->u_list[i].ptr &&
+- ptr <
+- rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
+- break;
+-
+- if (i < rmesa->rmm->u_size + 1)
+- return i;
+-
+- fprintf(stderr, "%p failed\n", ptr);
+- return 0;
+-}
+-
+-//#define MM_DEBUG
+-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
+-{
+- drm_radeon_mem_alloc_t alloc;
+- int offset = 0, ret;
+- int i, free = -1;
+- int done_age;
+- drm_radeon_mem_free_t memfree;
+- int tries = 0;
+- static int bytes_wasted = 0, allocated = 0;
+-
+- if (size < 4096)
+- bytes_wasted += 4096 - size;
+-
+- allocated += size;
+-
+-#if 0
+- static int t = 0;
+- if (t != time(NULL)) {
+- t = time(NULL);
+- fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
+- rmesa->rmm->u_last, bytes_wasted / 1024,
+- allocated / 1024);
+- }
+-#endif
+-
+- memfree.region = RADEON_MEM_REGION_GART;
+-
+- again:
+-
+- done_age = radeonGetAge((radeonContextPtr) rmesa);
+-
+- if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
+- resize_u_list(rmesa);
+-
+- for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
+- if (rmesa->rmm->u_list[i].ptr == NULL) {
+- free = i;
+- continue;
+- }
+-
+- if (rmesa->rmm->u_list[i].h_pending == 0 &&
+- rmesa->rmm->u_list[i].pending
+- && rmesa->rmm->u_list[i].age <= done_age) {
+- memfree.region_offset =
+- (char *)rmesa->rmm->u_list[i].ptr -
+- (char *)rmesa->radeon.radeonScreen->gartTextures.
+- map;
+-
+- ret =
+- drmCommandWrite(rmesa->radeon.radeonScreen->
+- driScreen->fd, DRM_RADEON_FREE,
+- &memfree, sizeof(memfree));
+-
+- if (ret) {
+- fprintf(stderr, "Failed to free at %p\n",
+- rmesa->rmm->u_list[i].ptr);
+- fprintf(stderr, "ret = %s\n", strerror(-ret));
+- exit(1);
+- } else {
+-#ifdef MM_DEBUG
+- fprintf(stderr, "really freed %d at age %x\n",
+- i,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+- if (i == rmesa->rmm->u_last)
+- rmesa->rmm->u_last--;
+-
+- if (rmesa->rmm->u_list[i].size < 4096)
+- bytes_wasted -=
+- 4096 - rmesa->rmm->u_list[i].size;
+-
+- allocated -= rmesa->rmm->u_list[i].size;
+- rmesa->rmm->u_list[i].pending = 0;
+- rmesa->rmm->u_list[i].ptr = NULL;
+- free = i;
+- }
+- }
+- }
+- rmesa->rmm->u_head = i;
+-
+- if (free == -1) {
+- WARN_ONCE("Ran out of slots!\n");
+- //usleep(100);
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+- tries++;
+- if (tries > 100) {
+- WARN_ONCE("Ran out of slots!\n");
+- exit(1);
+- }
+- goto again;
+- }
+-
+- alloc.region = RADEON_MEM_REGION_GART;
+- alloc.alignment = alignment;
+- alloc.size = size;
+- alloc.region_offset = &offset;
+-
+- ret =
+- drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
+- sizeof(alloc));
+- if (ret) {
+-#if 0
+- WARN_ONCE("Ran out of mem!\n");
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+- //usleep(100);
+- tries2++;
+- tries = 0;
+- if (tries2 > 100) {
+- WARN_ONCE("Ran out of GART memory!\n");
+- exit(1);
+- }
+- goto again;
+-#else
+- WARN_ONCE
+- ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
+- size);
+- return 0;
+-#endif
+- }
+-
+- i = free;
+-
+- if (i > rmesa->rmm->u_last)
+- rmesa->rmm->u_last = i;
+-
+- rmesa->rmm->u_list[i].ptr =
+- ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
+- rmesa->rmm->u_list[i].size = size;
+- rmesa->rmm->u_list[i].age = 0;
+- //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
+-
+-#ifdef MM_DEBUG
+- fprintf(stderr, "allocated %d at age %x\n", i,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+- return i;
+-}
+-
+-void r300_mem_use(r300ContextPtr rmesa, int id)
+-{
+- uint64_t ull;
+-#ifdef MM_DEBUG
+- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+- drm_r300_cmd_header_t *cmd;
+-
+- assert(id <= rmesa->rmm->u_last);
+-
+- if (id == 0)
+- return;
+-
+- cmd =
+- (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
+- 2 + sizeof(ull) / 4,
+- __FUNCTION__);
+- cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
+- cmd[0].scratch.reg = R300_MEM_SCRATCH;
+- cmd[0].scratch.n_bufs = 1;
+- cmd[0].scratch.flags = 0;
+- cmd++;
+-
+- ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
+- _mesa_memcpy(cmd, &ull, sizeof(ull));
+- cmd += sizeof(ull) / 4;
+-
+- cmd[0].u = /*id */ 0;
+-
+- LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */
+- rmesa->rmm->u_list[id].h_pending++;
+- UNLOCK_HARDWARE(&rmesa->radeon);
+-}
+-
+-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
+-{
+- unsigned long offset;
+-
+- assert(id <= rmesa->rmm->u_last);
+-
+- offset = (char *)rmesa->rmm->u_list[id].ptr -
+- (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+- offset += rmesa->radeon.radeonScreen->gart_texture_offset;
+-
+- return offset;
+-}
+-
+-void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
+-{
+-#ifdef MM_DEBUG
+- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+- void *ptr;
+- int tries = 0;
+-
+- assert(id <= rmesa->rmm->u_last);
+-
+- if (access == R300_MEM_R) {
+-
+- if (rmesa->rmm->u_list[id].mapped == 1)
+- WARN_ONCE("buffer %d already mapped\n", id);
+-
+- rmesa->rmm->u_list[id].mapped = 1;
+- ptr = r300_mem_ptr(rmesa, id);
+-
+- return ptr;
+- }
+-
+- if (rmesa->rmm->u_list[id].h_pending)
+- r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+- if (rmesa->rmm->u_list[id].h_pending) {
+- return NULL;
+- }
+-
+- while (rmesa->rmm->u_list[id].age >
+- radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
+- usleep(10);
+-
+- if (tries >= 1000) {
+- fprintf(stderr, "Idling failed (%x vs %x)\n",
+- rmesa->rmm->u_list[id].age,
+- radeonGetAge((radeonContextPtr) rmesa));
+- return NULL;
+- }
+-
+- if (rmesa->rmm->u_list[id].mapped == 1)
+- WARN_ONCE("buffer %d already mapped\n", id);
+-
+- rmesa->rmm->u_list[id].mapped = 1;
+- ptr = r300_mem_ptr(rmesa, id);
+-
+- return ptr;
+-}
+-
+-void r300_mem_unmap(r300ContextPtr rmesa, int id)
+-{
+-#ifdef MM_DEBUG
+- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+- assert(id <= rmesa->rmm->u_last);
+-
+- if (rmesa->rmm->u_list[id].mapped == 0)
+- WARN_ONCE("buffer %d not mapped\n", id);
+-
+- rmesa->rmm->u_list[id].mapped = 0;
+-}
+-
+-void r300_mem_free(r300ContextPtr rmesa, int id)
+-{
+-#ifdef MM_DEBUG
+- fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+- radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+- assert(id <= rmesa->rmm->u_last);
+-
+- if (id == 0)
+- return;
+-
+- if (rmesa->rmm->u_list[id].ptr == NULL) {
+- WARN_ONCE("Not allocated!\n");
+- return;
+- }
+-
+- if (rmesa->rmm->u_list[id].pending) {
+- WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
+- return;
+- }
+-
+- rmesa->rmm->u_list[id].pending = 1;
+-}
+-#endif
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
+deleted file mode 100644
+index 625a7f6..0000000
+--- a/src/mesa/drivers/dri/r300/r300_mem.h
++++ /dev/null
+@@ -1,37 +0,0 @@
+-#ifndef __R300_MEM_H__
+-#define __R300_MEM_H__
+-
+-//#define R300_MEM_PDL 0
+-#define R300_MEM_UL 1
+-
+-#define R300_MEM_R 1
+-#define R300_MEM_W 2
+-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
+-
+-#define R300_MEM_SCRATCH 2
+-
+-struct r300_memory_manager {
+- struct {
+- void *ptr;
+- uint32_t size;
+- uint32_t age;
+- uint32_t h_pending;
+- int pending;
+- int mapped;
+- } *u_list;
+- int u_head, u_size, u_last;
+-
+-};
+-
+-extern void r300_mem_init(r300ContextPtr rmesa);
+-extern void r300_mem_destroy(r300ContextPtr rmesa);
+-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
+-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
+-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
+-extern void r300_mem_use(r300ContextPtr rmesa, int id);
+-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
+-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
+-extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
+-extern void r300_mem_free(r300ContextPtr rmesa, int id);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
+index 7c6485e..5f344be 100644
+--- a/src/mesa/drivers/dri/r300/r300_reg.h
++++ b/src/mesa/drivers/dri/r300/r300_reg.h
+@@ -656,7 +656,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ # define R300_GB_FOG_SELECT_C3A (3 << 0)
+ # define R300_GB_FOG_SELECT_1_1_W (4 << 0)
+ # define R300_GB_FOG_SELECT_Z (5 << 0)
+-# define R300_GB_DEPTH_SELECT_Z (0 << 3
++# define R300_GB_DEPTH_SELECT_Z (0 << 3)
+ # define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
+ # define R300_GB_W_SELECT_1_W (0 << 4)
+ # define R300_GB_W_SELECT_1 (1 << 4)
+@@ -730,8 +730,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+ #define R500_RS_IP_COL_PTR_SHIFT 24
+ #define R500_RS_IP_COL_FMT_SHIFT 27
+-# define R500_RS_COL_PTR(x) (x << 24)
+-# define R500_RS_COL_FMT(x) (x << 27)
++# define R500_RS_COL_PTR(x) ((x) << 24)
++# define R500_RS_COL_FMT(x) ((x) << 27)
+ /* gap */
+ #define R500_RS_IP_OFFSET_DIS (0 << 31)
+ #define R500_RS_IP_OFFSET_EN (1 << 31)
+@@ -1172,9 +1172,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R300_RS_IP_3 0x431C
+ # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
+ # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
+-# define R300_RS_TEX_PTR(x) (x << 0)
+-# define R300_RS_COL_PTR(x) (x << 6)
+-# define R300_RS_COL_FMT(x) (x << 9)
++# define R300_RS_TEX_PTR(x) ((x) << 0)
++# define R300_RS_COL_PTR(x) ((x) << 6)
++# define R300_RS_COL_FMT(x) ((x) << 9)
+ # define R300_RS_COL_FMT_RGBA 0
+ # define R300_RS_COL_FMT_RGB0 1
+ # define R300_RS_COL_FMT_RGB1 2
+@@ -1184,10 +1184,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ # define R300_RS_COL_FMT_111A 8
+ # define R300_RS_COL_FMT_1110 9
+ # define R300_RS_COL_FMT_1111 10
+-# define R300_RS_SEL_S(x) (x << 13)
+-# define R300_RS_SEL_T(x) (x << 16)
+-# define R300_RS_SEL_R(x) (x << 19)
+-# define R300_RS_SEL_Q(x) (x << 22)
++# define R300_RS_SEL_S(x) ((x) << 13)
++# define R300_RS_SEL_T(x) ((x) << 16)
++# define R300_RS_SEL_R(x) ((x) << 19)
++# define R300_RS_SEL_Q(x) ((x) << 22)
+ # define R300_RS_SEL_C0 0
+ # define R300_RS_SEL_C1 1
+ # define R300_RS_SEL_C2 2
+@@ -1525,6 +1525,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ # define R500_SEL_FILTER4_TC3 (3 << 18)
+
+ #define R300_TX_OFFSET_0 0x4540
++#define R300_TX_OFFSET_1 0x4544
++#define R300_TX_OFFSET_2 0x4548
++#define R300_TX_OFFSET_3 0x454C
++#define R300_TX_OFFSET_4 0x4550
++#define R300_TX_OFFSET_5 0x4554
++#define R300_TX_OFFSET_6 0x4558
++#define R300_TX_OFFSET_7 0x455C
+ /* BEGIN: Guess from R200 */
+ # define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
+ # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
+@@ -2705,7 +2712,7 @@ enum {
+ # define R500_ALPHA_OP_COS 13
+ # define R500_ALPHA_OP_MDH 14
+ # define R500_ALPHA_OP_MDV 15
+-# define R500_ALPHA_ADDRD(x) (x << 4)
++# define R500_ALPHA_ADDRD(x) ((x) << 4)
+ # define R500_ALPHA_ADDRD_REL (1 << 11)
+ # define R500_ALPHA_SEL_A_SHIFT 12
+ # define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+@@ -2749,16 +2756,16 @@ enum {
+ # define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+ # define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+ # define R500_ALPHA_OMOD_DISABLE (7 << 26)
+-# define R500_ALPHA_TARGET(x) (x << 29)
++# define R500_ALPHA_TARGET(x) ((x) << 29)
+ # define R500_ALPHA_W_OMASK (1 << 31)
+ #define R500_US_ALU_ALPHA_ADDR_0 0x9800
+-# define R500_ALPHA_ADDR0(x) (x << 0)
++# define R500_ALPHA_ADDR0(x) ((x) << 0)
+ # define R500_ALPHA_ADDR0_CONST (1 << 8)
+ # define R500_ALPHA_ADDR0_REL (1 << 9)
+-# define R500_ALPHA_ADDR1(x) (x << 10)
++# define R500_ALPHA_ADDR1(x) ((x) << 10)
+ # define R500_ALPHA_ADDR1_CONST (1 << 18)
+ # define R500_ALPHA_ADDR1_REL (1 << 19)
+-# define R500_ALPHA_ADDR2(x) (x << 20)
++# define R500_ALPHA_ADDR2(x) ((x) << 20)
+ # define R500_ALPHA_ADDR2_CONST (1 << 28)
+ # define R500_ALPHA_ADDR2_REL (1 << 29)
+ # define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
+@@ -2779,7 +2786,7 @@ enum {
+ # define R500_ALU_RGBA_OP_SOP (10 << 0)
+ # define R500_ALU_RGBA_OP_MDH (11 << 0)
+ # define R500_ALU_RGBA_OP_MDV (12 << 0)
+-# define R500_ALU_RGBA_ADDRD(x) (x << 4)
++# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
+ # define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+ # define R500_ALU_RGBA_SEL_C_SHIFT 12
+ # define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+@@ -2906,16 +2913,16 @@ enum {
+ # define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
+ # define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
+ # define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+-# define R500_ALU_RGB_TARGET(x) (x << 29)
++# define R500_ALU_RGB_TARGET(x) ((x) << 29)
+ # define R500_ALU_RGB_WMASK (1 << 31)
+ #define R500_US_ALU_RGB_ADDR_0 0x9000
+-# define R500_RGB_ADDR0(x) (x << 0)
++# define R500_RGB_ADDR0(x) ((x) << 0)
+ # define R500_RGB_ADDR0_CONST (1 << 8)
+ # define R500_RGB_ADDR0_REL (1 << 9)
+-# define R500_RGB_ADDR1(x) (x << 10)
++# define R500_RGB_ADDR1(x) ((x) << 10)
+ # define R500_RGB_ADDR1_CONST (1 << 18)
+ # define R500_RGB_ADDR1_REL (1 << 19)
+-# define R500_RGB_ADDR2(x) (x << 20)
++# define R500_RGB_ADDR2(x) ((x) << 20)
+ # define R500_RGB_ADDR2_CONST (1 << 28)
+ # define R500_RGB_ADDR2_REL (1 << 29)
+ # define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
+@@ -2970,19 +2977,19 @@ enum {
+
+ /* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+ #define R500_US_CODE_ADDR 0x4630
+-# define R500_US_CODE_START_ADDR(x) (x << 0)
+-# define R500_US_CODE_END_ADDR(x) (x << 16)
++# define R500_US_CODE_START_ADDR(x) ((x) << 0)
++# define R500_US_CODE_END_ADDR(x) ((x) << 16)
+ #define R500_US_CODE_OFFSET 0x4638
+-# define R500_US_CODE_OFFSET_ADDR(x) (x << 0)
++# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
+ #define R500_US_CODE_RANGE 0x4634
+-# define R500_US_CODE_RANGE_ADDR(x) (x << 0)
+-# define R500_US_CODE_RANGE_SIZE(x) (x << 16)
++# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
++# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
+ #define R500_US_CONFIG 0x4600
+ # define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+ #define R500_US_FC_ADDR_0 0xa000
+-# define R500_FC_BOOL_ADDR(x) (x << 0)
+-# define R500_FC_INT_ADDR(x) (x << 8)
+-# define R500_FC_JUMP_ADDR(x) (x << 16)
++# define R500_FC_BOOL_ADDR(x) ((x) << 0)
++# define R500_FC_INT_ADDR(x) ((x) << 8)
++# define R500_FC_JUMP_ADDR(x) ((x) << 16)
+ # define R500_FC_JUMP_GLOBAL (1 << 31)
+ #define R500_US_FC_BOOL_CONST 0x4620
+ # define R500_FC_KBOOL(x) (x)
+@@ -3003,8 +3010,8 @@ enum {
+ # define R500_FC_A_OP_NONE (0 << 6)
+ # define R500_FC_A_OP_POP (1 << 6)
+ # define R500_FC_A_OP_PUSH (2 << 6)
+-# define R500_FC_JUMP_FUNC(x) (x << 8)
+-# define R500_FC_B_POP_CNT(x) (x << 16)
++# define R500_FC_JUMP_FUNC(x) ((x) << 8)
++# define R500_FC_B_POP_CNT(x) ((x) << 16)
+ # define R500_FC_B_OP0_NONE (0 << 24)
+ # define R500_FC_B_OP0_DECR (1 << 24)
+ # define R500_FC_B_OP0_INCR (2 << 24)
+@@ -3013,14 +3020,14 @@ enum {
+ # define R500_FC_B_OP1_INCR (2 << 26)
+ # define R500_FC_IGNORE_UNCOVERED (1 << 28)
+ #define R500_US_FC_INT_CONST_0 0x4c00
+-# define R500_FC_INT_CONST_KR(x) (x << 0)
+-# define R500_FC_INT_CONST_KG(x) (x << 8)
+-# define R500_FC_INT_CONST_KB(x) (x << 16)
++# define R500_FC_INT_CONST_KR(x) ((x) << 0)
++# define R500_FC_INT_CONST_KG(x) ((x) << 8)
++# define R500_FC_INT_CONST_KB(x) ((x) << 16)
+ /* _0 through _15 */
+ #define R500_US_FORMAT0_0 0x4640
+-# define R500_FORMAT_TXWIDTH(x) (x << 0)
+-# define R500_FORMAT_TXHEIGHT(x) (x << 11)
+-# define R500_FORMAT_TXDEPTH(x) (x << 22)
++# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
++# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
++# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
+ /* _0 through _3 */
+ #define R500_US_OUT_FMT_0 0x46a4
+ # define R500_OUT_FMT_C4_8 (0 << 0)
+@@ -3061,12 +3068,12 @@ enum {
+ # define R500_C3_SEL_R (1 << 14)
+ # define R500_C3_SEL_G (2 << 14)
+ # define R500_C3_SEL_B (3 << 14)
+-# define R500_OUT_SIGN(x) (x << 16)
++# define R500_OUT_SIGN(x) ((x) << 16)
+ # define R500_ROUND_ADJ (1 << 20)
+ #define R500_US_PIXSIZE 0x4604
+ # define R500_PIX_SIZE(x) (x)
+ #define R500_US_TEX_ADDR_0 0x9800
+-# define R500_TEX_SRC_ADDR(x) (x << 0)
++# define R500_TEX_SRC_ADDR(x) ((x) << 0)
+ # define R500_TEX_SRC_ADDR_REL (1 << 7)
+ # define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+ # define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+@@ -3084,7 +3091,7 @@ enum {
+ # define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+ # define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+ # define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+-# define R500_TEX_DST_ADDR(x) (x << 16)
++# define R500_TEX_DST_ADDR(x) ((x) << 16)
+ # define R500_TEX_DST_ADDR_REL (1 << 23)
+ # define R500_TEX_DST_R_SWIZ_R (0 << 24)
+ # define R500_TEX_DST_R_SWIZ_G (1 << 24)
+@@ -3103,7 +3110,7 @@ enum {
+ # define R500_TEX_DST_A_SWIZ_B (2 << 30)
+ # define R500_TEX_DST_A_SWIZ_A (3 << 30)
+ #define R500_US_TEX_ADDR_DXDY_0 0xa000
+-# define R500_DX_ADDR(x) (x << 0)
++# define R500_DX_ADDR(x) ((x) << 0)
+ # define R500_DX_ADDR_REL (1 << 7)
+ # define R500_DX_S_SWIZ_R (0 << 8)
+ # define R500_DX_S_SWIZ_G (1 << 8)
+@@ -3121,7 +3128,7 @@ enum {
+ # define R500_DX_Q_SWIZ_G (1 << 14)
+ # define R500_DX_Q_SWIZ_B (2 << 14)
+ # define R500_DX_Q_SWIZ_A (3 << 14)
+-# define R500_DY_ADDR(x) (x << 16)
++# define R500_DY_ADDR(x) ((x) << 16)
+ # define R500_DY_ADDR_REL (1 << 17)
+ # define R500_DY_S_SWIZ_R (0 << 24)
+ # define R500_DY_S_SWIZ_G (1 << 24)
+@@ -3140,7 +3147,7 @@ enum {
+ # define R500_DY_Q_SWIZ_B (2 << 30)
+ # define R500_DY_Q_SWIZ_A (3 << 30)
+ #define R500_US_TEX_INST_0 0x9000
+-# define R500_TEX_ID(x) (x << 16)
++# define R500_TEX_ID(x) ((x) << 16)
+ # define R500_TEX_INST_NOP (0 << 22)
+ # define R500_TEX_INST_LD (1 << 22)
+ # define R500_TEX_INST_TEXKILL (2 << 22)
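The register macros above are rewritten so that every shift argument is parenthesized. Without the extra parentheses, passing a compound expression such as a bitwise OR binds to the shift first and silently produces the wrong field value. A minimal sketch with a hypothetical field macro shows the difference; the names are illustrative and not part of the driver.

    /* Hypothetical field macro, mirroring the ((x) << n) fix above. */
    #define FIELD_BAD(x)   (x << 8)     /* argument unprotected */
    #define FIELD_GOOD(x)  ((x) << 8)   /* argument protected   */

    /*
     * With a compound argument the two expand differently:
     *   FIELD_BAD(a | b)  -> (a | b << 8)  == a | (b << 8)   wrong
     *   FIELD_GOOD(a | b) -> ((a | b) << 8)                  intended
     */
    static unsigned int pack_field(unsigned int a, unsigned int b)
    {
        return FIELD_GOOD(a | b);
    }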
+diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
+index f9266e4..f46477f 100644
+--- a/src/mesa/drivers/dri/r300/r300_render.c
++++ b/src/mesa/drivers/dri/r300/r300_render.c
+@@ -66,8 +66,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_vp_build.h"
+ #include "radeon_reg.h"
+ #include "radeon_macros.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+@@ -175,85 +173,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct r300_dma_region *rvb = &rmesa->state.elt_dma;
+ void *out;
+
+- if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
+- rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
+- rvb->start = ((char *)elts) - rvb->address;
+- rvb->aos_offset =
+- rmesa->radeon.radeonScreen->gart_texture_offset +
+- rvb->start;
+- return;
+- } else if (r300IsGartMemory(rmesa, elts, 1)) {
+- WARN_ONCE("Pointer not within GART memory!\n");
+- _mesa_exit(-1);
+- }
+-
+- r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
+- rvb->aos_offset = GET_START(rvb);
+-
+- out = rvb->address + rvb->start;
++ radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
++ &rmesa->state.elt_dma_offset, n_elts * 4, 4);
++ radeon_bo_map(rmesa->state.elt_dma_bo, 1);
++ out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
+ memcpy(out, elts, n_elts * 4);
++ radeon_bo_unmap(rmesa->state.elt_dma_bo);
+ }
+
+-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
+- int vertex_count, int type)
++static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+- drm_radeon_cmd_header_t *cmd = NULL;
+-
+- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
+- e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+-
+- start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
+- e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+- (R300_VAP_PORT_IDX0 >> 2));
+- e32(addr);
+- e32(vertex_count);
++ BATCH_LOCALS(&rmesa->radeon);
++
++ if (vertex_count > 0) {
++ BEGIN_BATCH(10);
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
++ ((vertex_count + 0) << 16) |
++ type |
++ R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
++
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
++ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
++ (R300_VAP_PORT_IDX0 >> 2));
++ OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
++ rmesa->state.elt_dma_bo,
++ rmesa->state.elt_dma_offset,
++ RADEON_GEM_DOMAIN_GTT, 0, 0);
++ OUT_BATCH(vertex_count);
++ } else {
++ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
++ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
++ (R300_VAP_PORT_IDX0 >> 2));
++ OUT_BATCH(rmesa->state.elt_dma_offset);
++ OUT_BATCH(vertex_count);
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->state.elt_dma_bo,
++ RADEON_GEM_DOMAIN_GTT, 0, 0);
++ }
++ END_BATCH();
++ }
+ }
+
+ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+ {
++ BATCH_LOCALS(&rmesa->radeon);
++ uint32_t voffset;
+ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ int i;
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+- drm_radeon_cmd_header_t *cmd = NULL;
+-
++
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+ offset);
+
+- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
+- e32(nr);
+-
+- for (i = 0; i + 1 < nr; i += 2) {
+- e32((rmesa->state.aos[i].aos_size << 0) |
+- (rmesa->state.aos[i].aos_stride << 8) |
+- (rmesa->state.aos[i + 1].aos_size << 16) |
+- (rmesa->state.aos[i + 1].aos_stride << 24));
++
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ BEGIN_BATCH(sz+2+(nr * 2));
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++ OUT_BATCH(nr);
++
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->state.aos[i].components << 0) |
++ (rmesa->state.aos[i].stride << 8) |
++ (rmesa->state.aos[i + 1].components << 16) |
++ (rmesa->state.aos[i + 1].stride << 24));
++
++ voffset = rmesa->state.aos[i + 0].offset +
++ offset * 4 * rmesa->state.aos[i + 0].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->state.aos[i].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->state.aos[i + 1].offset +
++ offset * 4 * rmesa->state.aos[i + 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->state.aos[i+1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++
++ if (nr & 1) {
++ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
++ (rmesa->state.aos[nr - 1].stride << 8));
++ voffset = rmesa->state.aos[nr - 1].offset +
++ offset * 4 * rmesa->state.aos[nr - 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->state.aos[nr - 1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++ END_BATCH();
++ } else {
+
+- e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
+- e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
++ BEGIN_BATCH(sz+2+(nr * 2));
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++ OUT_BATCH(nr);
++
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->state.aos[i].components << 0) |
++ (rmesa->state.aos[i].stride << 8) |
++ (rmesa->state.aos[i + 1].components << 16) |
++ (rmesa->state.aos[i + 1].stride << 24));
++
++ voffset = rmesa->state.aos[i + 0].offset +
++ offset * 4 * rmesa->state.aos[i + 0].stride;
++ OUT_BATCH(voffset);
++ voffset = rmesa->state.aos[i + 1].offset +
++ offset * 4 * rmesa->state.aos[i + 1].stride;
++ OUT_BATCH(voffset);
++ }
++
++ if (nr & 1) {
++ OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
++ (rmesa->state.aos[nr - 1].stride << 8));
++ voffset = rmesa->state.aos[nr - 1].offset +
++ offset * 4 * rmesa->state.aos[nr - 1].stride;
++ OUT_BATCH(voffset);
++ }
++ for (i = 0; i + 1 < nr; i += 2) {
++ voffset = rmesa->state.aos[i + 0].offset +
++ offset * 4 * rmesa->state.aos[i + 0].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->state.aos[i+0].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->state.aos[i + 1].offset +
++ offset * 4 * rmesa->state.aos[i + 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->state.aos[i+1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++ if (nr & 1) {
++ voffset = rmesa->state.aos[nr - 1].offset +
++ offset * 4 * rmesa->state.aos[nr - 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->state.aos[nr-1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++ END_BATCH();
+ }
+
+- if (nr & 1) {
+- e32((rmesa->state.aos[nr - 1].aos_size << 0) |
+- (rmesa->state.aos[nr - 1].aos_stride << 8));
+- e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+- }
+ }
+
+ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+- int cmd_reserved = 0;
+- int cmd_written = 0;
+- drm_radeon_cmd_header_t *cmd = NULL;
++ BATCH_LOCALS(&rmesa->radeon);
+
+- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++ BEGIN_BATCH(3);
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++ END_BATCH();
+ }
+
+ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
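r300EmitElts and r300FireEB above replace the legacy e32() command stream with the shared BEGIN_BATCH/OUT_BATCH helpers, and the index buffer address is emitted in one of two ways: through OUT_BATCH_RELOC on the legacy DRM, or as a plain offset followed by radeon_cs_write_reloc when the kernel memory manager is in use. The sketch below isolates that address-emission pattern; the helper names come from the patch, while the function itself and the reservation size are assumptions (the legacy relocation can expand to extra dwords, so the count is deliberately generous).

    /* Illustrative only: emit one GTT buffer address using the two
     * relocation styles seen in r300FireEB above. */
    static void emit_buffer_address(r300ContextPtr rmesa,
                                    struct radeon_bo *bo, uint32_t offset)
    {
        BATCH_LOCALS(&rmesa->radeon);

        BEGIN_BATCH(8);   /* generous; exact footprint differs per path */
        if (!rmesa->radeon.radeonScreen->kernel_mm) {
            /* Legacy DRM: the relocation itself carries the address. */
            OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
        } else {
            /* Kernel memory manager: write the offset, then record a
             * relocation so the kernel can patch in the GPU address. */
            OUT_BATCH(offset);
            radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, bo,
                                  RADEON_GEM_DOMAIN_GTT, 0, 0);
        }
        END_BATCH();
    }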
+@@ -269,6 +346,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ if (type < 0 || num_verts <= 0)
+ return;
+
++ /* Make space for at least 64 dwords.
++ * This is supposed to ensure that we can get all rendering
++ * commands into a single command buffer.
++ */
++ rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
++
+ if (vb->Elts) {
+ if (num_verts > 65535) {
+ /* not implemented yet */
+@@ -288,11 +371,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ */
+ r300EmitElts(ctx, vb->Elts, num_verts);
+ r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+- r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
++ r300FireEB(rmesa, num_verts, type);
+ } else {
+ r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ r300FireAOS(rmesa, num_verts, type);
+ }
++ COMMIT_BATCH();
+ }
+
+ static GLboolean r300RunRender(GLcontext * ctx,
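r300RunRenderPrimitive now reserves command buffer space before emitting anything, so the element upload, the vertex arrays and the draw packet cannot be split across a buffer flush, and the whole sequence ends with COMMIT_BATCH. A condensed sketch of that calling order, valid only inside r300_render.c where the static helpers live, with the 64-dword figure taken from the patch rather than derived here:

    /* Sketch of the reserve-then-emit order used above. */
    static void draw_arrays_prim(r300ContextPtr rmesa, GLuint start,
                                 int num_verts, int type)
    {
        rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

        r300EmitAOS(rmesa, rmesa->state.aos_count, start); /* vertex arrays */
        r300FireAOS(rmesa, num_verts, type);               /* draw packet   */
        COMMIT_BATCH();
    }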
+@@ -303,7 +387,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+
+-
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+@@ -314,7 +397,7 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ r300UpdateShaderStates(rmesa);
+
+ r300EmitCacheFlush(rmesa);
+- r300EmitState(rmesa);
++ radeonEmitState(&rmesa->radeon);
+
+ for (i = 0; i < vb->PrimitiveCount; i++) {
+ GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+@@ -325,10 +408,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
+
+ r300EmitCacheFlush(rmesa);
+
+-#ifdef USER_BUFFERS
+- r300UseArrays(ctx);
+-#endif
+-
+ r300ReleaseArrays(ctx);
+
+ return GL_FALSE;
+@@ -347,6 +426,8 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ static int r300Fallback(GLcontext * ctx)
+ {
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
++ const unsigned back = ctx->Stencil._BackFace;
++
+ /* Do we need to use new-style shaders?
+ * Also is there a better way to do this? */
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+@@ -371,12 +452,14 @@ static int r300Fallback(GLcontext * ctx)
+
+ FALLBACK_IF(ctx->RenderMode != GL_RENDER);
+
+- FALLBACK_IF(ctx->Stencil._TestTwoSide
+- && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
+- || ctx->Stencil.ValueMask[0] !=
+- ctx->Stencil.ValueMask[1]
+- || ctx->Stencil.WriteMask[0] !=
+- ctx->Stencil.WriteMask[1]));
++ /* If GL_EXT_stencil_two_side is disabled, this fallback check can
++ * be removed.
++ */
++ FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
++ || ctx->Stencil.ValueMask[0] !=
++ ctx->Stencil.ValueMask[back]
++ || ctx->Stencil.WriteMask[0] !=
++ ctx->Stencil.WriteMask[back]);
+
+ if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
+ FALLBACK_IF(ctx->Point.PointSprite);
+@@ -428,6 +511,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx,
+ return GL_TRUE;
+ }
+
++ if (!r300ValidateBuffers(ctx))
++ return GL_TRUE;
++
+ r300UpdateShaders(rmesa);
+
+ vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
+index a63dbac..59728a0 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.c
++++ b/src/mesa/drivers/dri/r300/r300_state.c
+@@ -53,8 +53,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vbo/vbo.h"
+ #include "tnl/tnl.h"
+
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+@@ -590,7 +588,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
+ {
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+- if (r300->state.stencil.hw_stencil) {
++ if (r300->radeon.state.stencil.hwBuffer) {
+ R300_STATECHANGE(r300, zs);
+ if (state) {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+@@ -783,6 +781,7 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+ R300_FG_FOG_BLEND_FN_EXP2;
+ fogScale.f = 0.3 * ctx->Fog.Density;
+ fogStart.f = 0.0;
++ break;
+ default:
+ return;
+ }
+@@ -974,15 +973,9 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint refmask =
+- (((ctx->Stencil.
+- Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx->
+- Stencil.
+- ValueMask
+- [0] &
+- 0xff)
+- <<
+- R300_STENCILMASK_SHIFT));
+-
++ ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
++ | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
++ const unsigned back = ctx->Stencil._BackFace;
+ GLuint flag;
+
+ R300_STATECHANGE(rmesa, zs);
+@@ -1000,8 +993,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_S_FRONT_FUNC_SHIFT);
+
+- if (ctx->Stencil._TestTwoSide)
+- flag = translate_func(ctx->Stencil.Function[1]);
++ flag = translate_func(ctx->Stencil.Function[back]);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_S_BACK_FUNC_SHIFT);
+@@ -1026,6 +1018,7 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
+ GLenum fail, GLenum zfail, GLenum zpass)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
++ const unsigned back = ctx->Stencil._BackFace;
+
+ R300_STATECHANGE(rmesa, zs);
+ /* It is easier to mask what's left.. */
+@@ -1042,23 +1035,13 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+ R300_S_FRONT_ZPASS_OP_SHIFT);
+
+- if (ctx->Stencil._TestTwoSide) {
+- rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+- (translate_stencil_op(ctx->Stencil.FailFunc[1]) <<
+- R300_S_BACK_SFAIL_OP_SHIFT)
+- | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) <<
+- R300_S_BACK_ZFAIL_OP_SHIFT)
+- | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) <<
+- R300_S_BACK_ZPASS_OP_SHIFT);
+- } else {
+- rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+- (translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
+- R300_S_BACK_SFAIL_OP_SHIFT)
+- | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
+- R300_S_BACK_ZFAIL_OP_SHIFT)
+- | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+- R300_S_BACK_ZPASS_OP_SHIFT);
+- }
++ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
++ (translate_stencil_op(ctx->Stencil.FailFunc[back]) <<
++ R300_S_BACK_SFAIL_OP_SHIFT)
++ | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) <<
++ R300_S_BACK_ZFAIL_OP_SHIFT)
++ | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) <<
++ R300_S_BACK_ZPASS_OP_SHIFT);
+ }
+
+ /* =============================================================
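Both the fallback test in r300_render.c and the stencil functions above drop the removed _TestTwoSide flag and instead index Mesa's stencil arrays with ctx->Stencil._BackFace, which points at whichever back-face state set is currently active. A minimal sketch of how that single index replaces the old two-branch code; pack_stencil_funcs is a hypothetical helper, while the register shifts and translate_func are the ones used in r300StencilFuncSeparate.

    /* Hypothetical helper: build the front/back stencil-function bits
     * with the _BackFace index, as the patch now does inline. */
    static GLuint pack_stencil_funcs(const GLcontext *ctx)
    {
        const unsigned back = ctx->Stencil._BackFace; /* active back set */

        return (translate_func(ctx->Stencil.Function[0])
                    << R300_S_FRONT_FUNC_SHIFT)
             | (translate_func(ctx->Stencil.Function[back])
                    << R300_S_BACK_FUNC_SHIFT);
    }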
+@@ -1083,10 +1066,10 @@ static void r300UpdateWindow(GLcontext * ctx)
+ GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+ GLfloat sy = -v[MAT_SY];
+ GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+- GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
+- GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
++ GLfloat sz = v[MAT_SZ] * rmesa->radeon.state.depth.scale;
++ GLfloat tz = v[MAT_TZ] * rmesa->radeon.state.depth.scale;
+
+- R300_FIREVERTICES(rmesa);
++ radeon_firevertices(&rmesa->radeon);
+ R300_STATECHANGE(rmesa, vpt);
+
+ rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
+@@ -1100,10 +1083,19 @@ static void r300UpdateWindow(GLcontext * ctx)
+ static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height)
+ {
++ r300ContextPtr rmesa = R300_CONTEXT(ctx);
++ __DRIcontext *driContext = rmesa->radeon.dri.context;
+ /* Don't pipeline viewport changes, conflict with window offset
+ * setting below. Could apply deltas to rescue pipelined viewport
+ * values, or keep the originals hanging around.
+ */
++ if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled) {
++ radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
++ if (driContext->driDrawablePriv != driContext->driReadablePriv) {
++ radeon_update_renderbuffers(driContext,
++ driContext->driReadablePriv);
++ }
++ }
+ r300UpdateWindow(ctx);
+ }
+
+@@ -1144,55 +1136,25 @@ void r300UpdateViewportOffset(GLcontext * ctx)
+ void r300UpdateDrawBuffer(GLcontext * ctx)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- r300ContextPtr r300 = rmesa;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+- driRenderbuffer *drb;
++ struct radeon_renderbuffer *rrb;
+
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ /* draw to front */
+- drb =
+- (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
+- Renderbuffer;
++ rrb =
++ (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+ /* draw to back */
+- drb =
+- (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
+- Renderbuffer;
++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ } else {
+ /* drawing to multiple buffers, or none */
+ return;
+ }
+
+- assert(drb);
+- assert(drb->flippedPitch);
++ assert(rrb);
++ assert(rrb->pitch);
+
+ R300_STATECHANGE(rmesa, cb);
+-
+- r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset +
+- r300->radeon.radeonScreen->fbLocation;
+- r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch;
+-
+- if (r300->radeon.radeonScreen->cpp == 4)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+- else
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+- if (r300->radeon.sarea->tiling_enabled)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-#if 0
+- R200_STATECHANGE(rmesa, ctx);
+-
+- /* Note: we used the (possibly) page-flipped values */
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+- = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+- & R200_COLOROFFSET_MASK);
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+-
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+- R200_COLOR_TILE_ENABLE;
+- }
+-#endif
+ }
+
+ static void
+@@ -1412,7 +1374,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+ }
+
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+- cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
++ cmdpacket0(r300->radeon.radeonScreen,
++ R300_US_TEX_INST_0, code->tex.length);
+ }
+
+ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+@@ -1463,7 +1426,7 @@ static GLuint translate_lod_bias(GLfloat bias)
+ static void r300SetupTextures(GLcontext * ctx)
+ {
+ int i, mtu;
+- struct r300_tex_obj *t;
++ struct radeon_tex_obj *t;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int hw_tmu = 0;
+ int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
+@@ -1497,21 +1460,16 @@ static void r300SetupTextures(GLcontext * ctx)
+ /* We cannot let disabled tmu offsets pass DRM */
+ for (i = 0; i < mtu; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+-
+-#if 0 /* Enables old behaviour */
+- hw_tmu = i;
+-#endif
+ tmu_mappings[i] = hw_tmu;
+
+- t = r300->state.texture.unit[i].texobj;
+- /* XXX questionable fix for bug 9170: */
++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (!t)
+ continue;
+
+- if ((t->format & 0xffffff00) == 0xffffff00) {
++ if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
+ WARN_ONCE
+ ("unknown texture format (entry %x) encountered. Help me !\n",
+- t->format & 0xff);
++ t->pp_txformat & 0xff);
+ }
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+@@ -1522,29 +1480,28 @@ static void r300SetupTextures(GLcontext * ctx)
+
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] =
+- gen_fixed_filter(t->filter) | (hw_tmu << 28);
++ gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
+ /* Note: There is a LOD bias per texture unit and a LOD bias
+ * per texture object. We add them here to get the correct behaviour.
+ * (The per-texture object LOD bias was introduced in OpenGL 1.4
+ * and is not present in the EXT_texture_object extension).
+ */
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+- t->filter_1 |
+- translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
++ t->pp_txfilter_1 |
++ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
+ r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+- t->size;
++ t->pp_txsize;
+ r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+- hw_tmu] = t->format;
++ hw_tmu] = t->pp_txformat;
+ r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+- t->pitch_reg;
+- r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
+- hw_tmu] = t->offset;
++ t->pp_txpitch;
++ r300->hw.textures[hw_tmu] = t;
+
+- if (t->offset & R300_TXO_MACRO_TILE) {
++ if (t->tile_bits & R300_TXO_MACRO_TILE) {
+ WARN_ONCE("macro tiling enabled!\n");
+ }
+
+- if (t->offset & R300_TXO_MICRO_TILE) {
++ if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ WARN_ONCE("micro tiling enabled!\n");
+ }
+
+@@ -1561,21 +1518,21 @@ static void r300SetupTextures(GLcontext * ctx)
+ }
+
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+
+ if (!fp) /* should only happenen once, just after context is created */
+ return;
+@@ -1587,7 +1544,7 @@ static void r300SetupTextures(GLcontext * ctx)
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+- cmdpacket0(R300_TX_FILTER0_0, 1);
++ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
+ }
+ r300SetupFragmentShaderTextures(ctx, tmu_mappings);
+ } else
+@@ -1756,7 +1713,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
+ | R300_HIRES_EN;
+
+ assert(high_rr >= 0);
+- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1);
+ r300->hw.rc.cmd[2] = high_rr;
+
+ if (InputsRead)
+@@ -1916,7 +1873,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
+ | R300_HIRES_EN;
+
+ assert(high_rr >= 0);
+- r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
++ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1);
+ r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+
+ if (InputsRead)
+@@ -2114,6 +2071,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
+ (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+ }
+
++
+ static void r300SetupVertexProgram(r300ContextPtr rmesa)
+ {
+ GLcontext *ctx = rmesa->radeon.glCtx;
+@@ -2143,6 +2101,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
+ */
+ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ {
++ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(cap),
+@@ -2188,8 +2147,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ case GL_POLYGON_OFFSET_FILL:
+ r300SetPolygonOffsetState(ctx, state);
+ break;
++ case GL_SCISSOR_TEST:
++ radeon_firevertices(&rmesa->radeon);
++ rmesa->radeon.state.scissor.enabled = state;
++ radeonUpdateScissor( ctx );
++ break;
+ default:
+- radeonEnable(ctx, cap, state);
+ break;
+ }
+ }
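The new GL_SCISSOR_TEST case fires any buffered vertices before the scissor state is changed, since primitives already queued were built against the old scissor rectangle; r300UpdateWindow applies the same flush-then-modify rule. A minimal sketch of that rule, using only calls that appear in the patch:

    /* Flush queued rendering before touching state the queued commands
     * implicitly depend on, then recompute the derived hardware state. */
    static void set_scissor_enabled(GLcontext *ctx, GLboolean state)
    {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);

        radeon_firevertices(&rmesa->radeon);   /* drain pending prims */
        rmesa->radeon.state.scissor.enabled = state;
        radeonUpdateScissor(ctx);              /* rebuild cliprects   */
    }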
+@@ -2200,6 +2163,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ static void r300ResetHwState(r300ContextPtr r300)
+ {
+ GLcontext *ctx = r300->radeon.glCtx;
++ struct radeon_renderbuffer *rrb;
+ int has_tcl = 1;
+
+ if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+@@ -2230,8 +2194,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+
+ r300UpdateCulling(ctx);
+
+- r300UpdateTextureState(ctx);
+-
+ r300SetBlendState(ctx);
+ r300SetLogicOpState(ctx);
+
+@@ -2378,20 +2340,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+
+ r300BlendColor(ctx, ctx->Color.BlendColor);
+
+- /* Again, r300ClearBuffer uses this */
+- r300->hw.cb.cmd[R300_CB_OFFSET] =
+- r300->radeon.state.color.drawOffset +
+- r300->radeon.radeonScreen->fbLocation;
+- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-
+- if (r300->radeon.radeonScreen->cpp == 4)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+- else
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+- if (r300->radeon.sarea->tiling_enabled)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-
+ r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+@@ -2407,12 +2355,8 @@ static void r300ResetHwState(r300ContextPtr r300)
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
+
+- r300->hw.zb.cmd[R300_ZB_OFFSET] =
+- r300->radeon.radeonScreen->depthOffset +
+- r300->radeon.radeonScreen->fbLocation;
+- r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
+-
+- if (r300->radeon.sarea->tiling_enabled) {
++ rrb = r300->radeon.state.depth.rrb;
++ if (rrb && rrb->bo && (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)) {
+ /* XXX: Turn off when clearing buffers ? */
+ r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
+
+@@ -2423,18 +2367,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+
+ r300->hw.zb_depthclearvalue.cmd[1] = 0;
+
+- switch (ctx->Visual.depthBits) {
+- case 16:
+- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z;
+- break;
+- case 24:
+- r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+- break;
+- default:
+- fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
+- _mesa_exit(-1);
+- }
+-
+ r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
+ r300->hw.zstencil_format.cmd[3] = 0x00000003;
+ r300->hw.zstencil_format.cmd[4] = 0x00000000;
+@@ -2455,7 +2387,7 @@ static void r300ResetHwState(r300ContextPtr r300)
+ r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
+ }
+
+- r300->hw.all_dirty = GL_TRUE;
++ r300->radeon.hw.all_dirty = GL_TRUE;
+ }
+
+ void r300UpdateShaders(r300ContextPtr rmesa)
+@@ -2466,8 +2398,8 @@ void r300UpdateShaders(r300ContextPtr rmesa)
+
+ ctx = rmesa->radeon.glCtx;
+
+- if (rmesa->NewGLState && hw_tcl_on) {
+- rmesa->NewGLState = 0;
++ if (rmesa->radeon.NewGLState && hw_tcl_on) {
++ rmesa->radeon.NewGLState = 0;
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ rmesa->temp_attrib[i] =
+@@ -2546,10 +2478,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
+ R300_STATECHANGE(rmesa, fpi[1]);
+ R300_STATECHANGE(rmesa, fpi[2]);
+ R300_STATECHANGE(rmesa, fpi[3]);
+- rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
+- rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+- rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+- rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
++ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
++ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
++ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
++ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
+ rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
+@@ -2580,7 +2512,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
+ }
+
+ R300_STATECHANGE(rmesa, fpp);
+- rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
++ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
+ for (i = 0; i < code->const_nr; i++) {
+ const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ &fp->mesa_program.Base, code->constant[i]);
+@@ -2682,7 +2614,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
+ GLcontext *ctx;
+ ctx = rmesa->radeon.glCtx;
+
+- r300UpdateTextureState(ctx);
+ r300SetEarlyZState(ctx);
+
+ GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+@@ -2727,7 +2658,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
+
+ r300UpdateStateParameters(ctx, new_state);
+
+- r300->NewGLState |= new_state;
++ r300->radeon.NewGLState |= new_state;
+ }
+
+ /**
+@@ -2740,26 +2671,9 @@ void r300InitState(r300ContextPtr r300)
+ GLcontext *ctx = r300->radeon.glCtx;
+ GLuint depth_fmt;
+
+- radeonInitState(&r300->radeon);
+-
+- switch (ctx->Visual.depthBits) {
+- case 16:
+- r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
+- depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
+- break;
+- case 24:
+- r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
+- depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+- break;
+- default:
+- fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
+- ctx->Visual.depthBits);
+- _mesa_exit(-1);
+- }
+-
+ /* Only have hw stencil when depth buffer is 24 bits deep */
+- r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
+- ctx->Visual.depthBits == 24);
++ r300->radeon.state.stencil.hwBuffer = (ctx->Visual.stencilBits > 0 &&
++ ctx->Visual.depthBits == 24);
+
+ memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
+
+@@ -2791,12 +2705,33 @@ void r300UpdateClipPlanes( GLcontext *ctx )
+ }
+ }
+
++static void r300DrawBuffer( GLcontext *ctx, GLenum mode )
++{
++ r300ContextPtr rmesa = R300_CONTEXT(ctx);
++ if (RADEON_DEBUG & DEBUG_DRI)
++ fprintf(stderr, "%s %s\n", __FUNCTION__,
++ _mesa_lookup_enum_by_nr( mode ));
++
++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */
++
++ radeonSetCliprects( &rmesa->radeon );
++ if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled)
++ radeonUpdatePageFlipping(&rmesa->radeon);
++}
++
++static void r300ReadBuffer( GLcontext *ctx, GLenum mode )
++{
++ if (RADEON_DEBUG & DEBUG_DRI)
++ fprintf(stderr, "%s %s\n", __FUNCTION__,
++ _mesa_lookup_enum_by_nr( mode ));
++
++};
++
+ /**
+ * Initialize driver's state callback functions
+ */
+ void r300InitStateFuncs(struct dd_function_table *functions)
+ {
+- radeonInitStateFuncs(functions);
+
+ functions->UpdateState = r300InvalidateState;
+ functions->AlphaFunc = r300AlphaFunc;
+@@ -2833,4 +2768,8 @@ void r300InitStateFuncs(struct dd_function_table *functions)
+ functions->RenderMode = r300RenderMode;
+
+ functions->ClipPlane = r300ClipPlane;
++ functions->Scissor = radeonScissor;
++
++ functions->DrawBuffer = r300DrawBuffer;
++ functions->ReadBuffer = r300ReadBuffer;
+ }
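Every cmdpacket0 call in this file gains the radeon screen as its first argument. The screen carries the kernel_mm flag that the rest of the patch uses to choose between the legacy drm_r300_cmd_header_t stream and raw CP packets, so the most plausible reading is that the helper now encodes the register-write header differently per back end. A sketch of that dispatch; encode_cp_type0 and encode_legacy_packet0 are stand-ins, since the real encoders live in r300_cmdbuf.c outside this excerpt.

    /* Stand-in prototypes for the real encoders (not shown here). */
    extern uint32_t encode_cp_type0(int reg, int count);
    extern uint32_t encode_legacy_packet0(int reg, int count);

    static uint32_t cmdpacket0_sketch(r300ContextPtr rmesa, int reg, int count)
    {
        if (rmesa->radeon.radeonScreen->kernel_mm)
            return encode_cp_type0(reg, count);   /* raw CP type-0 header */
        return encode_legacy_packet0(reg, count); /* legacy cmd header    */
    }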
+diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
+index 0589ab7..247a20e 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.h
++++ b/src/mesa/drivers/dri/r300/r300_state.h
+@@ -39,32 +39,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #define R300_NEWPRIM( rmesa ) \
+ do { \
+- if ( rmesa->dma.flush ) \
+- rmesa->dma.flush( rmesa ); \
++ if ( rmesa->radeon.dma.flush ) \
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
+ } while (0)
+
+ #define R300_STATECHANGE(r300, atom) \
+ do { \
+ R300_NEWPRIM(r300); \
+ r300->hw.atom.dirty = GL_TRUE; \
+- r300->hw.is_dirty = GL_TRUE; \
++ r300->radeon.hw.is_dirty = GL_TRUE; \
+ } while(0)
+
+-#define R300_PRINT_STATE(r300, atom) \
+- r300PrintStateAtom(r300, &r300->hw.atom)
+-
+-/* Fire the buffered vertices no matter what.
+- TODO: This has not been implemented yet
+- */
+-#define R300_FIREVERTICES( r300 ) \
+-do { \
+- \
+- if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \
+- r300Flush( (r300)->radeon.glCtx ); \
+- } \
+- \
+-} while (0)
+-
+ // r300_state.c
+ extern int future_hw_tcl_on;
+ void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
+diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
+index b6e7ce1..0d8b7e5 100644
+--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
++++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
+@@ -56,26 +56,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "r300_emit.h"
+-#include "r300_mem.h"
++#include "r300_tex.h"
+
+-static void flush_last_swtcl_prim( r300ContextPtr rmesa );
+-
+-
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset);
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
+ #define EMIT_ATTR( ATTR, STYLE ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ } while (0)
+
+ #define EMIT_PAD( N ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ } while (0)
+
+ static void r300SetVertexFormat( GLcontext *ctx )
+@@ -86,7 +83,6 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ DECLARE_RENDERINPUTS(index_bitset);
+ GLuint InputsRead = 0, OutputsWritten = 0;
+ int vap_fmt_0 = 0;
+- int vap_vte_cntl = 0;
+ int offset = 0;
+ int vte = 0;
+ GLint inputs[VERT_ATTRIB_MAX];
+@@ -114,7 +110,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ }
+
+ assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+- rmesa->swtcl.vertex_attr_count = 0;
++ rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+ /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+ * build up a hardware vertex.
+@@ -175,7 +171,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ inputs[i] = -1;
+ }
+ }
+-
++
+ /* Fixed, apply to vir0 only */
+ if (InputsRead & (1 << VERT_ATTRIB_POS))
+ inputs[VERT_ATTRIB_POS] = 0;
+@@ -186,16 +182,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ if (InputsRead & (1 << i))
+ inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+-
++
+ for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i)) {
+ tab[nr++] = i;
+ }
+ }
+-
++
+ for (i = 0; i < nr; i++) {
+ int ci;
+-
++
+ swizzle[i][0] = SWIZZLE_ZERO;
+ swizzle[i][1] = SWIZZLE_ZERO;
+ swizzle[i][2] = SWIZZLE_ZERO;
+@@ -215,98 +211,29 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+ r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+ nr);
+-
++
+ R300_STATECHANGE(rmesa, vic);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+-
++
+ R300_STATECHANGE(rmesa, vof);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+-
+- rmesa->swtcl.vertex_size =
++
++ rmesa->radeon.swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+- rmesa->swtcl.vertex_attrs,
+- rmesa->swtcl.vertex_attr_count,
++ rmesa->radeon.swtcl.vertex_attrs,
++ rmesa->radeon.swtcl.vertex_attr_count,
+ NULL, 0 );
+-
+- rmesa->swtcl.vertex_size /= 4;
++
++ rmesa->radeon.swtcl.vertex_size /= 4;
+
+ RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+
+
+ R300_STATECHANGE(rmesa, vte);
+ rmesa->hw.vte.cmd[1] = vte;
+- rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
+-}
+-
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( r300ContextPtr rmesa )
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- rmesa->dma.flush = NULL;
+-
+- if (rmesa->dma.current.buf) {
+- struct r300_dma_region *current = &rmesa->dma.current;
+- GLuint current_offset = GET_START(current);
+-
+- assert (current->start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- current->ptr);
+-
+- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+-
+- r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+-
+- r300EmitState(rmesa);
+-
+- r300EmitVertexAOS( rmesa,
+- rmesa->swtcl.vertex_size,
+- current_offset);
+-
+- r300EmitVbufPrim( rmesa,
+- rmesa->swtcl.hw_primitive,
+- rmesa->swtcl.numverts);
+-
+- r300EmitCacheFlush(rmesa);
+- }
+-
+- rmesa->swtcl.numverts = 0;
+- current->start = current->ptr;
+- }
+-}
+-
+-/* Alloc space in the current dma region.
+- */
+-static void *
+-r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+-{
+- GLuint bytes = vsize * nverts;
+-
+- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+- r300RefillCurrentDmaRegion( rmesa, bytes);
+-
+- if (!rmesa->dma.flush) {
+- rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+- rmesa->dma.flush = flush_last_swtcl_prim;
+- }
+-
+- ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+- ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+- ASSERT( rmesa->dma.current.start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- rmesa->dma.current.ptr );
+-
+- {
+- GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+- rmesa->dma.current.ptr += bytes;
+- rmesa->swtcl.numverts += nverts;
+- return head;
+- }
++ rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
+ }
+
+ static GLuint reduced_prim[] = {
+@@ -346,13 +273,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+ #define CTX_ARG r300ContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
+ #define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+- const char *r300verts = (char *)rmesa->swtcl.verts;
++ const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
+ #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+-#define VERTEX r300Vertex
++#define VERTEX r300Vertex
+ #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
+ #define PRINT_VERTEX(x)
+ #undef TAG
+@@ -409,7 +336,7 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+ /* Only used to pull back colors into vertices (ie, we know color is
+ * floating point).
+@@ -455,7 +382,7 @@ do { \
+ ***********************************************************************/
+
+ #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -512,8 +439,8 @@ static void init_rast_tab( void )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+- const GLuint vertsize = rmesa->swtcl.vertex_size; \
+- const char *r300verts = (char *)rmesa->swtcl.verts; \
++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
++ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \
+ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
+ const GLboolean stipple = ctx->Line.StippleFlag; \
+ (void) elt; (void) stipple;
+@@ -545,7 +472,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
+ if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
+ if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT;
+
+- if (index != rmesa->swtcl.RenderIndex) {
++ if (index != rmesa->radeon.swtcl.RenderIndex) {
+ tnl->Driver.Render.Points = rast_tab[index].points;
+ tnl->Driver.Render.Line = rast_tab[index].line;
+ tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -562,7 +489,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
+ tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ }
+
+- rmesa->swtcl.RenderIndex = index;
++ rmesa->radeon.swtcl.RenderIndex = index;
+ }
+ }
+
+@@ -572,18 +499,18 @@ static void r300RenderStart(GLcontext *ctx)
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ // fprintf(stderr, "%s\n", __FUNCTION__);
+
+- r300ChooseRenderState(ctx);
++ r300ChooseRenderState(ctx);
+ r300SetVertexFormat(ctx);
+
++ r300ValidateBuffers(ctx);
++
+ r300UpdateShaders(rmesa);
+ r300UpdateShaderStates(rmesa);
+
+ r300EmitCacheFlush(rmesa);
+-
+- if (rmesa->dma.flush != 0 &&
+- rmesa->dma.flush != flush_last_swtcl_prim)
+- rmesa->dma.flush( rmesa );
+-
++ if (rmesa->radeon.dma.flush != NULL) {
++ rmesa->radeon.dma.flush(ctx);
++ }
+ }
+
+ static void r300RenderFinish(GLcontext *ctx)
+@@ -593,10 +520,10 @@ static void r300RenderFinish(GLcontext *ctx)
+ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+- if (rmesa->swtcl.hw_primitive != hwprim) {
++
++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ R300_NEWPRIM( rmesa );
+- rmesa->swtcl.hw_primitive = hwprim;
++ rmesa->radeon.swtcl.hw_primitive = hwprim;
+ }
+ }
+
+@@ -604,14 +531,14 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+ {
+
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- rmesa->swtcl.render_primitive = prim;
++ rmesa->radeon.swtcl.render_primitive = prim;
+
+ if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ return;
+
+ r300RasterPrimitive( ctx, reduced_prim[prim] );
+ // fprintf(stderr, "%s\n", __FUNCTION__);
+-
++
+ }
+
+ static void r300ResetLineStipple(GLcontext *ctx)
+@@ -625,12 +552,12 @@ void r300InitSwtcl(GLcontext *ctx)
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ static int firsttime = 1;
+-
++
+ if (firsttime) {
+ init_rast_tab();
+ firsttime = 0;
+ }
+-
++
+ tnl->Driver.Render.Start = r300RenderStart;
+ tnl->Driver.Render.Finish = r300RenderFinish;
+ tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+@@ -638,15 +565,15 @@ void r300InitSwtcl(GLcontext *ctx)
+ tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ tnl->Driver.Render.Interp = _tnl_interp;
+-
++
+ /* FIXME: what are these numbers? */
+- _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
++ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 48 * sizeof(GLfloat) );
+-
+- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+- rmesa->swtcl.RenderIndex = ~0;
+- rmesa->swtcl.render_primitive = GL_TRIANGLES;
+- rmesa->swtcl.hw_primitive = 0;
++
++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++ rmesa->radeon.swtcl.hw_primitive = 0;
+
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+@@ -655,9 +582,9 @@ void r300InitSwtcl(GLcontext *ctx)
+ _tnl_need_projected_coords( ctx, GL_FALSE );
+ r300ChooseRenderState(ctx);
+
+- _mesa_validate_all_lighting_tables( ctx );
++ _mesa_validate_all_lighting_tables( ctx );
+
+- tnl->Driver.NotifyMaterialChange =
++ tnl->Driver.NotifyMaterialChange =
+ _mesa_validate_all_lighting_tables;
+ }
+
+@@ -665,33 +592,53 @@ void r300DestroySwtcl(GLcontext *ctx)
+ {
+ }
+
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
+ {
+- int cmd_reserved = 0;
+- int cmd_written = 0;
++ BATCH_LOCALS(&rmesa->radeon);
+
+- drm_radeon_cmd_header_t *cmd = NULL;
+ if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n",
+- __FUNCTION__, vertex_size, offset);
+-
+- start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
+- e32(1);
+- e32(vertex_size | (vertex_size << 8));
+- e32(offset);
++ fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n",
++ __FUNCTION__, vertex_size, offset);
++
++ BEGIN_BATCH(7);
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
++ OUT_BATCH(1);
++ OUT_BATCH(vertex_size | (vertex_size << 8));
++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++ END_BATCH();
+ }
+
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+ {
+-
+- int cmd_reserved = 0;
+- int cmd_written = 0;
++ BATCH_LOCALS(&rmesa->radeon);
+ int type, num_verts;
+- drm_radeon_cmd_header_t *cmd = NULL;
+
+ type = r300PrimitiveType(rmesa, primitive);
+ num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
+-
+- start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+- e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++
++ BEGIN_BATCH(3);
++ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++ END_BATCH();
++}
++
++void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
++{
++ r300ContextPtr rmesa = R300_CONTEXT(ctx);
++
++ rcommonEnsureCmdBufSpace(&rmesa->radeon,
++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++ __FUNCTION__);
++ radeonEmitState(&rmesa->radeon);
++ r300EmitVertexAOS(rmesa,
++ rmesa->radeon.swtcl.vertex_size,
++ rmesa->radeon.dma.current,
++ current_offset);
++
++ r300EmitVbufPrim(rmesa,
++ rmesa->radeon.swtcl.hw_primitive,
++ rmesa->radeon.swtcl.numverts);
++ r300EmitCacheFlush(rmesa);
++ COMMIT_BATCH();
++
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
+index 55df53c..23b4ce3 100644
+--- a/src/mesa/drivers/dri/r300/r300_swtcl.h
++++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
+@@ -42,4 +42,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ extern void r300InitSwtcl( GLcontext *ctx );
+ extern void r300DestroySwtcl( GLcontext *ctx );
+
++extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
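With flush_last_swtcl_prim and r300AllocDmaLowVerts gone, software-TCL vertices are written straight into the shared radeon DMA buffer by rcommonAllocDmaLowVerts, and the common code exposes the pending work through the radeon.dma.flush callback, which for r300 presumably ends up in the new r300_swtcl_flush. The drain pattern the patch uses before reusing that buffer is sketched below; it mirrors the call sites in r300RenderStart and R300_NEWPRIM rather than adding new driver logic.

    /* Drain buffered software-TCL vertices, if any, before anything
     * that would invalidate them (new primitive, state change, ...). */
    static void drain_swtcl(GLcontext *ctx)
    {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);

        if (rmesa->radeon.dma.flush)
            rmesa->radeon.dma.flush(ctx);
    }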
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
+index 8ab382c..0f5afbf 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.c
++++ b/src/mesa/drivers/dri/r300/r300_tex.c
+@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "main/enums.h"
+ #include "main/image.h"
++#include "main/mipmap.h"
+ #include "main/simple_list.h"
+ #include "main/texformat.h"
+ #include "main/texstore.h"
+@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_tex.h"
+
+ #include "xmlpool.h"
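From here on the texture code works on radeonTexObjPtr instead of r300TexObjPtr, and the accesses change from t->base.tObj to &t->base and t->base.LodBias. That implies the shared radeon texture object embeds gl_texture_object directly as its base member, so the driver object can be recovered from the Mesa object with a simple cast. The layout below is an assumption inferred from those accesses; the real definition lives in the shared radeon texture headers, and the _sketch names are not the driver's.

    /* Assumed layout, inferred from the t->base usage in this patch. */
    struct radeon_tex_obj_sketch {
        struct gl_texture_object base;  /* must stay first for the cast */
        GLuint pp_txfilter;             /* hardware filter bits         */
        GLuint pp_txfilter_1;
        GLuint pp_txformat;
        GLuint pp_border_color;
        GLboolean validated;
        /* ... mipmap tree pointer, image state, etc. ... */
    };

    static inline struct radeon_tex_obj_sketch *
    radeon_tex_obj_sketch(struct gl_texture_object *texObj)
    {
        /* Valid because base is the first member of the driver object. */
        return (struct radeon_tex_obj_sketch *) texObj;
    }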
+@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
+ *
+ * \param t Texture object whose wrap modes are to be set
+ */
+-static void r300UpdateTexWrap(r300TexObjPtr t)
++static void r300UpdateTexWrap(radeonTexObjPtr t)
+ {
+- struct gl_texture_object *tObj = t->base.tObj;
++ struct gl_texture_object *tObj = &t->base;
+
+- t->filter &=
++ t->pp_txfilter &=
+ ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
+
+- t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
+
+ if (tObj->Target != GL_TEXTURE_1D) {
+- t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
+
+ if (tObj->Target == GL_TEXTURE_3D)
+- t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
++ t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
+ }
+ }
+
+@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy)
+ * \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level
+ */
+-static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
++static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+ {
+- t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+- t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
++ /* Force revalidation to account for switches from/to mipmapping. */
++ t->validated = GL_FALSE;
++
++ t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
++ t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
+
+ /* Note that EXT_texture_filter_anisotropic is extremely vague about
+ * how anisotropic filtering interacts with the "normal" filter modes.
+@@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+ * filter settings completely. This includes driconf's settings.
+ */
+ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+- t->filter |= R300_TX_MAG_FILTER_ANISO
++ t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+ | R300_TX_MIN_FILTER_ANISO
+ | R300_TX_MIN_FILTER_MIP_LINEAR
+ | aniso_filter(anisotropy);
+@@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+
+ switch (minf) {
+ case GL_NEAREST:
+- t->filter |= R300_TX_MIN_FILTER_NEAREST;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+- t->filter |= R300_TX_MIN_FILTER_LINEAR;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+- t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+- t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
++ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ }
+
+@@ -163,743 +168,20 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+ */
+ switch (magf) {
+ case GL_NEAREST:
+- t->filter |= R300_TX_MAG_FILTER_NEAREST;
++ t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+- t->filter |= R300_TX_MAG_FILTER_LINEAR;
++ t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
+ break;
+ }
+ }
+
+-static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
++static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4])
+ {
+ t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+ }
+
+ /**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
+-{
+- r300TexObjPtr t;
+-
+- t = CALLOC_STRUCT(r300_tex_obj);
+- texObj->DriverData = t;
+- if (t != NULL) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE) {
+- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+- (void *)texObj, (void *)t);
+- }
+-
+- /* Initialize non-image-dependent parts of the state:
+- */
+- t->base.tObj = texObj;
+- t->border_fallback = GL_FALSE;
+-
+- make_empty_list(&t->base);
+-
+- r300UpdateTexWrap(t);
+- r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+- r300SetTexBorderColor(t, texObj->_BorderChan);
+- }
+-
+- return t;
+-}
+-
+-/* try to find a format which will only need a memcopy */
+-static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
+- GLenum srcType)
+-{
+- const GLuint ui = 1;
+- const GLubyte littleEndian = *((const GLubyte *)&ui);
+-
+- if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+- return &_mesa_texformat_rgba8888;
+- } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+- (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+- (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+- return &_mesa_texformat_rgba8888_rev;
+- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+- srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+- return &_mesa_texformat_argb8888_rev;
+- } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+- srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+- return &_mesa_texformat_argb8888;
+- } else
+- return _dri_texformat_argb8888;
+-}
+-
+-static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
+- GLint
+- internalFormat,
+- GLenum format,
+- GLenum type)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- const GLboolean do32bpt =
+- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+- const GLboolean force16bpt =
+- (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+- (void)format;
+-
+-#if 0
+- fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
+- _mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+- _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+- fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
+-#endif
+-
+- switch (internalFormat) {
+- case 4:
+- case GL_RGBA:
+- case GL_COMPRESSED_RGBA:
+- switch (type) {
+- case GL_UNSIGNED_INT_10_10_10_2:
+- case GL_UNSIGNED_INT_2_10_10_10_REV:
+- return do32bpt ? _dri_texformat_argb8888 :
+- _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- default:
+- return do32bpt ? r300Choose8888TexFormat(format, type) :
+- _dri_texformat_argb4444;
+- }
+-
+- case 3:
+- case GL_RGB:
+- case GL_COMPRESSED_RGB:
+- switch (type) {
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_5_6_5:
+- case GL_UNSIGNED_SHORT_5_6_5_REV:
+- return _dri_texformat_rgb565;
+- default:
+- return do32bpt ? _dri_texformat_argb8888 :
+- _dri_texformat_rgb565;
+- }
+-
+- case GL_RGBA8:
+- case GL_RGB10_A2:
+- case GL_RGBA12:
+- case GL_RGBA16:
+- return !force16bpt ?
+- r300Choose8888TexFormat(format,
+- type) : _dri_texformat_argb4444;
+-
+- case GL_RGBA4:
+- case GL_RGBA2:
+- return _dri_texformat_argb4444;
+-
+- case GL_RGB5_A1:
+- return _dri_texformat_argb1555;
+-
+- case GL_RGB8:
+- case GL_RGB10:
+- case GL_RGB12:
+- case GL_RGB16:
+- return !force16bpt ? _dri_texformat_argb8888 :
+- _dri_texformat_rgb565;
+-
+- case GL_RGB5:
+- case GL_RGB4:
+- case GL_R3_G3_B2:
+- return _dri_texformat_rgb565;
+-
+- case GL_ALPHA:
+- case GL_ALPHA4:
+- case GL_ALPHA8:
+- case GL_ALPHA12:
+- case GL_ALPHA16:
+- case GL_COMPRESSED_ALPHA:
+- return _dri_texformat_a8;
+-
+- case 1:
+- case GL_LUMINANCE:
+- case GL_LUMINANCE4:
+- case GL_LUMINANCE8:
+- case GL_LUMINANCE12:
+- case GL_LUMINANCE16:
+- case GL_COMPRESSED_LUMINANCE:
+- return _dri_texformat_l8;
+-
+- case 2:
+- case GL_LUMINANCE_ALPHA:
+- case GL_LUMINANCE4_ALPHA4:
+- case GL_LUMINANCE6_ALPHA2:
+- case GL_LUMINANCE8_ALPHA8:
+- case GL_LUMINANCE12_ALPHA4:
+- case GL_LUMINANCE12_ALPHA12:
+- case GL_LUMINANCE16_ALPHA16:
+- case GL_COMPRESSED_LUMINANCE_ALPHA:
+- return _dri_texformat_al88;
+-
+- case GL_INTENSITY:
+- case GL_INTENSITY4:
+- case GL_INTENSITY8:
+- case GL_INTENSITY12:
+- case GL_INTENSITY16:
+- case GL_COMPRESSED_INTENSITY:
+- return _dri_texformat_i8;
+-
+- case GL_YCBCR_MESA:
+- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+- type == GL_UNSIGNED_BYTE)
+- return &_mesa_texformat_ycbcr;
+- else
+- return &_mesa_texformat_ycbcr_rev;
+-
+- case GL_RGB_S3TC:
+- case GL_RGB4_S3TC:
+- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgb_dxt1;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgba_dxt1;
+-
+- case GL_RGBA_S3TC:
+- case GL_RGBA4_S3TC:
+- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+- return &_mesa_texformat_rgba_dxt3;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+- return &_mesa_texformat_rgba_dxt5;
+-
+- case GL_ALPHA16F_ARB:
+- return &_mesa_texformat_alpha_float16;
+- case GL_ALPHA32F_ARB:
+- return &_mesa_texformat_alpha_float32;
+- case GL_LUMINANCE16F_ARB:
+- return &_mesa_texformat_luminance_float16;
+- case GL_LUMINANCE32F_ARB:
+- return &_mesa_texformat_luminance_float32;
+- case GL_LUMINANCE_ALPHA16F_ARB:
+- return &_mesa_texformat_luminance_alpha_float16;
+- case GL_LUMINANCE_ALPHA32F_ARB:
+- return &_mesa_texformat_luminance_alpha_float32;
+- case GL_INTENSITY16F_ARB:
+- return &_mesa_texformat_intensity_float16;
+- case GL_INTENSITY32F_ARB:
+- return &_mesa_texformat_intensity_float32;
+- case GL_RGB16F_ARB:
+- return &_mesa_texformat_rgba_float16;
+- case GL_RGB32F_ARB:
+- return &_mesa_texformat_rgba_float32;
+- case GL_RGBA16F_ARB:
+- return &_mesa_texformat_rgba_float16;
+- case GL_RGBA32F_ARB:
+- return &_mesa_texformat_rgba_float32;
+-
+- case GL_DEPTH_COMPONENT:
+- case GL_DEPTH_COMPONENT16:
+- case GL_DEPTH_COMPONENT24:
+- case GL_DEPTH_COMPONENT32:
+-#if 0
+- switch (type) {
+- case GL_UNSIGNED_BYTE:
+- case GL_UNSIGNED_SHORT:
+- return &_mesa_texformat_z16;
+- case GL_UNSIGNED_INT:
+- return &_mesa_texformat_z32;
+- case GL_UNSIGNED_INT_24_8_EXT:
+- default:
+- return &_mesa_texformat_z24_s8;
+- }
+-#else
+- return &_mesa_texformat_z16;
+-#endif
+-
+- default:
+- _mesa_problem(ctx,
+- "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
+- (int)internalFormat);
+- return NULL;
+- }
+-
+- return NULL; /* never get here */
+-}
+-
+-static GLboolean
+-r300ValidateClientStorage(GLcontext * ctx, GLenum target,
+- GLint internalFormat,
+- GLint srcWidth, GLint srcHeight,
+- GLenum format, GLenum type, const void *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "intformat %s format %s type %s\n",
+- _mesa_lookup_enum_by_nr(internalFormat),
+- _mesa_lookup_enum_by_nr(format),
+- _mesa_lookup_enum_by_nr(type));
+-
+- if (!ctx->Unpack.ClientStorage)
+- return 0;
+-
+- if (ctx->_ImageTransferState ||
+- texImage->IsCompressed || texObj->GenerateMipmap)
+- return 0;
+-
+- /* This list is incomplete, may be different on ppc???
+- */
+- switch (internalFormat) {
+- case GL_RGBA:
+- if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+- texImage->TexFormat = _dri_texformat_argb8888;
+- } else
+- return 0;
+- break;
+-
+- case GL_RGB:
+- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+- texImage->TexFormat = _dri_texformat_rgb565;
+- } else
+- return 0;
+- break;
+-
+- case GL_YCBCR_MESA:
+- if (format == GL_YCBCR_MESA &&
+- type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
+- texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+- } else if (format == GL_YCBCR_MESA &&
+- (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+- type == GL_UNSIGNED_BYTE)) {
+- texImage->TexFormat = &_mesa_texformat_ycbcr;
+- } else
+- return 0;
+- break;
+-
+- default:
+- return 0;
+- }
+-
+- /* Could deal with these packing issues, but currently don't:
+- */
+- if (packing->SkipPixels ||
+- packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
+- return 0;
+- }
+-
+- GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+- format, type);
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: srcRowStride %d/%x\n",
+- __FUNCTION__, srcRowStride, srcRowStride);
+-
+- /* Could check this later in upload, pitch restrictions could be
+- * relaxed, but would need to store the image pitch somewhere,
+- * as packing details might change before image is uploaded:
+- */
+- if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
+- || (srcRowStride & 63))
+- return 0;
+-
+- /* Have validated that _mesa_transfer_teximage would be a straight
+- * memcpy at this point. NOTE: future calls to TexSubImage will
+- * overwrite the client data. This is explicitly mentioned in the
+- * extension spec.
+- */
+- texImage->Data = (void *)pixels;
+- texImage->IsClientData = GL_TRUE;
+- texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
+-
+- return 1;
+-}
+-
+-static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint border,
+- GLenum format, GLenum type, const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+- return;
+- }
+- }
+-
+- /* Note, this will call ChooseTextureFormat */
+- _mesa_store_teximage1d(ctx, target, level, internalFormat,
+- width, border, format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+- GLint xoffset,
+- GLsizei width,
+- GLenum format, GLenum type,
+- const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+- assert(t); /* this _should_ be true */
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+- format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLenum format, GLenum type, const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face =
+- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if (t != NULL) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-
+- if (r300ValidateClientStorage(ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n",
+- __FUNCTION__);
+- } else {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n",
+- __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r300ChooseTextureFormat.
+- */
+- _mesa_store_teximage2d(ctx, target, level, internalFormat,
+- width, height, border, format, type,
+- pixels, &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+- }
+-}
+-
+-static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset,
+- GLsizei width, GLsizei height,
+- GLenum format, GLenum type,
+- const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face =
+- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert(t); /* this _should_ be true */
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+- height, format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+- GLint level, GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLsizei imageSize, const GLvoid * data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face =
+- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if (t != NULL) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY,
+- "glCompressedTexImage2D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-
+- /* can't call this, different parameters. Would never evaluate to true anyway currently */
+-#if 0
+- if (r300ValidateClientStorage(ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n",
+- __FUNCTION__);
+- } else
+-#endif
+- {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n",
+- __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r300ChooseTextureFormat.
+- */
+- _mesa_store_compressed_teximage2d(ctx, target, level,
+- internalFormat, width, height,
+- border, imageSize, data,
+- texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+- }
+-}
+-
+-static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+- GLint level, GLint xoffset,
+- GLint yoffset, GLsizei width,
+- GLsizei height, GLenum format,
+- GLsizei imageSize, const GLvoid * data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face =
+- (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert(t); /* this _should_ be true */
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY,
+- "glCompressedTexSubImage3D");
+- return;
+- }
+- }
+-
+- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
+- yoffset, width, height, format,
+- imageSize, data, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint depth,
+- GLint border,
+- GLenum format, GLenum type, const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+- return;
+- }
+- }
+-
+- texImage->IsClientData = GL_FALSE;
+-
+-#if 0
+- if (r300ValidateClientStorage(ctx, target,
+- internalFormat,
+- width, height,
+- format, type, pixels,
+- packing, texObj, texImage)) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using client storage\n",
+- __FUNCTION__);
+- } else
+-#endif
+- {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: Using normal storage\n",
+- __FUNCTION__);
+-
+- /* Normal path: copy (to cached memory) and eventually upload
+- * via another copy to GART memory and then a blit... Could
+- * eliminate one copy by going straight to (permanent) GART.
+- *
+- * Note, this will call r300ChooseTextureFormat.
+- */
+- _mesa_store_teximage3d(ctx, target, level, internalFormat,
+- width, height, depth, border,
+- format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+- }
+-}
+-
+-static void
+-r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset, GLint zoffset,
+- GLsizei width, GLsizei height, GLsizei depth,
+- GLenum format, GLenum type,
+- const GLvoid * pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage)
+-{
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-/* fprintf(stderr, "%s\n", __FUNCTION__); */
+-
+- assert(t); /* this _should_ be true */
+- if (t) {
+- driSwapOutTextureObject(t);
+- } else {
+- t = (driTextureObject *) r300AllocTexObj(texObj);
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+- return;
+- }
+- texObj->DriverData = t;
+- }
+-
+- _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+- width, height, depth,
+- format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+@@ -908,7 +190,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat * params)
+ {
+- r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
++ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+@@ -941,7 +223,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ * we just have to rely on loading the right subset of mipmap levels
+ * to simulate a clamped LOD.
+ */
+- driSwapOutTextureObject((driTextureObject *) t);
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
++ t->validated = GL_FALSE;
++ }
+ break;
+
+ case GL_DEPTH_TEXTURE_MODE:
+@@ -964,27 +250,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ }
+ }
+
+-static void r300BindTexture(GLcontext * ctx, GLenum target,
+- struct gl_texture_object *texObj)
+-{
+- if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+- fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
+- (void *)texObj, ctx->Texture.CurrentUnit);
+- }
+-
+- if ((target == GL_TEXTURE_1D)
+- || (target == GL_TEXTURE_2D)
+- || (target == GL_TEXTURE_3D)
+- || (target == GL_TEXTURE_CUBE_MAP)
+- || (target == GL_TEXTURE_RECTANGLE_NV)) {
+- assert(texObj->DriverData != NULL);
+- }
+-}
+-
+ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- driTextureObject *t = (driTextureObject *) texObj->DriverData;
++ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+@@ -992,14 +261,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ _mesa_lookup_enum_by_nr(texObj->Target));
+ }
+
+- if (t != NULL) {
+- if (rmesa) {
+- R300_FIREVERTICES(rmesa);
+- }
++ if (rmesa) {
++ int i;
++ radeon_firevertices(&rmesa->radeon);
++
++ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
++ if (rmesa->hw.textures[i] == t)
++ rmesa->hw.textures[i] = 0;
++ }
+
+- driDestroyTextureObject(t);
++ if (t->bo) {
++ radeon_bo_unref(t->bo);
++ t->bo = NULL;
++ }
++
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
+ }
+- /* Free mipmap images and the texture object itself */
+ _mesa_delete_texture_object(ctx, texObj);
+ }
+
+@@ -1008,8 +287,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+- * Note: we could use containment here to 'derive' the driver-specific
+- * texture object from the core mesa gl_texture_object. Not done at this time.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+@@ -1017,14 +294,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+ GLenum target)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_object *obj;
+- obj = _mesa_new_texture_object(ctx, name, target);
+- if (!obj)
+- return NULL;
+- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++
+
+- r300AllocTexObj(obj);
+- return obj;
++ if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++ t, _mesa_lookup_enum_by_nr(target));
++ }
++
++ _mesa_initialize_texture_object(&t->base, name, target);
++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++ /* Initialize hardware state */
++ r300UpdateTexWrap(t);
++ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
++ r300SetTexBorderColor(t, t->base._BorderChan);
++
++ return &t->base;
+ }
+
+ void r300InitTextureFuncs(struct dd_function_table *functions)
+@@ -1032,22 +318,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+- functions->ChooseTextureFormat = r300ChooseTextureFormat;
+- functions->TexImage1D = r300TexImage1D;
+- functions->TexImage2D = r300TexImage2D;
+- functions->TexImage3D = r300TexImage3D;
+- functions->TexSubImage1D = r300TexSubImage1D;
+- functions->TexSubImage2D = r300TexSubImage2D;
+- functions->TexSubImage3D = r300TexSubImage3D;
++ functions->NewTextureImage = radeonNewTextureImage;
++ functions->FreeTexImageData = radeonFreeTexImageData;
++ functions->MapTexture = radeonMapTexture;
++ functions->UnmapTexture = radeonUnmapTexture;
++
++ functions->ChooseTextureFormat = radeonChooseTextureFormat;
++ functions->TexImage1D = radeonTexImage1D;
++ functions->TexImage2D = radeonTexImage2D;
++ functions->TexImage3D = radeonTexImage3D;
++ functions->TexSubImage1D = radeonTexSubImage1D;
++ functions->TexSubImage2D = radeonTexSubImage2D;
++ functions->TexSubImage3D = radeonTexSubImage3D;
++ functions->GetTexImage = radeonGetTexImage;
++ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ functions->NewTextureObject = r300NewTextureObject;
+- functions->BindTexture = r300BindTexture;
+ functions->DeleteTexture = r300DeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexParameter = r300TexParameter;
+
+- functions->CompressedTexImage2D = r300CompressedTexImage2D;
+- functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
++ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
++ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
++
++ functions->GenerateMipmap = radeonGenerateMipmap;
+
+ driInitTextureFormats();
+ }
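The pattern every hunk in this file relies on is containment rather than the old texObj->DriverData pointer: the shared radeonTexObj embeds the core gl_texture_object as its first member, so radeon_tex_obj() is only a pointer cast and _mesa_delete_texture_object() frees the whole allocation. Below is a minimal sketch of that idiom, limited to fields that actually appear in the hunks above (base, mt, bo, validated, image_override, tile_bits and the cached pp_* registers); the struct tags and anything not visible in the patch are illustrative, and the real definitions live in the shared radeon_* headers this patch adds.

/* Sketch only -- see the shared radeon headers added by this patch
 * for the real definitions. */
typedef struct radeon_tex_obj {
	struct gl_texture_object base;   /* must be first: shares the object's address */

	struct _radeon_mipmap_tree *mt;  /* backing miptree, may be NULL */
	struct radeon_bo *bo;            /* set only for image_override textures */
	GLboolean validated;
	GLboolean image_override;

	GLuint tile_bits;
	/* cached hardware register values, filled in at validation time */
	GLuint pp_txformat;
	GLuint pp_txfilter;
	GLuint pp_txsize;
	GLuint pp_txpitch;
} radeonTexObj, *radeonTexObjPtr;

/* Because base is the first member, the driver object and the core
 * Mesa object are the same allocation. */
static inline radeonTexObj *radeon_tex_obj(struct gl_texture_object *texObj)
{
	return (radeonTexObj *) texObj;
}

This is why r300NewTextureObject above can CALLOC_STRUCT(radeon_tex_obj), initialize t->base in place and return &t->base, and why the deleted NULL-check-then-r300AllocTexObj dance in every TexImage hook is no longer needed.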
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
+index b86d45b..baad3fe 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.h
++++ b/src/mesa/drivers/dri/r300/r300_tex.h
+@@ -37,16 +37,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+
++extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
++ __DRIdrawable *dPriv);
++
+ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth,
+ GLuint pitch);
+
+-extern void r300UpdateTextureState(GLcontext * ctx);
+-
+-extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
+- GLuint face);
+-
+-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
++extern GLboolean r300ValidateBuffers(GLcontext * ctx);
+
+ extern void r300InitTextureFuncs(struct dd_function_table *functions);
+
+diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
+deleted file mode 100644
+index b03eefa..0000000
+--- a/src/mesa/drivers/dri/r300/r300_texmem.c
++++ /dev/null
+@@ -1,567 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) Tungsten Graphics 2002. All Rights Reserved.
+-The Weather Channel, Inc. funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86
+-license. This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/**
+- * \file
+- *
+- * \author Gareth Hughes <gareth@valinux.com>
+- *
+- * \author Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/colormac.h"
+-#include "main/macros.h"
+-#include "main/simple_list.h"
+-#include "radeon_reg.h" /* gets definition for usleep */
+-#include "r300_context.h"
+-#include "r300_state.h"
+-#include "r300_cmdbuf.h"
+-#include "radeon_ioctl.h"
+-#include "r300_tex.h"
+-#include "r300_ioctl.h"
+-#include <unistd.h> /* for usleep() */
+-
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+-
+-/**
+- * Destroy any device-dependent state associated with the texture. This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
+-{
+- int i;
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE) {
+- fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+- (void *)t, (void *)t->base.tObj);
+- }
+-
+- for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
+- if (rmesa->state.texture.unit[i].texobj == t) {
+- rmesa->state.texture.unit[i].texobj = NULL;
+- }
+- }
+-}
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
+- r300TexObjPtr t,
+- struct gl_texture_image *texImage,
+- GLint hwlevel,
+- GLint x, GLint y,
+- GLint width, GLint height)
+-{
+- const struct gl_texture_format *texFormat = texImage->TexFormat;
+- GLuint srcPitch, dstPitch;
+- int blit_format;
+- int srcOffset;
+-
+- /*
+- * XXX it appears that we always upload the full image, not a subimage.
+- * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever
+- * changed, the src pitch will have to change.
+- */
+- switch (texFormat->TexelBytes) {
+- case 1:
+- blit_format = R300_CP_COLOR_FORMAT_CI8;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- case 2:
+- blit_format = R300_CP_COLOR_FORMAT_RGB565;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- case 4:
+- blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- case 8:
+- case 16:
+- blit_format = R300_CP_COLOR_FORMAT_CI8;
+- srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+- dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+- break;
+- default:
+- return;
+- }
+-
+- t->image[0][hwlevel].data = texImage->Data;
+- srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+-
+- assert(srcOffset != ~0);
+-
+- /* Don't currently need to cope with small pitches?
+- */
+- width = texImage->Width;
+- height = texImage->Height;
+-
+- if (texFormat->TexelBytes > 4) {
+- width *= texFormat->TexelBytes;
+- }
+-
+- r300EmitWait(rmesa, R300_WAIT_3D);
+-
+- r300EmitBlit(rmesa, blit_format,
+- srcPitch,
+- srcOffset,
+- dstPitch,
+- t->bufAddr,
+- x,
+- y,
+- t->image[0][hwlevel].x + x,
+- t->image[0][hwlevel].y + y, width, height);
+-
+- r300EmitWait(rmesa, R300_WAIT_2D);
+-}
+-
+-static void r300UploadRectSubImage(r300ContextPtr rmesa,
+- r300TexObjPtr t,
+- struct gl_texture_image *texImage,
+- GLint x, GLint y, GLint width, GLint height)
+-{
+- const struct gl_texture_format *texFormat = texImage->TexFormat;
+- int blit_format, dstPitch, done;
+-
+- switch (texFormat->TexelBytes) {
+- case 1:
+- blit_format = R300_CP_COLOR_FORMAT_CI8;
+- break;
+- case 2:
+- blit_format = R300_CP_COLOR_FORMAT_RGB565;
+- break;
+- case 4:
+- blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+- break;
+- case 8:
+- case 16:
+- blit_format = R300_CP_COLOR_FORMAT_CI8;
+- break;
+- default:
+- return;
+- }
+-
+- t->image[0][0].data = texImage->Data;
+-
+- /* Currently don't need to cope with small pitches.
+- */
+- width = texImage->Width;
+- height = texImage->Height;
+- dstPitch = t->pitch;
+-
+- if (texFormat->TexelBytes > 4) {
+- width *= texFormat->TexelBytes;
+- }
+-
+- if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+- /* In this case, could also use GART texturing. This is
+- * currently disabled, but has been tested & works.
+- */
+- t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+- t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr,
+- "Using GART texturing for rectangular client texture\n");
+-
+- /* Release FB memory allocated for this image:
+- */
+- /* FIXME This may not be correct as driSwapOutTextureObject sets
+- * FIXME dirty_images. It may be fine, though.
+- */
+- if (t->base.memBlock) {
+- driSwapOutTextureObject((driTextureObject *) t);
+- }
+- } else if (texImage->IsClientData) {
+- /* Data already in GART memory, with usable pitch.
+- */
+- GLuint srcPitch;
+- srcPitch = texImage->RowStride * texFormat->TexelBytes;
+- r300EmitBlit(rmesa,
+- blit_format,
+- srcPitch,
+- r300GartOffsetFromVirtual(rmesa, texImage->Data),
+- dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
+- } else {
+- /* Data not in GART memory, or bad pitch.
+- */
+- for (done = 0; done < height;) {
+- struct r300_dma_region region;
+- int lines =
+- MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
+- int src_pitch;
+- char *tex;
+-
+- src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+- tex = (char *)texImage->Data + done * src_pitch;
+-
+- memset(&region, 0, sizeof(region));
+- r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
+- 1024);
+-
+- /* Copy texdata to dma:
+- */
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr,
+- "%s: src_pitch %d dst_pitch %d\n",
+- __FUNCTION__, src_pitch, dstPitch);
+-
+- if (src_pitch == dstPitch) {
+- memcpy(region.address + region.start, tex,
+- lines * src_pitch);
+- } else {
+- char *buf = region.address + region.start;
+- int i;
+- for (i = 0; i < lines; i++) {
+- memcpy(buf, tex, src_pitch);
+- buf += dstPitch;
+- tex += src_pitch;
+- }
+- }
+-
+- r300EmitWait(rmesa, R300_WAIT_3D);
+-
+- /* Blit to framebuffer
+- */
+- r300EmitBlit(rmesa,
+- blit_format,
+- dstPitch, GET_START(&region),
+- dstPitch | (t->tile_bits >> 16),
+- t->bufAddr, 0, 0, 0, done, width, lines);
+-
+- r300EmitWait(rmesa, R300_WAIT_2D);
+-#ifdef USER_BUFFERS
+- r300_mem_use(rmesa, region.buf->id);
+-#endif
+-
+- r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
+- done += lines;
+- }
+- }
+-}
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
+- GLint hwlevel,
+- GLint x, GLint y, GLint width, GLint height,
+- GLuint face)
+-{
+- struct gl_texture_image *texImage = NULL;
+- GLuint offset;
+- GLint imageWidth, imageHeight;
+- GLint ret;
+- drm_radeon_texture_t tex;
+- drm_radeon_tex_image_t tmp;
+- const int level = hwlevel + t->base.firstLevel;
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE) {
+- fprintf(stderr,
+- "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+- __FUNCTION__, (void *)t, (void *)t->base.tObj, level,
+- width, height, face);
+- }
+-
+- ASSERT(face < 6);
+-
+- /* Ensure we have a valid texture to upload */
+- if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
+- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+- return;
+- }
+-
+- texImage = t->base.tObj->Image[face][level];
+-
+- if (!texImage) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: texImage %d is NULL!\n",
+- __FUNCTION__, level);
+- return;
+- }
+- if (!texImage->Data) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: image data is NULL!\n",
+- __FUNCTION__);
+- return;
+- }
+-
+- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- assert(level == 0);
+- assert(hwlevel == 0);
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: image data is rectangular\n",
+- __FUNCTION__);
+- r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
+- return;
+- } else if (texImage->IsClientData) {
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr,
+- "%s: image data is in GART client storage\n",
+- __FUNCTION__);
+- r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
+- width, height);
+- return;
+- } else if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "%s: image data is in normal memory\n",
+- __FUNCTION__);
+-
+- imageWidth = texImage->Width;
+- imageHeight = texImage->Height;
+-
+- offset = t->bufAddr;
+-
+- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+- GLint imageX = 0;
+- GLint imageY = 0;
+- GLint blitX = t->image[face][hwlevel].x;
+- GLint blitY = t->image[face][hwlevel].y;
+- GLint blitWidth = t->image[face][hwlevel].width;
+- GLint blitHeight = t->image[face][hwlevel].height;
+- fprintf(stderr, " upload image: %d,%d at %d,%d\n",
+- imageWidth, imageHeight, imageX, imageY);
+- fprintf(stderr, " upload blit: %d,%d at %d,%d\n",
+- blitWidth, blitHeight, blitX, blitY);
+- fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n",
+- (GLuint) offset, hwlevel, level);
+- }
+-
+- t->image[face][hwlevel].data = texImage->Data;
+-
+- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+- * We used to use 1, 2 and 4-byte texels and used to use the texture
+- * width to dictate the blit width - but that won't work for compressed
+- * textures. (Brian)
+- * NOTE: can't do that with texture tiling. (sroland)
+- */
+- tex.offset = offset;
+- tex.image = &tmp;
+- /* copy (x,y,width,height,data) */
+- memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
+-
+- if (texImage->TexFormat->TexelBytes > 4) {
+- const int log2TexelBytes =
+- (3 + (texImage->TexFormat->TexelBytes >> 4));
+- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+- tex.pitch =
+- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+- 64, 1);
+- tex.height = imageHeight;
+- tex.width = imageWidth << log2TexelBytes;
+- tex.offset += (tmp.x << log2TexelBytes) & ~1023;
+- tmp.x = tmp.x % (1024 >> log2TexelBytes);
+- tmp.width = tmp.width << log2TexelBytes;
+- } else if (texImage->TexFormat->TexelBytes) {
+- /* use multi-byte upload scheme */
+- tex.height = imageHeight;
+- tex.width = imageWidth;
+- switch (texImage->TexFormat->TexelBytes) {
+- case 1:
+- tex.format = RADEON_TXFORMAT_I8;
+- break;
+- case 2:
+- tex.format = RADEON_TXFORMAT_AI88;
+- break;
+- case 4:
+- tex.format = RADEON_TXFORMAT_ARGB8888;
+- break;
+- }
+- tex.pitch =
+- MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+- 64, 1);
+- tex.offset += tmp.x & ~1023;
+- tmp.x = tmp.x % 1024;
+-
+- if (t->tile_bits & R300_TXO_MICRO_TILE) {
+- /* need something like "tiled coordinates" ? */
+- tmp.y = tmp.x / (tex.pitch * 128) * 2;
+- tmp.x =
+- tmp.x % (tex.pitch * 128) / 2 /
+- texImage->TexFormat->TexelBytes;
+- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+- } else {
+- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+- }
+-#if 1
+- if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
+- (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
+- && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
+- && (texImage->Height >= 8))
+- || (texImage->Height >= 16))) {
+- /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+- OR if height is smaller than 8 automatically, but if micro tiling is active
+- the limit is height 16 instead ? */
+- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+- }
+-#endif
+- } else {
+- /* In case of for instance 8x8 texture (2x2 dxt blocks),
+- padding after the first two blocks is needed (only
+- with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
+- has 4 real pixels. Needed so the kernel module reads
+- the right amount of data. */
+- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+- tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
+- tex.height = (imageHeight + 3) / 4;
+- tex.width = (imageWidth + 3) / 4;
+- if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
+- tex.width *= 8;
+- } else {
+- tex.width *= 16;
+- }
+- }
+-
+- LOCK_HARDWARE(&rmesa->radeon);
+- do {
+- ret =
+- drmCommandWriteRead(rmesa->radeon.dri.fd,
+- DRM_RADEON_TEXTURE, &tex,
+- sizeof(drm_radeon_texture_t));
+- if (ret) {
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr,
+- "DRM_RADEON_TEXTURE: again!\n");
+- usleep(1);
+- }
+- } while (ret == -EAGAIN);
+-
+- UNLOCK_HARDWARE(&rmesa->radeon);
+-
+- if (ret) {
+- fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
+- fprintf(stderr, " offset=0x%08x\n", offset);
+- fprintf(stderr, " image width=%d height=%d\n",
+- imageWidth, imageHeight);
+- fprintf(stderr, " blit width=%d height=%d data=%p\n",
+- t->image[face][hwlevel].width,
+- t->image[face][hwlevel].height,
+- t->image[face][hwlevel].data);
+- _mesa_exit(-1);
+- }
+-}
+-
+-/**
+- * Upload the texture images associated with texture \a t. This might
+- * require the allocation of texture memory.
+- *
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded. Zero for non-cube maps.
+- */
+-
+-int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
+-{
+- const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+- if (t->image_override)
+- return 0;
+-
+- if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+- fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+- (void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
+- t->base.totalSize, t->base.firstLevel,
+- t->base.lastLevel);
+- }
+-
+- if (t->base.totalSize == 0)
+- return 0;
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+- radeonFinish(rmesa->radeon.glCtx);
+- }
+-
+- LOCK_HARDWARE(&rmesa->radeon);
+-
+- if (t->base.memBlock == NULL) {
+- int heap;
+-
+- heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
+- (driTextureObject *) t);
+- if (heap == -1) {
+- UNLOCK_HARDWARE(&rmesa->radeon);
+- return -1;
+- }
+-
+- /* Set the base offset of the texture image */
+- t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
+- + t->base.memBlock->ofs;
+- t->offset = t->bufAddr;
+-
+- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+- /* hope it's safe to add that here... */
+- t->offset |= t->tile_bits;
+- }
+- }
+-
+- /* Let the world know we've used this memory recently.
+- */
+- driUpdateTextureLRU((driTextureObject *) t);
+- UNLOCK_HARDWARE(&rmesa->radeon);
+-
+- /* Upload any images that are new */
+- if (t->base.dirty_images[face]) {
+- int i;
+- for (i = 0; i < numLevels; i++) {
+- if ((t->base.
+- dirty_images[face] & (1 <<
+- (i + t->base.firstLevel))) !=
+- 0) {
+- r300UploadSubImage(rmesa, t, i, 0, 0,
+- t->image[face][i].width,
+- t->image[face][i].height,
+- face);
+- }
+- }
+- t->base.dirty_images[face] = 0;
+- }
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+- radeonFinish(rmesa->radeon.glCtx);
+- }
+-
+- return 0;
+-}
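All of the texture-heap bookkeeping deleted above (driAllocateTexture, driUpdateTextureLRU, the LOCK_HARDWARE/DRM_RADEON_TEXTURE upload loop) is replaced in the r300_texstate.c hunk that follows by r300ValidateBuffers(): the driver now just lists every buffer object the next draw will touch and asks the command-stream code whether they fit. The control flow reduces to a check, flush-once, retry loop; here is a stripped-down sketch of it, using the radeon_cs_space_check() interface exactly as it appears in the added code (the validate_bos_sketch name and the elided bos[] fill are illustrative only).

/* Sketch of the flow implemented by r300ValidateBuffers() below. */
static GLboolean validate_bos_sketch(GLcontext *ctx, r300ContextPtr rmesa)
{
	struct radeon_cs_space_check bos[16];
	int num_bo, ret, flushed = 0;

again:
	num_bo = 0;
	/* ... add the color/depth renderbuffers with a VRAM write domain and
	 * each enabled texture's miptree (or override) BO with GTT|VRAM read
	 * domains, exactly as the hunk below does ... */

	ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
	if (ret == RADEON_CS_SPACE_OP_TO_BIG)
		return GL_FALSE;            /* can never fit: software fallback */
	if (ret == RADEON_CS_SPACE_FLUSH) {
		radeonFlush(ctx);           /* submit pending work to free space */
		if (flushed)
			return GL_FALSE;    /* still does not fit after a flush */
		flushed = 1;
		goto again;
	}
	return GL_TRUE;
}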
+diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
+index e2329f0..8a90069 100644
+--- a/src/mesa/drivers/dri/r300/r300_texstate.c
++++ b/src/mesa/drivers/dri/r300/r300_texstate.c
+@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+-#include "radeon_ioctl.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_tex.h"
+ #include "r300_reg.h"
+
+@@ -143,13 +143,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ },
+ };
+ const GLuint *format;
+- r300TexObjPtr t;
++ radeonTexObjPtr t;
+
+ if (!tObj)
+ return;
+
+- t = (r300TexObjPtr) tObj->DriverData;
+-
++ t = radeon_tex_obj(tObj);
+
+ switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
+ case MESA_FORMAT_Z16:
+@@ -171,13 +170,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+
+ switch (tObj->DepthMode) {
+ case GL_LUMINANCE:
+- t->format = format[0];
++ t->pp_txformat = format[0];
+ break;
+ case GL_INTENSITY:
+- t->format = format[1];
++ t->pp_txformat = format[1];
+ break;
+ case GL_ALPHA:
+- t->format = format[2];
++ t->pp_txformat = format[2];
+ break;
+ default:
+ /* Error...which should have already been caught by higher
+@@ -190,479 +189,309 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+
+
+ /**
+- * Compute sizes and fill in offset and blit information for the given
+- * image (determined by \p face and \p level).
+- *
+- * \param curOffset points to the offset at which the image is to be stored
+- * and is updated by this function according to the size of the image.
+- */
+-static void compute_tex_image_offset(
+- struct gl_texture_object *tObj,
+- GLuint face,
+- GLint level,
+- GLint* curOffset)
+-{
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+- const struct gl_texture_image* texImage;
+- GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
+- GLuint texelBytes;
+- GLuint size;
+-
+- texImage = tObj->Image[0][level + t->base.firstLevel];
+- if (!texImage)
+- return;
+-
+- texelBytes = texImage->TexFormat->TexelBytes;
+-
+- /* find image size in bytes */
+- if (texImage->IsCompressed) {
+- if ((t->format & R300_TX_FORMAT_DXT1) ==
+- R300_TX_FORMAT_DXT1) {
+- // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+- if ((texImage->Width + 3) < 8) /* width one block */
+- size = texImage->CompressedSize * 4;
+- else if ((texImage->Width + 3) < 16)
+- size = texImage->CompressedSize * 2;
+- else
+- size = texImage->CompressedSize;
+- } else {
+- /* DXT3/5, 16 bytes per block */
+- WARN_ONCE
+- ("DXT 3/5 suffers from multitexturing problems!\n");
+- // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+- if ((texImage->Width + 3) < 8)
+- size = texImage->CompressedSize * 2;
+- else
+- size = texImage->CompressedSize;
+- }
+- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- size =
+- ((texImage->Width * texelBytes +
+- 63) & ~63) * texImage->Height;
+- blitWidth = 64 / texelBytes;
+- } else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+- though the actual offset may be different (if texture is less than
+- 32 bytes width) to the untiled case */
+- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+- size =
+- (w * ((texImage->Height + 1) / 2)) *
+- texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- } else {
+- int w = (texImage->Width * texelBytes + 31) & ~31;
+- size = w * texImage->Height * texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- }
+- assert(size > 0);
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+- texImage->Width, texImage->Height,
+- texImage->Depth,
+- texImage->TexFormat->TexelBytes,
+- texImage->InternalFormat);
+-
+- /* All images are aligned to a 32-byte offset */
+- *curOffset = (*curOffset + 0x1f) & ~0x1f;
+-
+- if (texelBytes) {
+- /* fix x and y coords up later together with offset */
+- t->image[face][level].x = *curOffset;
+- t->image[face][level].y = 0;
+- t->image[face][level].width =
+- MIN2(size / texelBytes, blitWidth);
+- t->image[face][level].height =
+- (size / texelBytes) / t->image[face][level].width;
+- } else {
+- t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
+- t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
+- t->image[face][level].width =
+- MIN2(size, R300_BLIT_WIDTH_BYTES);
+- t->image[face][level].height = size / t->image[face][level].width;
+- }
+-
+- if (RADEON_DEBUG & DEBUG_TEXTURE)
+- fprintf(stderr,
+- "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+- level, face, texImage->Width, texImage->Height,
+- t->image[face][level].x, t->image[face][level].y,
+- t->image[face][level].width, t->image[face][level].height,
+- size, *curOffset);
+-
+- *curOffset += size;
+-}
+-
+-
+-
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here. \c filter, \c format, etc. will be set here
+- * too.
++ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- * hardware state.
++ * \param t the r300 texture object
+ */
+-static void r300SetTexImages(r300ContextPtr rmesa,
+- struct gl_texture_object *tObj)
++static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
+ {
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+- const struct gl_texture_image *baseImage =
+- tObj->Image[0][tObj->BaseLevel];
+- GLint curOffset;
+- GLint i, texelBytes;
+- GLint numLevels;
+- GLint log2Width, log2Height, log2Depth;
+-
+- /* Set the hardware texture format
+- */
++ const struct gl_texture_image *firstImage;
++ int firstlevel = t->mt ? t->mt->firstLevel : 0;
++
++ firstImage = t->base.Image[0][firstlevel];
++
+ if (!t->image_override
+- && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
+- if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
+- r300SetDepthTexMode(tObj);
++ && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++ if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
++ r300SetDepthTexMode(&t->base);
+ } else {
+- t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
++ t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
+ }
+
+- t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
++ t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
+ } else if (!t->image_override) {
+ _mesa_problem(NULL, "unexpected texture format in %s",
+ __FUNCTION__);
+ return;
+ }
+
+- texelBytes = baseImage->TexFormat->TexelBytes;
+-
+- /* Compute which mipmap levels we really want to send to the hardware.
+- */
+- driCalculateTextureFirstLastLevel((driTextureObject *) t);
+- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
++ if (t->image_override && t->bo)
++ return;
+
+- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
++ t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
++ | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
++ | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
++ | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
+
+- /* Calculate mipmap offsets and dimensions for blitting (uploading)
+- * The idea is that we lay out the mipmap levels within a block of
+- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+- */
+ t->tile_bits = 0;
+
+- /* figure out if this texture is suitable for tiling. */
+-#if 0 /* Disabled for now */
+- if (texelBytes) {
+- if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+- /* texrect might be able to use micro tiling too in theory? */
+- (baseImage->Height > 1)) {
+-
+- /* allow 32 (bytes) x 1 mip (which will use two times the space
+- the non-tiled version would use) max if base texture is large enough */
+- if ((numLevels == 1) ||
+- (((baseImage->Width * texelBytes /
+- baseImage->Height) <= 32)
+- && (baseImage->Width * texelBytes > 64))
+- ||
+- ((baseImage->Width * texelBytes /
+- baseImage->Height) <= 16)) {
+- t->tile_bits |= R300_TXO_MICRO_TILE;
+- }
+- }
++ if (t->base.Target == GL_TEXTURE_CUBE_MAP)
++ t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
++ if (t->base.Target == GL_TEXTURE_3D)
++ t->pp_txformat |= R300_TX_FORMAT_3D;
+
+- if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+- /* we can set macro tiling even for small textures, they will be untiled anyway */
+- t->tile_bits |= R300_TXO_MACRO_TILE;
+- }
+- }
+-#endif
+
+- curOffset = 0;
+-
+- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- ASSERT(log2Width == log2Height);
+- t->format |= R300_TX_FORMAT_CUBIC_MAP;
+-
+- for(i = 0; i < numLevels; i++) {
+- GLuint face;
+- for(face = 0; face < 6; face++)
+- compute_tex_image_offset(tObj, face, i, &curOffset);
+- }
+- } else {
+- if (tObj->Target == GL_TEXTURE_3D)
+- t->format |= R300_TX_FORMAT_3D;
+-
+- for (i = 0; i < numLevels; i++)
+- compute_tex_image_offset(tObj, 0, i, &curOffset);
+- }
+-
+- /* Align the total size of texture memory block.
+- */
+- t->base.totalSize =
+- (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+- t->size =
+- (((tObj->Image[0][t->base.firstLevel]->Width -
+- 1) << R300_TX_WIDTHMASK_SHIFT)
+- | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
+- R300_TX_HEIGHTMASK_SHIFT)
+- | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
+- R300_TX_DEPTHMASK_SHIFT))
+- | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
+-
+- t->pitch = 0;
+-
+- /* Only need to round to nearest 32 for textures, but the blitter
+- * requires 64-byte aligned pitches, and we may/may not need the
+- * blitter. NPOT only!
+- */
+- if (baseImage->IsCompressed) {
+- t->pitch |=
+- (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+- } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- unsigned int align = (64 / texelBytes) - 1;
+- t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
+- texelBytes) + 63) & ~(63);
+- t->size |= R300_TX_SIZE_TXPITCH_EN;
++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
++ unsigned int align = (64 / t->mt->bpp) - 1;
++ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+ if (!t->image_override)
+- t->pitch_reg =
+- (((tObj->Image[0][t->base.firstLevel]->Width) +
+- align) & ~align) - 1;
+- } else {
+- t->pitch |=
+- ((tObj->Image[0][t->base.firstLevel]->Width *
+- texelBytes) + 63) & ~(63);
++ t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
+ }
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+- if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
+- t->pitch_reg |= R500_TXWIDTH_BIT11;
+- if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
+- t->pitch_reg |= R500_TXHEIGHT_BIT11;
++ if (firstImage->Width > 2048)
++ t->pp_txpitch |= R500_TXWIDTH_BIT11;
++ if (firstImage->Height > 2048)
++ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ }
+ }
+
+-/* ================================================================
+- * Texture unit state management
++/**
++ * Ensure the given texture is ready for rendering.
++ *
++ * Mostly this means populating the texture object's mipmap tree.
+ */
+-
+-static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
++static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
++ radeonTexObj *t = radeon_tex_obj(texObj);
+
+- if (t->base.dirty_images[0]) {
+- R300_FIREVERTICES(rmesa);
++ if (!radeon_validate_texture_miptree(ctx, texObj))
++ return GL_FALSE;
+
+- r300SetTexImages(rmesa, tObj);
+- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+- if (!t->base.memBlock && !t->image_override)
+- return GL_FALSE;
+- }
++ /* Configure the hardware registers (more precisely, the cached version
++ * of the hardware registers). */
++ setup_hardware_state(rmesa, t);
+
++ t->validated = GL_TRUE;
+ return GL_TRUE;
+ }
+
+-static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
++
++/**
++ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
++ */
++GLboolean r300ValidateBuffers(GLcontext * ctx)
+ {
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+- ASSERT(tObj->Target == GL_TEXTURE_3D);
+-
+- /* r300 does not support mipmaps for 3D textures. */
+- if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
+- return GL_FALSE;
++ struct radeon_cs_space_check bos[16];
++ struct radeon_renderbuffer *rrb;
++ int num_bo = 0;
++ int i;
++ int flushed = 0, ret;
++again:
++ num_bo = 0;
++
++ rrb = radeon_get_colorbuffer(&rmesa->radeon);
++ /* color buffer */
++ if (rrb && rrb->bo) {
++ bos[num_bo].bo = rrb->bo;
++ bos[num_bo].read_domains = 0;
++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
+ }
+
+- if (t->base.dirty_images[0]) {
+- R300_FIREVERTICES(rmesa);
+- r300SetTexImages(rmesa, tObj);
+- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+- if (!t->base.memBlock)
+- return GL_FALSE;
++ /* depth buffer */
++ rrb = radeon_get_depthbuffer(&rmesa->radeon);
++ /* depth buffer */
++ if (rrb && rrb->bo) {
++ bos[num_bo].bo = rrb->bo;
++ bos[num_bo].read_domains = 0;
++ bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
+ }
++
++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
++ radeonTexObj *t;
+
+- return GL_TRUE;
+-}
++ if (!ctx->Texture.Unit[i]._ReallyEnabled)
++ continue;
+
+-static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+- GLuint face;
+-
+- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+-
+- if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
+- t->base.dirty_images[2] || t->base.dirty_images[3] ||
+- t->base.dirty_images[4] || t->base.dirty_images[5]) {
+- /* flush */
+- R300_FIREVERTICES(rmesa);
+- /* layout memory space, once for all faces */
+- r300SetTexImages(rmesa, tObj);
+- }
+-
+- /* upload (per face) */
+- for (face = 0; face < 6; face++) {
+- if (t->base.dirty_images[face]) {
+- r300UploadTexImages(rmesa,
+- (r300TexObjPtr) tObj->DriverData,
+- face);
++ if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
++ _mesa_warning(ctx,
++ "failed to validate texture for unit %d.\n",
++ i);
+ }
++ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
++ if (t->image_override && t->bo)
++ bos[num_bo].bo = t->bo;
++ else if (t->mt->bo)
++ bos[num_bo].bo = t->mt->bo;
++ bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
++ bos[num_bo].write_domain = 0;
++ bos[num_bo].new_accounted = 0;
++ num_bo++;
+ }
+
+- if (!t->base.memBlock) {
+- /* texmem alloc failed, use s/w fallback */
++ ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
++ if (ret == RADEON_CS_SPACE_OP_TO_BIG)
+ return GL_FALSE;
+- }
+-
+- return GL_TRUE;
+-}
+-
+-static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+- if (t->base.dirty_images[0]) {
+- R300_FIREVERTICES(rmesa);
+-
+- r300SetTexImages(rmesa, tObj);
+- r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+- if (!t->base.memBlock && !t->image_override &&
+- !rmesa->prefer_gart_client_texturing)
++ if (ret == RADEON_CS_SPACE_FLUSH) {
++ radeonFlush(ctx);
++ if (flushed)
+ return GL_FALSE;
++ flushed = 1;
++ goto again;
+ }
+-
+ return GL_TRUE;
+ }
+
+-static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
+-{
+- r300ContextPtr rmesa = R300_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+- /* Fallback if there's a texture border */
+- if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
+- return GL_FALSE;
+-
+- /* Update state if this is a different texture object to last
+- * time.
+- */
+- if (rmesa->state.texture.unit[unit].texobj != t) {
+- if (rmesa->state.texture.unit[unit].texobj != NULL) {
+- /* The old texture is no longer bound to this texture unit.
+- * Mark it as such.
+- */
+-
+- rmesa->state.texture.unit[unit].texobj->base.bound &=
+- ~(1 << unit);
+- }
+-
+- rmesa->state.texture.unit[unit].texobj = t;
+- t->base.bound |= (1 << unit);
+- driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */
+- }
+-
+- return !t->border_fallback;
+-}
+-
+ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+ {
+ r300ContextPtr rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+- r300TexObjPtr t;
++ radeonTexObjPtr t = radeon_tex_obj(tObj);
+ uint32_t pitch_val;
+
+ if (!tObj)
+ return;
+
+- t = (r300TexObjPtr) tObj->DriverData;
+-
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+- t->offset = offset;
+- t->pitch_reg &= (1 << 13) -1;
++ t->bo = NULL;
++ t->override_offset = offset;
++ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = pitch;
+
+ switch (depth) {
+ case 32:
+- t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+- t->filter |= tx_table[2].filter;
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
++ t->pp_txfilter |= tx_table[2].filter;
+ pitch_val /= 4;
+ break;
+ case 24:
+ default:
+- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+- t->filter |= tx_table[4].filter;
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
++ t->pp_txfilter |= tx_table[4].filter;
+ pitch_val /= 4;
+ break;
+ case 16:
+- t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+- t->filter |= tx_table[5].filter;
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
++ t->pp_txfilter |= tx_table[5].filter;
+ pitch_val /= 2;
+ break;
+ }
+ pitch_val--;
+
+- t->pitch_reg |= pitch_val;
++ t->pp_txpitch |= pitch_val;
+ }
+
+-static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
++void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+ {
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-
+- if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
+- return (r300EnableTextureRect(ctx, unit) &&
+- r300UpdateTexture(ctx, unit));
+- } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
+- return (r300EnableTexture2D(ctx, unit) &&
+- r300UpdateTexture(ctx, unit));
+- } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
+- return (r300EnableTexture3D(ctx, unit) &&
+- r300UpdateTexture(ctx, unit));
+- } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
+- return (r300EnableTextureCube(ctx, unit) &&
+- r300UpdateTexture(ctx, unit));
+- } else if (texUnit->_ReallyEnabled) {
+- return GL_FALSE;
+- } else {
+- return GL_TRUE;
+- }
+-}
++ struct gl_texture_unit *texUnit;
++ struct gl_texture_object *texObj;
++ struct gl_texture_image *texImage;
++ struct radeon_renderbuffer *rb;
++ radeon_texture_image *rImage;
++ radeonContextPtr radeon;
++ r300ContextPtr rmesa;
++ GLframebuffer *fb;
++ radeonTexObjPtr t;
++ uint32_t pitch_val;
+
+-void r300UpdateTextureState(GLcontext * ctx)
+-{
+- int i;
++ target = GL_TEXTURE_RECTANGLE_ARB;
+
+- for (i = 0; i < 8; i++) {
+- if (!r300UpdateTextureUnit(ctx, i)) {
+- _mesa_warning(ctx,
+- "failed to update texture state for unit %d.\n",
+- i);
+- }
++ radeon = pDRICtx->driverPrivate;
++ rmesa = pDRICtx->driverPrivate;
++
++ fb = dPriv->driverPrivate;
++ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
++ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
++ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
++
++ rImage = get_radeon_texture_image(texImage);
++ t = radeon_tex_obj(texObj);
++ if (t == NULL) {
++ return;
++ }
++
++ radeon_update_renderbuffers(pDRICtx, dPriv);
++	/* back & depth buffers are useless, free them right away */
++ rb = (void*)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
++ if (rb && rb->bo) {
++ radeon_bo_unref(rb->bo);
++ rb->bo = NULL;
++ }
++ rb = (void*)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ if (rb && rb->bo) {
++ radeon_bo_unref(rb->bo);
++ rb->bo = NULL;
++ }
++ rb = (void*)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++ if (rb->bo == NULL) {
++		/* Failed to get a BO for the buffer */
++ return;
++ }
++
++ _mesa_lock_texture(radeon->glCtx, texObj);
++ if (t->bo) {
++ radeon_bo_unref(t->bo);
++ t->bo = NULL;
++ }
++ if (rImage->bo) {
++ radeon_bo_unref(rImage->bo);
++ rImage->bo = NULL;
++ }
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = NULL;
++ }
++ if (rImage->mt) {
++ radeon_miptree_unreference(rImage->mt);
++ rImage->mt = NULL;
++ }
++	fprintf(stderr, "settexbuf %dx%d@%d\n", rb->width, rb->height, rb->cpp);
++ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
++ rb->width, rb->height, 1, 0, rb->cpp);
++ texImage->TexFormat = &_mesa_texformat_rgba8888_rev;
++ rImage->bo = rb->bo;
++ radeon_bo_ref(rImage->bo);
++ t->bo = rb->bo;
++ radeon_bo_ref(t->bo);
++ t->tile_bits = 0;
++ t->image_override = GL_TRUE;
++ t->override_offset = 0;
++ t->pp_txpitch &= (1 << 13) -1;
++ pitch_val = rb->pitch;
++ switch (rb->cpp) {
++ case 4:
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
++ t->pp_txfilter |= tx_table[2].filter;
++ pitch_val /= 4;
++ break;
++ case 3:
++ default:
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
++ t->pp_txfilter |= tx_table[4].filter;
++ pitch_val /= 4;
++ break;
++ case 2:
++ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
++ t->pp_txfilter |= tx_table[5].filter;
++ pitch_val /= 2;
++ break;
++ }
++ pitch_val--;
++ t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
++ ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
++ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
++ t->pp_txpitch |= pitch_val;
++
++ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
++ if (rb->width > 2048)
++ t->pp_txpitch |= R500_TXWIDTH_BIT11;
++ if (rb->height > 2048)
++ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ }
++ t->validated = GL_TRUE;
++ _mesa_unlock_texture(radeon->glCtx, texObj);
++ return;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
+index 75dae86..926ddd5 100644
+--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
++++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
+@@ -31,6 +31,12 @@
+ #include "radeon_program_alu.h"
+
+
++static void reset_srcreg(struct prog_src_register* reg)
++{
++ _mesa_bzero(reg, sizeof(*reg));
++ reg->Swizzle = SWIZZLE_NOOP;
++}
++
+ static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+ {
+ gl_state_index fail_value_tokens[STATE_LENGTH] = {
+@@ -99,6 +105,19 @@ static GLboolean transform_TEX(
+ destredirect = GL_TRUE;
+ }
+
++ if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
++ int tmpreg = radeonFindFreeTemporary(t);
++ tgt = radeonAppendInstructions(t->Program, 1);
++ tgt->Opcode = OPCODE_MOV;
++ tgt->DstReg.File = PROGRAM_TEMPORARY;
++ tgt->DstReg.Index = tmpreg;
++ tgt->SrcReg[0] = inst.SrcReg[0];
++
++ reset_srcreg(&inst.SrcReg[0]);
++ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
++ inst.SrcReg[0].Index = tmpreg;
++ }
++
+ tgt = radeonAppendInstructions(t->Program, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
+deleted file mode 100644
+index 5267fe9..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_context.c
++++ /dev/null
+@@ -1,330 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/**
+- * \file radeon_context.c
+- * Common context initialization.
+- *
+- * \author Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include <dlfcn.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/state.h"
+-#include "main/matrix.h"
+-#include "main/framebuffer.h"
+-
+-#include "drivers/common/driverfuncs.h"
+-#include "swrast/swrast.h"
+-
+-#include "radeon_screen.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_macros.h"
+-#include "radeon_reg.h"
+-
+-#include "radeon_state.h"
+-#include "r300_state.h"
+-
+-#include "utils.h"
+-#include "vblank.h"
+-#include "xmlpool.h" /* for symbolic values of enum-type options */
+-
+-#define DRIVER_DATE "20060815"
+-
+-
+-/* Return various strings for glGetString().
+- */
+-static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+-{
+- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+- static char buffer[128];
+-
+- switch (name) {
+- case GL_VENDOR:
+- if (IS_R300_CLASS(radeon->radeonScreen))
+- return (GLubyte *) "DRI R300 Project";
+- else
+- return (GLubyte *) "Tungsten Graphics, Inc.";
+-
+- case GL_RENDERER:
+- {
+- unsigned offset;
+- GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+- radeon->radeonScreen->AGPMode;
+- const char* chipname;
+-
+- if (IS_R300_CLASS(radeon->radeonScreen))
+- chipname = "R300";
+- else
+- chipname = "R200";
+-
+- offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
+- agp_mode);
+-
+- if (IS_R300_CLASS(radeon->radeonScreen)) {
+- sprintf(&buffer[offset], " %sTCL",
+- (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+- ? "" : "NO-");
+- } else {
+- sprintf(&buffer[offset], " %sTCL",
+- !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+- ? "" : "NO-");
+- }
+-
+- return (GLubyte *) buffer;
+- }
+-
+- default:
+- return NULL;
+- }
+-}
+-
+-/* Initialize the driver's misc functions.
+- */
+-static void radeonInitDriverFuncs(struct dd_function_table *functions)
+-{
+- functions->GetString = radeonGetString;
+-}
+-
+-
+-/**
+- * Create and initialize all common fields of the context,
+- * including the Mesa context itself.
+- */
+-GLboolean radeonInitContext(radeonContextPtr radeon,
+- struct dd_function_table* functions,
+- const __GLcontextModes * glVisual,
+- __DRIcontextPrivate * driContextPriv,
+- void *sharedContextPrivate)
+-{
+- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+- GLcontext* ctx;
+- GLcontext* shareCtx;
+- int fthrottle_mode;
+-
+- /* Fill in additional standard functions. */
+- radeonInitDriverFuncs(functions);
+-
+- radeon->radeonScreen = screen;
+- /* Allocate and initialize the Mesa context */
+- if (sharedContextPrivate)
+- shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
+- else
+- shareCtx = NULL;
+- radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+- functions, (void *)radeon);
+- if (!radeon->glCtx)
+- return GL_FALSE;
+-
+- ctx = radeon->glCtx;
+- driContextPriv->driverPrivate = radeon;
+-
+- /* DRI fields */
+- radeon->dri.context = driContextPriv;
+- radeon->dri.screen = sPriv;
+- radeon->dri.drawable = NULL;
+- radeon->dri.readable = NULL;
+- radeon->dri.hwContext = driContextPriv->hHWContext;
+- radeon->dri.hwLock = &sPriv->pSAREA->lock;
+- radeon->dri.fd = sPriv->fd;
+- radeon->dri.drmMinor = sPriv->drm_version.minor;
+-
+- radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+- screen->sarea_priv_offset);
+-
+- /* Setup IRQs */
+- fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+- radeon->iw.irq_seq = -1;
+- radeon->irqsEmitted = 0;
+- radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+- radeon->radeonScreen->irq);
+-
+- radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+-
+- if (!radeon->do_irqs)
+- fprintf(stderr,
+- "IRQ's not enabled, falling back to %s: %d %d\n",
+- radeon->do_usleeps ? "usleeps" : "busy waits",
+- fthrottle_mode, radeon->radeonScreen->irq);
+-
+- (*sPriv->systemTime->getUST) (&radeon->swap_ust);
+-
+- return GL_TRUE;
+-}
+-
+-
+-/**
+- * Cleanup common context fields.
+- * Called by r200DestroyContext/r300DestroyContext
+- */
+-void radeonCleanupContext(radeonContextPtr radeon)
+-{
+- /* _mesa_destroy_context() might result in calls to functions that
+- * depend on the DriverCtx, so don't set it to NULL before.
+- *
+- * radeon->glCtx->DriverCtx = NULL;
+- */
+-
+- /* free the Mesa context */
+- _mesa_destroy_context(radeon->glCtx);
+-
+- if (radeon->state.scissor.pClipRects) {
+- FREE(radeon->state.scissor.pClipRects);
+- radeon->state.scissor.pClipRects = 0;
+- }
+-}
+-
+-
+-/**
+- * Swap front and back buffer.
+- */
+-void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
+-{
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- radeonContextPtr radeon;
+- GLcontext *ctx;
+-
+- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = radeon->glCtx;
+-
+- if (ctx->Visual.doubleBufferMode) {
+- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
+- if (radeon->doPageFlip) {
+- radeonPageFlip(dPriv);
+- } else {
+- radeonCopyBuffer(dPriv, NULL);
+- }
+- }
+- } else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!",
+- __FUNCTION__);
+- }
+-}
+-
+-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+- int x, int y, int w, int h )
+-{
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- radeonContextPtr radeon;
+- GLcontext *ctx;
+-
+- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = radeon->glCtx;
+-
+- if (ctx->Visual.doubleBufferMode) {
+- drm_clip_rect_t rect;
+- rect.x1 = x + dPriv->x;
+- rect.y1 = (dPriv->h - y - h) + dPriv->y;
+- rect.x2 = rect.x1 + w;
+- rect.y2 = rect.y1 + h;
+- _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */
+- radeonCopyBuffer(dPriv, &rect);
+- }
+- } else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!",
+- __FUNCTION__);
+- }
+-}
+-
+-/* Force the context `c' to be the current context and associate with it
+- * buffer `b'.
+- */
+-GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+- __DRIdrawablePrivate * driDrawPriv,
+- __DRIdrawablePrivate * driReadPriv)
+-{
+- if (driContextPriv) {
+- radeonContextPtr radeon =
+- (radeonContextPtr) driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+- radeon->glCtx);
+-
+- if (radeon->dri.drawable != driDrawPriv) {
+- if (driDrawPriv->swap_interval == (unsigned)-1) {
+- driDrawPriv->vblFlags =
+- (radeon->radeonScreen->irq != 0)
+- ? driGetDefaultVBlankFlags(&radeon->
+- optionCache)
+- : VBLANK_FLAG_NO_IRQ;
+-
+- driDrawableInitVBlank(driDrawPriv);
+- }
+- }
+-
+- radeon->dri.readable = driReadPriv;
+-
+- if (radeon->dri.drawable != driDrawPriv ||
+- radeon->lastStamp != driDrawPriv->lastStamp) {
+- radeon->dri.drawable = driDrawPriv;
+-
+- radeonSetCliprects(radeon);
+- r300UpdateViewportOffset(radeon->glCtx);
+- }
+-
+- _mesa_make_current(radeon->glCtx,
+- (GLframebuffer *) driDrawPriv->
+- driverPrivate,
+- (GLframebuffer *) driReadPriv->
+- driverPrivate);
+-
+- _mesa_update_state(radeon->glCtx);
+-
+- radeonUpdatePageFlipping(radeon);
+- } else {
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+- _mesa_make_current(0, 0, 0);
+- }
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "End %s\n", __FUNCTION__);
+- return GL_TRUE;
+-}
+-
+-/* Force the context `c' to be unbound from its buffer.
+- */
+-GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
+-{
+- radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+- radeon->glCtx);
+-
+- return GL_TRUE;
+-}
+-
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
+index 47cbc22..250570f 100644
+--- a/src/mesa/drivers/dri/r300/radeon_context.h
++++ b/src/mesa/drivers/dri/r300/radeon_context.h
+@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "dri_util.h"
+
+-struct radeon_context;
+-typedef struct radeon_context radeonContextRec;
+-typedef struct radeon_context *radeonContextPtr;
+-
+-/* Rasterizing fallbacks */
+-/* See correponding strings in r200_swtcl.c */
+-#define RADEON_FALLBACK_TEXTURE 0x0001
+-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
+-#define RADEON_FALLBACK_STENCIL 0x0004
+-#define RADEON_FALLBACK_RENDER_MODE 0x0008
+-#define RADEON_FALLBACK_BLEND_EQ 0x0010
+-#define RADEON_FALLBACK_BLEND_FUNC 0x0020
+-#define RADEON_FALLBACK_DISABLE 0x0040
+-#define RADEON_FALLBACK_BORDER_MODE 0x0080
++#include "radeon_screen.h"
+
+ #if R200_MERGED
+ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+ /* TCL fallbacks */
+ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+
+-#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */
+-#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */
+-#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */
+-#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */
+-#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */
+-#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */
+-#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */
+-#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */
+-
+ #if R200_MERGED
+ #define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
+ #else
+ #define TCL_FALLBACK( ctx, bit, mode ) ;
+ #endif
+
+-struct radeon_dri_mirror {
+- __DRIcontextPrivate *context; /* DRI context */
+- __DRIscreenPrivate *screen; /* DRI screen */
+- /**
+- * DRI drawable bound to this context for drawing.
+- */
+- __DRIdrawablePrivate *drawable;
+-
+- /**
+- * DRI drawable bound to this context for reading.
+- */
+- __DRIdrawablePrivate *readable;
+-
+- drm_context_t hwContext;
+- drm_hw_lock_t *hwLock;
+- int fd;
+- int drmMinor;
+-};
+-
+-/**
+- * Derived state for internal purposes.
+- */
+-struct radeon_scissor_state {
+- drm_clip_rect_t rect;
+- GLboolean enabled;
+-
+- GLuint numClipRects; /* Cliprects active */
+- GLuint numAllocedClipRects; /* Cliprects available */
+- drm_clip_rect_t *pClipRects;
+-};
+-
+-struct radeon_colorbuffer_state {
+- GLuint clear;
+- GLint drawOffset, drawPitch;
+-};
+-
+-struct radeon_state {
+- struct radeon_colorbuffer_state color;
+- struct radeon_scissor_state scissor;
+-};
+-
+-/**
+- * Common per-context variables shared by R200 and R300.
+- * R200- and R300-specific code "derive" their own context from this
+- * structure.
+- */
+-struct radeon_context {
+- GLcontext *glCtx; /* Mesa context */
+- radeonScreenPtr radeonScreen; /* Screen private DRI data */
+-
+- /* Fallback state */
+- GLuint Fallback;
+- GLuint TclFallback;
+-
+- /* Page flipping */
+- GLuint doPageFlip;
+-
+- /* Drawable, cliprect and scissor information */
+- GLuint numClipRects; /* Cliprects for the draw buffer */
+- drm_clip_rect_t *pClipRects;
+- unsigned int lastStamp;
+- GLboolean lost_context;
+- drm_radeon_sarea_t *sarea; /* Private SAREA data */
+-
+- /* Mirrors of some DRI state */
+- struct radeon_dri_mirror dri;
+-
+- /* Busy waiting */
+- GLuint do_usleeps;
+- GLuint do_irqs;
+- GLuint irqsEmitted;
+- drm_radeon_irq_wait_t iw;
+-
+- /* buffer swap */
+- int64_t swap_ust;
+- int64_t swap_missed_ust;
+-
+- GLuint swap_count;
+- GLuint swap_missed_count;
+-
+- /* Derived state */
+- struct radeon_state state;
+-
+- /* Configuration cache
+- */
+- driOptionCache optionCache;
+-};
+-
+-#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
+-
+-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+- int x, int y, int w, int h);
+-extern GLboolean radeonInitContext(radeonContextPtr radeon,
+- struct dd_function_table *functions,
+- const __GLcontextModes * glVisual,
+- __DRIcontextPrivate * driContextPriv,
+- void *sharedContextPrivate);
+-extern void radeonCleanupContext(radeonContextPtr radeon);
+-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+- __DRIdrawablePrivate * driDrawPriv,
+- __DRIdrawablePrivate * driReadPriv);
+-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+-
+-/* ================================================================
+- * Debugging:
+- */
+-#define DO_DEBUG 1
+-
+-#if DO_DEBUG
+-extern int RADEON_DEBUG;
+-#else
+-#define RADEON_DEBUG 0
+-#endif
+-
+-#define DEBUG_TEXTURE 0x0001
+-#define DEBUG_STATE 0x0002
+-#define DEBUG_IOCTL 0x0004
+-#define DEBUG_PRIMS 0x0008
+-#define DEBUG_VERTS 0x0010
+-#define DEBUG_FALLBACKS 0x0020
+-#define DEBUG_VFMT 0x0040
+-#define DEBUG_CODEGEN 0x0080
+-#define DEBUG_VERBOSE 0x0100
+-#define DEBUG_DRI 0x0200
+-#define DEBUG_DMA 0x0400
+-#define DEBUG_SANITY 0x0800
+-#define DEBUG_SYNC 0x1000
+-#define DEBUG_PIXEL 0x2000
+-#define DEBUG_MEMORY 0x4000
+
+ #endif /* __RADEON_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
+deleted file mode 100644
+index 36502eb..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
++++ /dev/null
+@@ -1,396 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include <sched.h>
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/macros.h"
+-#include "main/context.h"
+-#include "swrast/swrast.h"
+-#include "r300_context.h"
+-#include "radeon_ioctl.h"
+-#include "r300_ioctl.h"
+-#include "r300_state.h"
+-#include "radeon_reg.h"
+-
+-#include "drirenderbuffer.h"
+-#include "vblank.h"
+-
+-static void radeonWaitForIdle(radeonContextPtr radeon);
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+-{
+- drm_radeon_getparam_t gp;
+- int ret;
+- uint32_t frame;
+-
+- gp.param = RADEON_PARAM_LAST_FRAME;
+- gp.value = (int *)&frame;
+- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+- &gp, sizeof(gp));
+- if (ret) {
+- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+- ret);
+- exit(1);
+- }
+-
+- return frame;
+-}
+-
+-uint32_t radeonGetAge(radeonContextPtr radeon)
+-{
+- drm_radeon_getparam_t gp;
+- int ret;
+- uint32_t age;
+-
+- gp.param = RADEON_PARAM_LAST_CLEAR;
+- gp.value = (int *)&age;
+- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+- &gp, sizeof(gp));
+- if (ret) {
+- fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+- ret);
+- exit(1);
+- }
+-
+- return age;
+-}
+-
+-static void radeonEmitIrqLocked(radeonContextPtr radeon)
+-{
+- drm_radeon_irq_emit_t ie;
+- int ret;
+-
+- ie.irq_seq = &radeon->iw.irq_seq;
+- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+- &ie, sizeof(ie));
+- if (ret) {
+- fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+- ret);
+- exit(1);
+- }
+-}
+-
+-static void radeonWaitIrq(radeonContextPtr radeon)
+-{
+- int ret;
+-
+- do {
+- ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+- &radeon->iw, sizeof(radeon->iw));
+- } while (ret && (errno == EINTR || errno == EBUSY));
+-
+- if (ret) {
+- fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+- ret);
+- exit(1);
+- }
+-}
+-
+-static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+-{
+- drm_radeon_sarea_t *sarea = radeon->sarea;
+-
+- if (radeon->do_irqs) {
+- if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+- if (!radeon->irqsEmitted) {
+- while (radeonGetLastFrame(radeon) <
+- sarea->last_frame) ;
+- } else {
+- UNLOCK_HARDWARE(radeon);
+- radeonWaitIrq(radeon);
+- LOCK_HARDWARE(radeon);
+- }
+- radeon->irqsEmitted = 10;
+- }
+-
+- if (radeon->irqsEmitted) {
+- radeonEmitIrqLocked(radeon);
+- radeon->irqsEmitted--;
+- }
+- } else {
+- while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+- UNLOCK_HARDWARE(radeon);
+- if (radeon->do_usleeps)
+- DO_USLEEP(1);
+- LOCK_HARDWARE(radeon);
+- }
+- }
+-}
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
+- const drm_clip_rect_t * rect)
+-{
+- radeonContextPtr radeon;
+- GLint nbox, i, ret;
+- GLboolean missed_target;
+- int64_t ust;
+- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
+-
+- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL) {
+- fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
+- (void *)radeon->glCtx);
+- }
+-
+- r300Flush(radeon->glCtx);
+-
+- LOCK_HARDWARE(radeon);
+-
+- /* Throttle the frame rate -- only allow one pending swap buffers
+- * request at a time.
+- */
+- radeonWaitForFrameCompletion(radeon);
+- if (!rect)
+- {
+- UNLOCK_HARDWARE(radeon);
+- driWaitForVBlank(dPriv, &missed_target);
+- LOCK_HARDWARE(radeon);
+- }
+-
+- nbox = dPriv->numClipRects; /* must be in locked region */
+-
+- for (i = 0; i < nbox;) {
+- GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = radeon->sarea->boxes;
+- GLint n = 0;
+-
+- for ( ; i < nr ; i++ ) {
+-
+- *b = box[i];
+-
+- if (rect)
+- {
+- if (rect->x1 > b->x1)
+- b->x1 = rect->x1;
+- if (rect->y1 > b->y1)
+- b->y1 = rect->y1;
+- if (rect->x2 < b->x2)
+- b->x2 = rect->x2;
+- if (rect->y2 < b->y2)
+- b->y2 = rect->y2;
+-
+- if (b->x1 >= b->x2 || b->y1 >= b->y2)
+- continue;
+- }
+-
+- b++;
+- n++;
+- }
+- radeon->sarea->nbox = n;
+-
+- if (!n)
+- continue;
+-
+- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
+-
+- if (ret) {
+- fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
+- ret);
+- UNLOCK_HARDWARE(radeon);
+- exit(1);
+- }
+- }
+-
+- UNLOCK_HARDWARE(radeon);
+- if (!rect)
+- {
+- ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
+-
+- radeon->swap_count++;
+- (*psp->systemTime->getUST) (&ust);
+- if (missed_target) {
+- radeon->swap_missed_count++;
+- radeon->swap_missed_ust = ust - radeon->swap_ust;
+- }
+-
+- radeon->swap_ust = ust;
+-
+- sched_yield();
+- }
+-}
+-
+-void radeonPageFlip(__DRIdrawablePrivate * dPriv)
+-{
+- radeonContextPtr radeon;
+- GLint ret;
+- GLboolean missed_target;
+- __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
+-
+- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL) {
+- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+- radeon->sarea->pfCurrentPage);
+- }
+-
+- r300Flush(radeon->glCtx);
+- LOCK_HARDWARE(radeon);
+-
+- if (!dPriv->numClipRects) {
+- UNLOCK_HARDWARE(radeon);
+- usleep(10000); /* throttle invisible client 10ms */
+- return;
+- }
+-
+- /* Need to do this for the perf box placement:
+- */
+- {
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = radeon->sarea->boxes;
+- b[0] = box[0];
+- radeon->sarea->nbox = 1;
+- }
+-
+- /* Throttle the frame rate -- only allow a few pending swap buffers
+- * request at a time.
+- */
+- radeonWaitForFrameCompletion(radeon);
+- UNLOCK_HARDWARE(radeon);
+- driWaitForVBlank(dPriv, &missed_target);
+- if (missed_target) {
+- radeon->swap_missed_count++;
+- (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
+- }
+- LOCK_HARDWARE(radeon);
+-
+- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
+-
+- UNLOCK_HARDWARE(radeon);
+-
+- if (ret) {
+- fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
+- exit(1);
+- }
+-
+- radeon->swap_count++;
+- (void)(*psp->systemTime->getUST) (&radeon->swap_ust);
+-
+- driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
+- radeon->sarea->pfCurrentPage);
+-
+- if (radeon->sarea->pfCurrentPage == 1) {
+- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+- } else {
+- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+- }
+-
+- if (IS_R300_CLASS(radeon->radeonScreen)) {
+- r300ContextPtr r300 = (r300ContextPtr)radeon;
+- R300_STATECHANGE(r300, cb);
+- r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
+- r300->radeon.radeonScreen->fbLocation;
+- r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-
+- if (r300->radeon.radeonScreen->cpp == 4)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+- else
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+- if (r300->radeon.sarea->tiling_enabled)
+- r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+- }
+-}
+-
+-void radeonWaitForIdleLocked(radeonContextPtr radeon)
+-{
+- int ret;
+- int i = 0;
+-
+- do {
+- ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+- if (ret)
+- DO_USLEEP(1);
+- } while (ret && ++i < 100);
+-
+- if (ret < 0) {
+- UNLOCK_HARDWARE(radeon);
+- fprintf(stderr, "Error: R300 timed out... exiting\n");
+- exit(-1);
+- }
+-}
+-
+-static void radeonWaitForIdle(radeonContextPtr radeon)
+-{
+- LOCK_HARDWARE(radeon);
+- radeonWaitForIdleLocked(radeon);
+- UNLOCK_HARDWARE(radeon);
+-}
+-
+-void radeonFlush(GLcontext * ctx)
+-{
+- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+- if (IS_R300_CLASS(radeon->radeonScreen))
+- r300Flush(ctx);
+-}
+-
+-
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void radeonFinish(GLcontext * ctx)
+-{
+- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+- radeonFlush(ctx);
+-
+- if (radeon->do_irqs) {
+- LOCK_HARDWARE(radeon);
+- radeonEmitIrqLocked(radeon);
+- UNLOCK_HARDWARE(radeon);
+- radeonWaitIrq(radeon);
+- } else
+- radeonWaitForIdle(radeon);
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h
+deleted file mode 100644
+index 3add775..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_ioctl.h
++++ /dev/null
+@@ -1,57 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __RADEON_IOCTL_H__
+-#define __RADEON_IOCTL_H__
+-
+-#include "main/simple_list.h"
+-#include "radeon_dri.h"
+-#include "radeon_lock.h"
+-
+-#include "xf86drm.h"
+-#include "drm.h"
+-#if 0
+-#include "r200context.h"
+-#endif
+-#include "radeon_drm.h"
+-
+-extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
+- const drm_clip_rect_t * rect);
+-extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
+-extern void radeonFlush(GLcontext * ctx);
+-extern void radeonFinish(GLcontext * ctx);
+-extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
+-extern uint32_t radeonGetAge(radeonContextPtr radeon);
+-
+-#endif /* __RADEON_IOCTL_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
+deleted file mode 100644
+index 4f47afd..af4108a
+--- a/src/mesa/drivers/dri/r300/radeon_lock.c
++++ /dev/null
+@@ -1,137 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+- VA Linux Systems Inc., Fremont, California.
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Gareth Hughes <gareth@valinux.com>
+- * Keith Whitwell <keith@tungstengraphics.com>
+- * Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#include "radeon_lock.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+-#include "r300_context.h"
+-#include "r300_state.h"
+-
+-#include "main/framebuffer.h"
+-
+-#include "drirenderbuffer.h"
+-
+-#if DEBUG_LOCKING
+-char *prevLockFile = NULL;
+-int prevLockLine = 0;
+-#endif
+-
+-/* Turn on/off page flipping according to the flags in the sarea:
+- */
+-void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+-{
+- int use_back;
+-
+- rmesa->doPageFlip = rmesa->sarea->pfState;
+- if (rmesa->glCtx->WinSysDrawBuffer) {
+- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+- rmesa->sarea->pfCurrentPage);
+- r300UpdateDrawBuffer(rmesa->glCtx);
+- }
+-
+- use_back = rmesa->glCtx->DrawBuffer ?
+- (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
+- BUFFER_BACK_LEFT) : 1;
+- use_back ^= (rmesa->sarea->pfCurrentPage == 1);
+-
+- if (use_back) {
+- rmesa->state.color.drawOffset =
+- rmesa->radeonScreen->backOffset;
+- rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
+- } else {
+- rmesa->state.color.drawOffset =
+- rmesa->radeonScreen->frontOffset;
+- rmesa->state.color.drawPitch =
+- rmesa->radeonScreen->frontPitch;
+- }
+-}
+-
+-/* Update the hardware state. This is called if another context has
+- * grabbed the hardware lock, which includes the X server. This
+- * function also updates the driver's window state after the X server
+- * moves, resizes or restacks a window -- the change will be reflected
+- * in the drawable position and clip rects. Since the X server grabs
+- * the hardware lock when it changes the window state, this routine will
+- * automatically be called after such a change.
+- */
+-void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+-{
+- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+- __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
+- r300ContextPtr r300 = (r300ContextPtr) rmesa;
+-
+- assert(drawable != NULL);
+-
+- drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+-
+- /* The window might have moved, so we might need to get new clip
+- * rects.
+- *
+- * NOTE: This releases and regrabs the hw lock to allow the X server
+- * to respond to the DRI protocol request for new drawable info.
+- * Since the hardware state depends on having the latest drawable
+- * clip rects, all state checking must be done _after_ this call.
+- */
+- DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+- if (drawable != readable) {
+- DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+- }
+-
+- if (rmesa->lastStamp != drawable->lastStamp) {
+- radeonUpdatePageFlipping(rmesa);
+- radeonSetCliprects(rmesa);
+- r300UpdateViewportOffset(rmesa->glCtx);
+- driUpdateFramebufferSize(rmesa->glCtx, drawable);
+- }
+-
+- if (sarea->ctx_owner != rmesa->dri.hwContext) {
+- int i;
+-
+- sarea->ctx_owner = rmesa->dri.hwContext;
+- for (i = 0; i < r300->nr_heaps; i++) {
+- DRI_AGE_TEXTURES(r300->texture_heaps[i]);
+- }
+- }
+-
+- rmesa->lost_context = GL_TRUE;
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
+new file mode 120000
+index 4f47afd..af4108a
+--- /dev/null
++++ b/src/mesa/drivers/dri/r300/radeon_lock.c
+@@ -0,0 +1 @@
++../radeon/radeon_lock.c
+\ No newline at end of file
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
+deleted file mode 100644
+index a344837..64bdf94
+--- a/src/mesa/drivers/dri/r300/radeon_lock.h
++++ /dev/null
+@@ -1,115 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+- VA Linux Systems Inc., Fremont, California.
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Gareth Hughes <gareth@valinux.com>
+- * Keith Whitwell <keith@tungstengraphics.com>
+- * Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#ifndef __RADEON_LOCK_H__
+-#define __RADEON_LOCK_H__
+-
+-#include "radeon_context.h"
+-
+-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+-extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+-
+-/* Turn DEBUG_LOCKING on to find locking conflicts.
+- */
+-#define DEBUG_LOCKING 0
+-
+-#if DEBUG_LOCKING
+-extern char *prevLockFile;
+-extern int prevLockLine;
+-
+-#define DEBUG_LOCK() \
+- do { \
+- prevLockFile = (__FILE__); \
+- prevLockLine = (__LINE__); \
+- } while (0)
+-
+-#define DEBUG_RESET() \
+- do { \
+- prevLockFile = 0; \
+- prevLockLine = 0; \
+- } while (0)
+-
+-#define DEBUG_CHECK_LOCK() \
+- do { \
+- if (prevLockFile) { \
+- fprintf(stderr, \
+- "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \
+- prevLockFile, prevLockLine, __FILE__, __LINE__); \
+- exit(1); \
+- } \
+- } while (0)
+-
+-#else
+-
+-#define DEBUG_LOCK()
+-#define DEBUG_RESET()
+-#define DEBUG_CHECK_LOCK()
+-
+-#endif
+-
+-/*
+- * !!! We may want to separate locks from locks with validation. This
+- * could be used to improve performance for those things commands that
+- * do not do any drawing !!!
+- */
+-
+-/* Lock the hardware and validate our state.
+- */
+-#define LOCK_HARDWARE( rmesa ) \
+- do { \
+- char __ret = 0; \
+- DEBUG_CHECK_LOCK(); \
+- DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
+- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
+- if (__ret) \
+- radeonGetLock((rmesa), 0); \
+- DEBUG_LOCK(); \
+- } while (0)
+-
+-#define UNLOCK_HARDWARE( rmesa ) \
+- do { \
+- DRM_UNLOCK((rmesa)->dri.fd, \
+- (rmesa)->dri.hwLock, \
+- (rmesa)->dri.hwContext); \
+- DEBUG_RESET(); \
+- } while (0)
+-
+-#endif /* __RADEON_LOCK_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
+new file mode 120000
+index a344837..64bdf94
+--- /dev/null
++++ b/src/mesa/drivers/dri/r300/radeon_lock.h
+@@ -0,0 +1 @@
++../radeon/radeon_lock.h
+\ No newline at end of file
+diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
+index 58bc0d5..8a945d8 100644
+--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
++++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
+@@ -35,7 +35,7 @@
+
+ #include "radeon_program_pair.h"
+
+-#include "radeon_context.h"
++#include "radeon_common.h"
+
+ #include "shader/prog_print.h"
+
+diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
+deleted file mode 100644
+index 16f9fb9..232868c
+--- a/src/mesa/drivers/dri/r300/radeon_span.c
++++ /dev/null
+@@ -1,349 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+- VA Linux Systems Inc., Fremont, California.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Kevin E. Martin <martin@valinux.com>
+- * Gareth Hughes <gareth@valinux.com>
+- * Keith Whitwell <keith@tungstengraphics.com>
+- *
+- */
+-
+-#include "main/glheader.h"
+-#include "swrast/swrast.h"
+-
+-#include "r300_state.h"
+-#include "radeon_ioctl.h"
+-#include "r300_ioctl.h"
+-#include "radeon_span.h"
+-
+-#include "drirenderbuffer.h"
+-
+-#define DBG 0
+-
+-/*
+- * Note that all information needed to access pixels in a renderbuffer
+- * should be obtained through the gl_renderbuffer parameter, not per-context
+- * information.
+- */
+-#define LOCAL_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+- const GLuint bottom = dPriv->h - 1; \
+- GLubyte *buf = (GLubyte *) drb->flippedData \
+- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
+- GLuint p; \
+- (void) p;
+-
+-#define LOCAL_DEPTH_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+- const GLuint bottom = dPriv->h - 1; \
+- GLuint xo = dPriv->x; \
+- GLuint yo = dPriv->y; \
+- GLubyte *buf = (GLubyte *) drb->Base.Data;
+-
+-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+-
+-#define Y_FLIP(Y) (bottom - (Y))
+-
+-#define HW_LOCK()
+-
+-#define HW_UNLOCK()
+-
+-/* ================================================================
+- * Color buffer
+- */
+-
+-/* 16 bit, RGB565 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_RGB
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+-
+-#define TAG(x) radeon##x##_RGB565
+-#define TAG2(x,y) radeon##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+-#include "spantmp2.h"
+-
+-/* 32 bit, ARGB8888 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_BGRA
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+-
+-#define TAG(x) radeon##x##_ARGB8888
+-#define TAG2(x,y) radeon##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+-#include "spantmp2.h"
+-
+-/* ================================================================
+- * Depth buffer
+- */
+-
+-/* The Radeon family has depth tiling on all the time, so we have to convert
+- * the x,y coordinates into the memory bus address (mba) in the same
+- * manner as the engine. In each case, the linear block address (ba)
+- * is calculated, and then wired with x and y to produce the final
+- * memory address.
+- * The chip will do address translation on its own if the surface registers
+- * are set up correctly. It is not quite enough to get it working with hyperz
+- * too...
+- */
+-
+-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 4 * (x + y * pitch);
+- } else {
+- GLuint ba, address = 0; /* a[0..1] = 0 */
+-
+-#ifdef COMPILE_R300
+- ba = (y / 8) * (pitch / 8) + (x / 8);
+-#else
+- ba = (y / 16) * (pitch / 16) + (x / 16);
+-#endif
+-
+- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
+- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
+- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
+- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+-
+- address |= (y & 0x8) << 7; /* a[10] = y[3] */
+- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
+- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+-
+- return address;
+- }
+-}
+-
+-static INLINE GLuint
+-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 2 * (x + y * pitch);
+- } else {
+- GLuint ba, address = 0; /* a[0] = 0 */
+-
+- ba = (y / 16) * (pitch / 32) + (x / 32);
+-
+- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
+- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
+- address |= (x & 0x8) << 4; /* a[7] = x[3] */
+- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+- address |= (y & 0x8) << 7; /* a[10] = y[3] */
+- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
+- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+-
+- return address;
+- }
+-}
+-
+-/* 16-bit depth buffer functions
+- */
+-#define VALUE_TYPE GLushort
+-
+-#define WRITE_DEPTH( _x, _y, d ) \
+- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+-
+-#define READ_DEPTH( d, _x, _y ) \
+- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+-
+-#define TAG(x) radeon##x##_z16
+-#include "depthtmp.h"
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- *
+- * Careful: It looks like the R300 uses ZZZS byte order while the R200
+- * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+- */
+-#define VALUE_TYPE GLuint
+-
+-#ifdef COMPILE_R300
+-#define WRITE_DEPTH( _x, _y, d ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0x000000ff; \
+- tmp |= ((d << 8) & 0xffffff00); \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-#else
+-#define WRITE_DEPTH( _x, _y, d ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0xff000000; \
+- tmp |= ((d) & 0x00ffffff); \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-#endif
+-
+-#ifdef COMPILE_R300
+-#define READ_DEPTH( d, _x, _y ) \
+- do { \
+- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+- _y + yo )) & 0xffffff00) >> 8; \
+- }while(0)
+-#else
+-#define READ_DEPTH( d, _x, _y ) \
+- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+- _y + yo )) & 0x00ffffff;
+-#endif
+-
+-#define TAG(x) radeon##x##_z24_s8
+-#include "depthtmp.h"
+-
+-/* ================================================================
+- * Stencil buffer
+- */
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#ifdef COMPILE_R300
+-#define WRITE_STENCIL( _x, _y, d ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0xffffff00; \
+- tmp |= (d) & 0xff; \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-#else
+-#define WRITE_STENCIL( _x, _y, d ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- tmp &= 0x00ffffff; \
+- tmp |= (((d) & 0xff) << 24); \
+- *(GLuint *)(buf + offset) = tmp; \
+-} while (0)
+-#endif
+-
+-#ifdef COMPILE_R300
+-#define READ_STENCIL( d, _x, _y ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- d = tmp & 0x000000ff; \
+-} while (0)
+-#else
+-#define READ_STENCIL( d, _x, _y ) \
+-do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
+- d = (tmp & 0xff000000) >> 24; \
+-} while (0)
+-#endif
+-
+-#define TAG(x) radeon##x##_z24_s8
+-#include "stenciltmp.h"
+-
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above). WaitForIdle() is the main
+- * culprit.
+- */
+-
+-static void radeonSpanRenderStart(GLcontext * ctx)
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-#ifdef COMPILE_R300
+- r300ContextPtr r300 = (r300ContextPtr) rmesa;
+- R300_FIREVERTICES(r300);
+-#else
+- RADEON_FIREVERTICES(rmesa);
+-#endif
+- LOCK_HARDWARE(rmesa);
+- radeonWaitForIdleLocked(rmesa);
+-
+- /* Read the first pixel in the frame buffer. This should
+- * be a noop, right? In fact without this conform fails as reading
+- * from the framebuffer sometimes produces old results -- the
+- * on-card read cache gets mixed up and doesn't notice that the
+- * framebuffer has been updated.
+- *
+- * Note that we should probably be reading some otherwise unused
+- * region of VRAM, otherwise we might get incorrect results when
+- * reading pixels from the top left of the screen.
+- *
+- * I found this problem on an R420 with glean's texCube test.
+- * Note that the R200 span code also *writes* the first pixel in the
+- * framebuffer, but I've found this to be unnecessary.
+- * -- Nicolai Hähnle, June 2008
+- */
+- {
+- int p;
+- driRenderbuffer *drb =
+- (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+- volatile int *buf =
+- (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+- p = *buf;
+- }
+-}
+-
+-static void radeonSpanRenderFinish(GLcontext * ctx)
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- _swrast_flush(ctx);
+- UNLOCK_HARDWARE(rmesa);
+-}
+-
+-void radeonInitSpanFuncs(GLcontext * ctx)
+-{
+- struct swrast_device_driver *swdd =
+- _swrast_GetDeviceDriverReference(ctx);
+- swdd->SpanRenderStart = radeonSpanRenderStart;
+- swdd->SpanRenderFinish = radeonSpanRenderFinish;
+-}
+-
+-/**
+- * Plug in the Get/Put routines for the given driRenderbuffer.
+- */
+-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+-{
+- if (drb->Base.InternalFormat == GL_RGBA) {
+- if (vis->redBits == 5 && vis->greenBits == 6
+- && vis->blueBits == 5) {
+- radeonInitPointers_RGB565(&drb->Base);
+- } else {
+- radeonInitPointers_ARGB8888(&drb->Base);
+- }
+- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+- radeonInitDepthPointers_z16(&drb->Base);
+- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+- radeonInitDepthPointers_z24_s8(&drb->Base);
+- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+- radeonInitStencilPointers_z24_s8(&drb->Base);
+- }
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
+new file mode 120000
+index 16f9fb9..232868c
+--- /dev/null
++++ b/src/mesa/drivers/dri/r300/radeon_span.c
+@@ -0,0 +1 @@
++../radeon/radeon_span.c
+\ No newline at end of file
+diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
+deleted file mode 100644
+index c401da6..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_state.c
++++ /dev/null
+@@ -1,244 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/api_arrayelt.h"
+-#include "main/enums.h"
+-#include "main/framebuffer.h"
+-#include "main/colormac.h"
+-#include "main/light.h"
+-
+-#include "swrast/swrast.h"
+-#include "vbo/vbo.h"
+-#include "tnl/tnl.h"
+-#include "tnl/t_pipeline.h"
+-#include "swrast_setup/swrast_setup.h"
+-
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+-#include "r300_ioctl.h"
+-
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-static GLboolean intersect_rect(drm_clip_rect_t * out,
+- drm_clip_rect_t * a, drm_clip_rect_t * b)
+-{
+- *out = *a;
+- if (b->x1 > out->x1)
+- out->x1 = b->x1;
+- if (b->y1 > out->y1)
+- out->y1 = b->y1;
+- if (b->x2 < out->x2)
+- out->x2 = b->x2;
+- if (b->y2 < out->y2)
+- out->y2 = b->y2;
+- if (out->x1 >= out->x2)
+- return GL_FALSE;
+- if (out->y1 >= out->y2)
+- return GL_FALSE;
+- return GL_TRUE;
+-}
+-
+-void radeonRecalcScissorRects(radeonContextPtr radeon)
+-{
+- drm_clip_rect_t *out;
+- int i;
+-
+- /* Grow cliprect store?
+- */
+- if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+- while (radeon->state.scissor.numAllocedClipRects <
+- radeon->numClipRects) {
+- radeon->state.scissor.numAllocedClipRects += 1; /* zero case */
+- radeon->state.scissor.numAllocedClipRects *= 2;
+- }
+-
+- if (radeon->state.scissor.pClipRects)
+- FREE(radeon->state.scissor.pClipRects);
+-
+- radeon->state.scissor.pClipRects =
+- MALLOC(radeon->state.scissor.numAllocedClipRects *
+- sizeof(drm_clip_rect_t));
+-
+- if (radeon->state.scissor.pClipRects == NULL) {
+- radeon->state.scissor.numAllocedClipRects = 0;
+- return;
+- }
+- }
+-
+- out = radeon->state.scissor.pClipRects;
+- radeon->state.scissor.numClipRects = 0;
+-
+- for (i = 0; i < radeon->numClipRects; i++) {
+- if (intersect_rect(out,
+- &radeon->pClipRects[i],
+- &radeon->state.scissor.rect)) {
+- radeon->state.scissor.numClipRects++;
+- out++;
+- }
+- }
+-}
+-
+-void radeonUpdateScissor(GLcontext* ctx)
+-{
+- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+- if (radeon->dri.drawable) {
+- __DRIdrawablePrivate *dPriv = radeon->dri.drawable;
+- int x1 = dPriv->x + ctx->Scissor.X;
+- int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
+-
+- radeon->state.scissor.rect.x1 = x1;
+- radeon->state.scissor.rect.y1 = y1;
+- radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
+- radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
+-
+- radeonRecalcScissorRects(radeon);
+- }
+-}
+-
+-static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+-{
+- if (ctx->Scissor.Enabled) {
+- /* We don't pipeline cliprect changes */
+- r300Flush(ctx);
+- radeonUpdateScissor(ctx);
+- }
+-}
+-
+-
+-/**
+- * Update cliprects and scissors.
+- */
+-void radeonSetCliprects(radeonContextPtr radeon)
+-{
+- __DRIdrawablePrivate *const drawable = radeon->dri.drawable;
+- __DRIdrawablePrivate *const readable = radeon->dri.readable;
+- GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
+- GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
+-
+- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+- /* Can't ignore 2d windows if we are page flipping. */
+- if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
+- radeon->sarea->pfCurrentPage == 1) {
+- radeon->numClipRects = drawable->numClipRects;
+- radeon->pClipRects = drawable->pClipRects;
+- } else {
+- radeon->numClipRects = drawable->numBackClipRects;
+- radeon->pClipRects = drawable->pBackClipRects;
+- }
+- } else {
+- /* front buffer (or none, or multiple buffers) */
+- radeon->numClipRects = drawable->numClipRects;
+- radeon->pClipRects = drawable->pClipRects;
+- }
+-
+- if ((draw_fb->Width != drawable->w) ||
+- (draw_fb->Height != drawable->h)) {
+- _mesa_resize_framebuffer(radeon->glCtx, draw_fb,
+- drawable->w, drawable->h);
+- draw_fb->Initialized = GL_TRUE;
+- }
+-
+- if (drawable != readable) {
+- if ((read_fb->Width != readable->w) ||
+- (read_fb->Height != readable->h)) {
+- _mesa_resize_framebuffer(radeon->glCtx, read_fb,
+- readable->w, readable->h);
+- read_fb->Initialized = GL_TRUE;
+- }
+- }
+-
+- if (radeon->state.scissor.enabled)
+- radeonRecalcScissorRects(radeon);
+-
+- radeon->lastStamp = drawable->lastStamp;
+-}
+-
+-
+-/**
+- * Handle common enable bits.
+- * Called as a fallback by r200Enable/r300Enable.
+- */
+-void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
+-{
+- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+- switch(cap) {
+- case GL_SCISSOR_TEST:
+- /* We don't pipeline cliprect & scissor changes */
+- r300Flush(ctx);
+-
+- radeon->state.scissor.enabled = state;
+- radeonUpdateScissor(ctx);
+- break;
+-
+- default:
+- return;
+- }
+-}
+-
+-
+-/**
+- * Initialize default state.
+- * This function is called once at context init time from
+- * r200InitState/r300InitState
+- */
+-void radeonInitState(radeonContextPtr radeon)
+-{
+- radeon->Fallback = 0;
+-
+- if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
+- radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+- radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+- } else {
+- radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+- radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+- }
+-}
+-
+-
+-/**
+- * Initialize common state functions.
+- * Called by r200InitStateFuncs/r300InitStateFuncs
+- */
+-void radeonInitStateFuncs(struct dd_function_table *functions)
+-{
+- functions->Scissor = radeonScissor;
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h
+deleted file mode 100644
+index 821cb40..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_state.h
++++ /dev/null
+@@ -1,43 +0,0 @@
+-/*
+-Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Nicolai Haehnle <prefect_@gmx.net>
+- */
+-
+-#ifndef __RADEON_STATE_H__
+-#define __RADEON_STATE_H__
+-
+-extern void radeonRecalcScissorRects(radeonContextPtr radeon);
+-extern void radeonSetCliprects(radeonContextPtr radeon);
+-extern void radeonUpdateScissor(GLcontext* ctx);
+-
+-extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
+-
+-extern void radeonInitState(radeonContextPtr radeon);
+-extern void radeonInitStateFuncs(struct dd_function_table* functions);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
+index f223b2d..f469c6f 100644
+--- a/src/mesa/drivers/dri/radeon/Makefile
++++ b/src/mesa/drivers/dri/radeon/Makefile
+@@ -4,25 +4,36 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = radeon_dri.so
+
+ MINIGLX_SOURCES = server/radeon_dri.c
+
++RADEON_COMMON_SOURCES = \
++ radeon_texture.c \
++ radeon_common_context.c \
++ radeon_common.c \
++ radeon_dma.c \
++ radeon_lock.c \
++ radeon_bo_legacy.c \
++ radeon_cs_legacy.c \
++ radeon_mipmap_tree.c \
++ radeon_span.c
++
+ DRIVER_SOURCES = \
+ radeon_context.c \
+ radeon_ioctl.c \
+- radeon_lock.c \
+ radeon_screen.c \
+ radeon_state.c \
+ radeon_state_init.c \
+ radeon_tex.c \
+- radeon_texmem.c \
+ radeon_texstate.c \
+ radeon_tcl.c \
+ radeon_swtcl.c \
+- radeon_span.c \
+ radeon_maos.c \
+- radeon_sanity.c
++ radeon_sanity.c \
++ $(RADEON_COMMON_SOURCES)
+
+ C_SOURCES = \
+ $(COMMON_SOURCES) \
+@@ -30,6 +41,8 @@ C_SOURCES = \
+
+ DRIVER_DEFINES = -DRADEON_COMMON=0
+
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
++
+ X86_SOURCES =
+
+ include ../Makefile.template
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
new file mode 100644
index 0000000..1ed13f1
@@ -1187,6 +20653,28 @@ index 0000000..f80f0f7
+#include "radeon_cs_legacy.h"
+
+#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
+index f6bd1eb..55a73ea 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
++++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
+@@ -247,9 +247,6 @@
+ #define PCI_CHIP_RS350_7835 0x7835
+ #define PCI_CHIP_RS690_791E 0x791E
+ #define PCI_CHIP_RS690_791F 0x791F
+-#define PCI_CHIP_RS600_793F 0x793F
+-#define PCI_CHIP_RS600_7941 0x7941
+-#define PCI_CHIP_RS600_7942 0x7942
+ #define PCI_CHIP_RS740_796C 0x796C
+ #define PCI_CHIP_RS740_796D 0x796D
+ #define PCI_CHIP_RS740_796E 0x796E
+@@ -273,7 +270,6 @@ enum {
+ CHIP_FAMILY_R420,
+ CHIP_FAMILY_RV410,
+ CHIP_FAMILY_RS400,
+- CHIP_FAMILY_RS600,
+ CHIP_FAMILY_RS690,
+ CHIP_FAMILY_RS740,
+ CHIP_FAMILY_RV515,
diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
new file mode 100644
index 0000000..4b5116c
@@ -3361,6 +22849,1389 @@ index 0000000..a200e90
+#endif
+
+#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
+deleted file mode 100644
+index 46b490d..0000000
+--- a/src/mesa/drivers/dri/radeon/radeon_compat.c
++++ /dev/null
+@@ -1,301 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+- Tungsten Graphics Inc., Austin, Texas.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining a
+-copy of this software and associated documentation files (the "Software"),
+-to deal in the Software without restriction, including without limitation
+-on the rights to use, copy, modify, merge, publish, distribute, sub
+-license, and/or sell copies of the Software, and to permit persons to whom
+-the Software is furnished to do so, subject to the following conditions:
+-
+-The above copyright notice and this permission notice (including the next
+-paragraph) shall be included in all copies or substantial portions of the
+-Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+-USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Keith Whitwell <keith@tungstengraphics.com>
+- *
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-
+-#include "radeon_context.h"
+-#include "radeon_state.h"
+-#include "radeon_ioctl.h"
+-
+-
+-static struct {
+- int start;
+- int len;
+- const char *name;
+-} packet[RADEON_MAX_STATE_PACKETS] = {
+- { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+- { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+- { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+- { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+- { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+- { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+- { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+- { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+- { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+- { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+- { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+- { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+- { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+- { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+- { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+- { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+- { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+- { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+- { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+- { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+- { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+-};
+-
+-
+-static void radeonCompatEmitPacket( radeonContextPtr rmesa,
+- struct radeon_state_atom *state )
+-{
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
+- drm_radeon_context_regs_t *ctx = &sarea->context_state;
+- drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
+- drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
+- int i;
+- int *buf = state->cmd;
+-
+- for ( i = 0 ; i < state->cmd_size ; ) {
+- drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
+-
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
+- packet[(int)header->packet.packet_id].name);
+-
+- switch (header->packet.packet_id) {
+- case RADEON_EMIT_PP_MISC:
+- ctx->pp_misc = buf[i++];
+- ctx->pp_fog_color = buf[i++];
+- ctx->re_solid_color = buf[i++];
+- ctx->rb3d_blendcntl = buf[i++];
+- ctx->rb3d_depthoffset = buf[i++];
+- ctx->rb3d_depthpitch = buf[i++];
+- ctx->rb3d_zstencilcntl = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+- break;
+- case RADEON_EMIT_PP_CNTL:
+- ctx->pp_cntl = buf[i++];
+- ctx->rb3d_cntl = buf[i++];
+- ctx->rb3d_coloroffset = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+- break;
+- case RADEON_EMIT_RB3D_COLORPITCH:
+- ctx->rb3d_colorpitch = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+- break;
+- case RADEON_EMIT_RE_LINE_PATTERN:
+- ctx->re_line_pattern = buf[i++];
+- ctx->re_line_state = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_LINE;
+- break;
+- case RADEON_EMIT_SE_LINE_WIDTH:
+- ctx->se_line_width = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_LINE;
+- break;
+- case RADEON_EMIT_PP_LUM_MATRIX:
+- ctx->pp_lum_matrix = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+- break;
+- case RADEON_EMIT_PP_ROT_MATRIX_0:
+- ctx->pp_rot_matrix_0 = buf[i++];
+- ctx->pp_rot_matrix_1 = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+- break;
+- case RADEON_EMIT_RB3D_STENCILREFMASK:
+- ctx->rb3d_stencilrefmask = buf[i++];
+- ctx->rb3d_ropcntl = buf[i++];
+- ctx->rb3d_planemask = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_MASKS;
+- break;
+- case RADEON_EMIT_SE_VPORT_XSCALE:
+- ctx->se_vport_xscale = buf[i++];
+- ctx->se_vport_xoffset = buf[i++];
+- ctx->se_vport_yscale = buf[i++];
+- ctx->se_vport_yoffset = buf[i++];
+- ctx->se_vport_zscale = buf[i++];
+- ctx->se_vport_zoffset = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
+- break;
+- case RADEON_EMIT_SE_CNTL:
+- ctx->se_cntl = buf[i++];
+- ctx->se_coord_fmt = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
+- break;
+- case RADEON_EMIT_SE_CNTL_STATUS:
+- ctx->se_cntl_status = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_SETUP;
+- break;
+- case RADEON_EMIT_RE_MISC:
+- ctx->re_misc = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_MISC;
+- break;
+- case RADEON_EMIT_PP_TXFILTER_0:
+- tex0->pp_txfilter = buf[i++];
+- tex0->pp_txformat = buf[i++];
+- tex0->pp_txoffset = buf[i++];
+- tex0->pp_txcblend = buf[i++];
+- tex0->pp_txablend = buf[i++];
+- tex0->pp_tfactor = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_TEX0;
+- break;
+- case RADEON_EMIT_PP_BORDER_COLOR_0:
+- tex0->pp_border_color = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_TEX0;
+- break;
+- case RADEON_EMIT_PP_TXFILTER_1:
+- tex1->pp_txfilter = buf[i++];
+- tex1->pp_txformat = buf[i++];
+- tex1->pp_txoffset = buf[i++];
+- tex1->pp_txcblend = buf[i++];
+- tex1->pp_txablend = buf[i++];
+- tex1->pp_tfactor = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_TEX1;
+- break;
+- case RADEON_EMIT_PP_BORDER_COLOR_1:
+- tex1->pp_border_color = buf[i++];
+- sarea->dirty |= RADEON_UPLOAD_TEX1;
+- break;
+-
+- case RADEON_EMIT_SE_ZBIAS_FACTOR:
+- i++;
+- i++;
+- break;
+-
+- case RADEON_EMIT_PP_TXFILTER_2:
+- case RADEON_EMIT_PP_BORDER_COLOR_2:
+- case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
+- case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
+- default:
+- /* These states aren't understood by radeon drm 1.1 */
+- fprintf(stderr, "Tried to emit unsupported state\n");
+- return;
+- }
+- }
+-}
+-
+-
+-
+-static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
+-{
+- struct radeon_state_atom *atom;
+-
+- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
+- return;
+-
+- foreach(atom, &rmesa->hw.atomlist) {
+- if (rmesa->hw.all_dirty)
+- atom->dirty = GL_TRUE;
+- if (atom->is_tcl)
+- atom->dirty = GL_FALSE;
+- if (atom->dirty)
+- radeonCompatEmitPacket(rmesa, atom);
+- }
+-
+- rmesa->hw.is_dirty = GL_FALSE;
+- rmesa->hw.all_dirty = GL_FALSE;
+-}
+-
+-
+-static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
+- GLuint hw_primitive,
+- GLuint nverts,
+- drm_clip_rect_t *pbox,
+- GLuint nbox )
+-{
+- int i;
+-
+- for ( i = 0 ; i < nbox ; ) {
+- int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
+- drm_radeon_vertex_t vtx;
+-
+- rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
+- rmesa->sarea->nbox = nr - i;
+-
+- for ( ; i < nr ; i++)
+- *b++ = pbox[i];
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr,
+- "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
+- "disc %d nbox %d\n",
+- hw_primitive,
+- rmesa->dma.current.buf->buf->idx,
+- nverts,
+- nr == nbox,
+- rmesa->sarea->nbox );
+-
+- vtx.prim = hw_primitive;
+- vtx.idx = rmesa->dma.current.buf->buf->idx;
+- vtx.count = nverts;
+- vtx.discard = (nr == nbox);
+-
+- drmCommandWrite( rmesa->dri.fd,
+- DRM_RADEON_VERTEX,
+- &vtx, sizeof(vtx));
+- }
+-}
+-
+-
+-
+-/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!
+- */
+-void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+- GLuint vertex_format,
+- GLuint hw_primitive,
+- GLuint nrverts )
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- LOCK_HARDWARE( rmesa );
+-
+- radeonCompatEmitStateLocked( rmesa );
+- rmesa->sarea->vc_format = vertex_format;
+-
+- if (rmesa->state.scissor.enabled) {
+- radeonCompatEmitPrimitiveLocked( rmesa,
+- hw_primitive,
+- nrverts,
+- rmesa->state.scissor.pClipRects,
+- rmesa->state.scissor.numClipRects );
+- }
+- else {
+- radeonCompatEmitPrimitiveLocked( rmesa,
+- hw_primitive,
+- nrverts,
+- rmesa->pClipRects,
+- rmesa->numClipRects );
+- }
+-
+-
+- UNLOCK_HARDWARE( rmesa );
+-}
+-
+diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
+index 1e992c0..e4202c7 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_context.c
++++ b/src/mesa/drivers/dri/radeon/radeon_context.c
+@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "drivers/common/driverfuncs.h"
+
++#include "radeon_common.h"
+ #include "radeon_context.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_state.h"
+@@ -62,9 +63,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_tcl.h"
+ #include "radeon_maos.h"
+
+-#define need_GL_ARB_multisample
+-#define need_GL_ARB_texture_compression
+-#define need_GL_ARB_vertex_buffer_object
+ #define need_GL_EXT_blend_minmax
+ #define need_GL_EXT_fog_coord
+ #define need_GL_EXT_secondary_color
+@@ -75,55 +73,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vblank.h"
+ #include "utils.h"
+ #include "xmlpool.h" /* for symbolic values of enum-type options */
+-#ifndef RADEON_DEBUG
+-int RADEON_DEBUG = (0);
+-#endif
+-
+-
+-/* Return various strings for glGetString().
+- */
+-static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- static char buffer[128];
+- unsigned offset;
+- GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+- rmesa->radeonScreen->AGPMode;
+-
+- switch ( name ) {
+- case GL_VENDOR:
+- return (GLubyte *)"Tungsten Graphics, Inc.";
+-
+- case GL_RENDERER:
+- offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
+- agp_mode );
+-
+- sprintf( & buffer[ offset ], " %sTCL",
+- !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+- ? "" : "NO-" );
+-
+- return (GLubyte *)buffer;
+-
+- default:
+- return NULL;
+- }
+-}
+-
+
+ /* Extension strings exported by the R100 driver.
+ */
+ const struct dri_extension card_extensions[] =
+ {
+- { "GL_ARB_multisample", GL_ARB_multisample_functions },
+ { "GL_ARB_multitexture", NULL },
+ { "GL_ARB_texture_border_clamp", NULL },
+- { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions },
+ { "GL_ARB_texture_env_add", NULL },
+ { "GL_ARB_texture_env_combine", NULL },
+ { "GL_ARB_texture_env_crossbar", NULL },
+ { "GL_ARB_texture_env_dot3", NULL },
+ { "GL_ARB_texture_mirrored_repeat", NULL },
+- { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions },
+ { "GL_EXT_blend_logic_op", NULL },
+ { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions },
+ { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+@@ -166,15 +127,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
+ NULL,
+ };
+
+-
+-
+-/* Initialize the driver's misc functions.
+- */
+-static void radeonInitDriverFuncs( struct dd_function_table *functions )
+-{
+- functions->GetString = radeonGetString;
+-}
+-
+ static const struct dri_debug_control debug_control[] =
+ {
+ { "fall", DEBUG_FALLBACKS },
+@@ -194,6 +146,51 @@ static const struct dri_debug_control debug_control[] =
+ { NULL, 0 }
+ };
+
++static void r100_get_lock(radeonContextPtr radeon)
++{
++ r100ContextPtr rmesa = (r100ContextPtr)radeon;
++ drm_radeon_sarea_t *sarea = radeon->sarea;
++
++ RADEON_STATECHANGE(rmesa, ctx);
++ if (rmesa->radeon.sarea->tiling_enabled) {
++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
++ RADEON_COLOR_TILE_ENABLE;
++ } else {
++ rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
++ ~RADEON_COLOR_TILE_ENABLE;
++ }
++
++ if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
++ sarea->ctx_owner = rmesa->radeon.dri.hwContext;
++
++ if (!radeon->radeonScreen->kernel_mm)
++ radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
++ }
++}
++
++static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++}
++
++static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
++{
++ r100ContextPtr rmesa = (r100ContextPtr)radeon;
++
++ /* r100 always needs to emit ZBS to avoid TCL lockups */
++ rmesa->hw.zbs.dirty = 1;
++ radeon->hw.is_dirty = 1;
++}
++
++
++static void r100_init_vtbl(radeonContextPtr radeon)
++{
++ radeon->vtbl.get_lock = r100_get_lock;
++ radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
++ radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
++ radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
++ radeon->vtbl.swtcl_flush = r100_swtcl_flush;
++ radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
++}
+
+ /* Create the device specific context.
+ */
+@@ -205,8 +202,8 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+ struct dd_function_table functions;
+- radeonContextPtr rmesa;
+- GLcontext *ctx, *shareCtx;
++ r100ContextPtr rmesa;
++ GLcontext *ctx;
+ int i;
+ int tcl_mode, fthrottle_mode;
+
+@@ -215,10 +212,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ assert(screen);
+
+ /* Allocate the Radeon context */
+- rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
++ rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
+ if ( !rmesa )
+ return GL_FALSE;
+
++ r100_init_vtbl(&rmesa->radeon);
++
+ /* init exp fog table data */
+ radeonInitStaticFogData();
+
+@@ -226,12 +225,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+- driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
++ driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "radeon");
+- rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
++ rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
+ "def_max_anisotropy");
+
+- if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
++ if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+ if ( sPriv->drm_version.minor < 13 )
+ fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+ "disabling.\n", sPriv->drm_version.minor );
+@@ -246,65 +245,23 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ * (the texture functions are especially important)
+ */
+ _mesa_init_driver_functions( &functions );
+- radeonInitDriverFuncs( &functions );
+ radeonInitTextureFuncs( &functions );
+
+- /* Allocate the Mesa context */
+- if (sharedContextPrivate)
+- shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
+- else
+- shareCtx = NULL;
+- rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+- &functions, (void *) rmesa);
+- if (!rmesa->glCtx) {
+- FREE(rmesa);
+- return GL_FALSE;
+- }
+- driContextPriv->driverPrivate = rmesa;
+-
+- /* Init radeon context data */
+- rmesa->dri.context = driContextPriv;
+- rmesa->dri.screen = sPriv;
+- rmesa->dri.drawable = NULL;
+- rmesa->dri.readable = NULL;
+- rmesa->dri.hwContext = driContextPriv->hHWContext;
+- rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+- rmesa->dri.fd = sPriv->fd;
+- rmesa->dri.drmMinor = sPriv->drm_version.minor;
+-
+- rmesa->radeonScreen = screen;
+- rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
+- screen->sarea_priv_offset);
+-
+-
+- rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
+-
+- (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+- make_empty_list( & rmesa->swapped );
+-
+- rmesa->nr_heaps = screen->numTexHeaps;
+- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+- rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+- screen->texSize[i],
+- 12,
+- RADEON_NR_TEX_REGIONS,
+- (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
+- & rmesa->sarea->tex_age[i],
+- & rmesa->swapped,
+- sizeof( radeonTexObj ),
+- (destroy_texture_object_t *) radeonDestroyTexObj );
+-
+- driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
+- & rmesa->c_textureSwaps );
++ if (!radeonInitContext(&rmesa->radeon, &functions,
++ glVisual, driContextPriv,
++ sharedContextPrivate)) {
++ FREE(rmesa);
++ return GL_FALSE;
+ }
+- rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
++
++ rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
+ "texture_depth");
+- if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+- rmesa->texture_depth = ( screen->cpp == 4 ) ?
++ if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++ rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
+ DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+- rmesa->swtcl.RenderIndex = ~0;
+- rmesa->hw.all_dirty = GL_TRUE;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
++ rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+ /* Set the maximum texture size small enough that we can guarantee that
+ * all texture units can bind a maximal texture and have all of them in
+@@ -312,26 +269,13 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ * setting allow larger textures.
+ */
+
+- ctx = rmesa->glCtx;
+- ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
++ ctx = rmesa->radeon.glCtx;
++ ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+ "texture_units");
+ ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+ ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+- i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
+-
+- driCalculateMaxTextureLevels( rmesa->texture_heaps,
+- rmesa->nr_heaps,
+- & ctx->Const,
+- 4,
+- 11, /* max 2D texture size is 2048x2048 */
+- 8, /* 256^3 */
+- 9, /* \todo: max cube texture size seems to be 512x512(x6) */
+- 11, /* max rect texture size is 2048x2048. */
+- 12,
+- GL_FALSE,
+- i );
+-
++ i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+@@ -359,6 +303,8 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+
+ rmesa->boxes = 0;
+
++ ctx->Const.MaxDrawBuffers = 1;
++
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext( ctx );
+@@ -392,38 +338,38 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ }
+
+ driInitExtensions( ctx, card_extensions, GL_TRUE );
+- if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+ _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+- if (rmesa->glCtx->Mesa_DXTn) {
++ if (rmesa->radeon.glCtx->Mesa_DXTn) {
+ _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+ _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+ }
+- else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
++ else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+ _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+ }
+
+- if (rmesa->dri.drmMinor >= 9)
++ if (rmesa->radeon.dri.drmMinor >= 9)
+ _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
+
+ /* XXX these should really go right after _mesa_init_driver_functions() */
++ radeonInitSpanFuncs( ctx );
+ radeonInitIoctlFuncs( ctx );
+ radeonInitStateFuncs( ctx );
+- radeonInitSpanFuncs( ctx );
+ radeonInitState( rmesa );
+ radeonInitSwtcl( ctx );
+
+ _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0,
+ ctx->Const.MaxArrayLockSize, 32 );
+
+- fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
+- rmesa->iw.irq_seq = -1;
+- rmesa->irqsEmitted = 0;
+- rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
+- fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
++ fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
++ rmesa->radeon.iw.irq_seq = -1;
++ rmesa->radeon.irqsEmitted = 0;
++ rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
++ fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
+
+- rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
++ rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+- (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
++ (*sPriv->systemTime->getUST)( & rmesa->radeon.swap_ust );
+
+
+ #if DO_DEBUG
+@@ -431,20 +377,20 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ debug_control );
+ #endif
+
+- tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
+- if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
++ tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
++ if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+ fprintf(stderr, "disabling 3D acceleration\n");
+ FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
+ } else if (tcl_mode == DRI_CONF_TCL_SW ||
+- !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+- rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
++ !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++ rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ fprintf(stderr, "Disabling HW TCL support\n");
+ }
+- TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
++ TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+ }
+
+- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ /* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
+ }
+ return GL_TRUE;
+@@ -458,179 +404,41 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
+ {
+ GET_CURRENT_CONTEXT(ctx);
+- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
+- radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
++ r100ContextPtr rmesa = (r100ContextPtr) driContextPriv->driverPrivate;
++ r100ContextPtr current = ctx ? R100_CONTEXT(ctx) : NULL;
+
+ /* check if we're deleting the currently bound context */
+ if (rmesa == current) {
+- RADEON_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ _mesa_make_current(NULL, NULL, NULL);
+ }
+
+ /* Free radeon context resources */
+ assert(rmesa); /* should never be null */
+ if ( rmesa ) {
+- GLboolean release_texture_heaps;
+-
+
+- release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+- _swsetup_DestroyContext( rmesa->glCtx );
+- _tnl_DestroyContext( rmesa->glCtx );
+- _vbo_DestroyContext( rmesa->glCtx );
+- _swrast_DestroyContext( rmesa->glCtx );
++ _swsetup_DestroyContext( rmesa->radeon.glCtx );
++ _tnl_DestroyContext( rmesa->radeon.glCtx );
++ _vbo_DestroyContext( rmesa->radeon.glCtx );
++ _swrast_DestroyContext( rmesa->radeon.glCtx );
+
+- radeonDestroySwtcl( rmesa->glCtx );
+- radeonReleaseArrays( rmesa->glCtx, ~0 );
+- if (rmesa->dma.current.buf) {
+- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
++ radeonDestroySwtcl( rmesa->radeon.glCtx );
++ radeonReleaseArrays( rmesa->radeon.glCtx, ~0 );
++ if (rmesa->radeon.dma.current) {
++ radeonReleaseDmaRegion( &rmesa->radeon );
++ rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+ }
+
+ _mesa_vector4f_free( &rmesa->tcl.ObjClean );
+
+- if (rmesa->state.scissor.pClipRects) {
+- FREE(rmesa->state.scissor.pClipRects);
+- rmesa->state.scissor.pClipRects = NULL;
+- }
+-
+- if ( release_texture_heaps ) {
+- /* This share group is about to go away, free our private
+- * texture object data.
+- */
+- int i;
+-
+- for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+- driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+- rmesa->texture_heaps[ i ] = NULL;
+- }
+-
+- assert( is_empty_list( & rmesa->swapped ) );
++ if (rmesa->radeon.state.scissor.pClipRects) {
++ FREE(rmesa->radeon.state.scissor.pClipRects);
++ rmesa->radeon.state.scissor.pClipRects = NULL;
+ }
+
+- /* free the Mesa context */
+- rmesa->glCtx->DriverCtx = NULL;
+- _mesa_destroy_context( rmesa->glCtx );
+-
+- /* free the option cache */
+- driDestroyOptionCache (&rmesa->optionCache);
++ radeonCleanupContext(&rmesa->radeon);
+
+ FREE( rmesa );
+ }
+ }
+
+-
+-
+-
+-void
+-radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
+-{
+-
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- radeonContextPtr rmesa;
+- GLcontext *ctx;
+- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = rmesa->glCtx;
+- if (ctx->Visual.doubleBufferMode) {
+- _mesa_notifySwapBuffers( ctx ); /* flush pending rendering commands */
+-
+- if ( rmesa->doPageFlip ) {
+- radeonPageFlip( dPriv );
+- }
+- else {
+- radeonCopyBuffer( dPriv, NULL );
+- }
+- }
+- }
+- else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+- }
+-}
+-
+-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+- int x, int y, int w, int h )
+-{
+- if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+- radeonContextPtr radeon;
+- GLcontext *ctx;
+-
+- radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+- ctx = radeon->glCtx;
+-
+- if (ctx->Visual.doubleBufferMode) {
+- drm_clip_rect_t rect;
+- rect.x1 = x + dPriv->x;
+- rect.y1 = (dPriv->h - y - h) + dPriv->y;
+- rect.x2 = rect.x1 + w;
+- rect.y2 = rect.y1 + h;
+- _mesa_notifySwapBuffers(ctx); /* flush pending rendering commands */
+- radeonCopyBuffer(dPriv, &rect);
+- }
+- } else {
+- /* XXX this shouldn't be an error but we can't handle it for now */
+- _mesa_problem(NULL, "%s: drawable has no context!",
+- __FUNCTION__);
+- }
+-}
+-
+-/* Make context `c' the current context and bind it to the given
+- * drawing and reading surfaces.
+- */
+-GLboolean
+-radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
+- __DRIdrawablePrivate *driDrawPriv,
+- __DRIdrawablePrivate *driReadPriv )
+-{
+- if ( driContextPriv ) {
+- radeonContextPtr newCtx =
+- (radeonContextPtr) driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
+-
+- newCtx->dri.readable = driReadPriv;
+-
+- if ( (newCtx->dri.drawable != driDrawPriv) ||
+- newCtx->lastStamp != driDrawPriv->lastStamp ) {
+- if (driDrawPriv->swap_interval == (unsigned)-1) {
+- driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
+- ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+- : VBLANK_FLAG_NO_IRQ;
+-
+- driDrawableInitVBlank( driDrawPriv );
+- }
+-
+- newCtx->dri.drawable = driDrawPriv;
+-
+- radeonSetCliprects(newCtx);
+- radeonUpdateViewportOffset( newCtx->glCtx );
+- }
+-
+- _mesa_make_current( newCtx->glCtx,
+- (GLframebuffer *) driDrawPriv->driverPrivate,
+- (GLframebuffer *) driReadPriv->driverPrivate );
+-
+- _mesa_update_state( newCtx->glCtx );
+- } else {
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+- _mesa_make_current( NULL, NULL, NULL );
+- }
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "End %s\n", __FUNCTION__);
+- return GL_TRUE;
+-}
+-
+-/* Force the context `c' to be unbound from its buffer.
+- */
+-GLboolean
+-radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
+-{
+- radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
+-
+- if (RADEON_DEBUG & DEBUG_DRI)
+- fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
+-
+- return GL_TRUE;
+-}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
+index 53df766..2efabd1 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_context.h
++++ b/src/mesa/drivers/dri/radeon/radeon_context.h
+@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "radeon_drm.h"
+ #include "texmem.h"
+-
+ #include "main/macros.h"
+ #include "main/mtypes.h"
+ #include "main/colormac.h"
+-
+-struct radeon_context;
+-typedef struct radeon_context radeonContextRec;
+-typedef struct radeon_context *radeonContextPtr;
+-
+-/* This union is used to avoid warnings/miscompilation
+- with float to uint32_t casts due to strict-aliasing */
+-typedef union {
+- GLfloat f;
+- uint32_t ui32;
+-} float_ui32_type;
+-
+-#include "radeon_lock.h"
+ #include "radeon_screen.h"
+-#include "main/mm.h"
+-
+-#include "math/m_vector.h"
+-
+-#define TEX_0 0x1
+-#define TEX_1 0x2
+-#define TEX_2 0x4
+-#define TEX_ALL 0x7
+-
+-/* Rasterizing fallbacks */
+-/* See corresponding strings in r200_swtcl.c */
+-#define RADEON_FALLBACK_TEXTURE 0x0001
+-#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
+-#define RADEON_FALLBACK_STENCIL 0x0004
+-#define RADEON_FALLBACK_RENDER_MODE 0x0008
+-#define RADEON_FALLBACK_BLEND_EQ 0x0010
+-#define RADEON_FALLBACK_BLEND_FUNC 0x0020
+-#define RADEON_FALLBACK_DISABLE 0x0040
+-#define RADEON_FALLBACK_BORDER_MODE 0x0080
+-
+-/* The blit width for texture uploads
+- */
+-#define BLIT_WIDTH_BYTES 1024
+
+-/* Use the templated vertex format:
+- */
+-#define COLOR_IS_RGBA
+-#define TAG(x) radeon##x
+-#include "tnl_dd/t_dd_vertex.h"
+-#undef TAG
+-
+-typedef void (*radeon_tri_func) (radeonContextPtr,
+- radeonVertex *,
+- radeonVertex *, radeonVertex *);
+-
+-typedef void (*radeon_line_func) (radeonContextPtr,
+- radeonVertex *, radeonVertex *);
++#include "radeon_common.h"
+
+-typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
+-
+-struct radeon_colorbuffer_state {
+- GLuint clear;
+- int roundEnable;
+-};
+
+-struct radeon_depthbuffer_state {
+- GLuint clear;
+- GLfloat scale;
+-};
++struct r100_context;
++typedef struct r100_context r100ContextRec;
++typedef struct r100_context *r100ContextPtr;
+
+-struct radeon_scissor_state {
+- drm_clip_rect_t rect;
+- GLboolean enabled;
++#include "radeon_lock.h"
+
+- GLuint numClipRects; /* Cliprects active */
+- GLuint numAllocedClipRects; /* Cliprects available */
+- drm_clip_rect_t *pClipRects;
+-};
+
+-struct radeon_stencilbuffer_state {
+- GLboolean hwBuffer;
+- GLuint clear; /* rb3d_stencilrefmask value */
+-};
+
+-struct radeon_stipple_state {
+- GLuint mask[32];
+-};
++#define R100_TEX_ALL 0x7
+
+ /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
+ #define RADEON_ST_BIT(unit) \
+@@ -141,42 +73,6 @@ struct radeon_stipple_state {
+ #define RADEON_Q_BIT(unit) \
+ (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
+
+-typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct radeon_tex_obj {
+- driTextureObject base;
+-
+- GLuint bufAddr; /* Offset to start of locally
+- shared texture block */
+-
+- GLuint dirty_state; /* Flags (1 per texunit) for
+- whether or not this texobj
+- has dirty hardware state
+- (pp_*) that needs to be
+- brought into the
+- texunit. */
+-
+- drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+- /* Six, for the cube faces */
+-
+- GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+-
+- GLuint pp_txfilter; /* hardware register values */
+- GLuint pp_txformat;
+- GLuint pp_txoffset; /* Image location in texmem.
+- All cube faces follow. */
+- GLuint pp_txsize; /* npot only */
+- GLuint pp_txpitch; /* npot only */
+- GLuint pp_border_color;
+- GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
+-
+- GLboolean border_fallback;
+-
+- GLuint tile_bits; /* hw texture tile bits used on this texture */
+-};
+-
+ struct radeon_texture_env_state {
+ radeonTexObjPtr texobj;
+ GLenum format;
+@@ -187,17 +83,6 @@ struct radeon_texture_state {
+ struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
+ };
+
+-struct radeon_state_atom {
+- struct radeon_state_atom *next, *prev;
+- const char *name; /* for debug */
+- int cmd_size; /* size in bytes */
+- GLuint is_tcl;
+- int *cmd; /* one or more cmd's */
+- int *lastcmd; /* one or more cmd's */
+- GLboolean dirty; /* dirty-mark in emit_state_list */
+- GLboolean(*check) (GLcontext *); /* is this state active? */
+-};
+-
+ /* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long. Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+@@ -410,10 +295,7 @@ struct radeon_state_atom {
+ #define SHN_SHININESS 1
+ #define SHN_STATE_SIZE 2
+
+-struct radeon_hw_state {
+- /* Head of the linked list of state atoms. */
+- struct radeon_state_atom atomlist;
+-
++struct r100_hw_state {
+ /* Hardware state, stored as cmdbuf commands:
+ * -- Need to doublebuffer for
+ * - eliding noop statechange loops? (except line stipple count)
+@@ -438,86 +320,16 @@ struct radeon_hw_state {
+ struct radeon_state_atom glt;
+ struct radeon_state_atom txr[3]; /* for NPOT */
+
+- int max_state_size; /* Number of bytes necessary for a full state emit. */
+- GLboolean is_dirty, all_dirty;
+ };
+
+-struct radeon_state {
+- /* Derived state for internal purposes:
+- */
+- struct radeon_colorbuffer_state color;
+- struct radeon_depthbuffer_state depth;
+- struct radeon_scissor_state scissor;
+- struct radeon_stencilbuffer_state stencil;
++
++struct r100_state {
+ struct radeon_stipple_state stipple;
+ struct radeon_texture_state texture;
+ };
+
+-/* Need refcounting on dma buffers:
+- */
+-struct radeon_dma_buffer {
+- int refcount; /* the number of retained regions in buf */
+- drmBufPtr buf;
+-};
+-
+-#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset + \
+- (rvb)->address - rmesa->dma.buf0_address + \
+- (rvb)->start)
+-
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct radeon_dma_region {
+- struct radeon_dma_buffer *buf;
+- char *address; /* == buf->address */
+- int start, end, ptr; /* offsets from start of buf */
+- int aos_start;
+- int aos_stride;
+- int aos_size;
+-};
+-
+-struct radeon_dma {
+- /* Active dma region. Allocations for vertices and retained
+- * regions come from here. Also used for emitting random vertices,
+- * these may be flushed by calling flush_current();
+- */
+- struct radeon_dma_region current;
+-
+- void (*flush) (radeonContextPtr);
+-
+- char *buf0_address; /* start of buf[0], for index calcs */
+- GLuint nr_released_bufs; /* flush after so many buffers released */
+-};
+-
+-struct radeon_dri_mirror {
+- __DRIcontextPrivate *context; /* DRI context */
+- __DRIscreenPrivate *screen; /* DRI screen */
+-
+- /**
+- * DRI drawable bound to this context for drawing.
+- */
+- __DRIdrawablePrivate *drawable;
+-
+- /**
+- * DRI drawable bound to this context for reading.
+- */
+- __DRIdrawablePrivate *readable;
+-
+- drm_context_t hwContext;
+- drm_hw_lock_t *hwLock;
+- int fd;
+- int drmMinor;
+-};
+-
+ #define RADEON_CMD_BUF_SZ (8*1024)
+-
+-struct radeon_store {
+- GLuint statenr;
+- GLuint primnr;
+- char cmd_buf[RADEON_CMD_BUF_SZ];
+- int cmd_used;
+- int elts_start;
+-};
+-
++#define R200_ELT_BUF_SZ (8*1024)
+ /* radeon_tcl.c
+ */
+ struct radeon_tcl_info {
+@@ -529,30 +341,23 @@ struct radeon_tcl_info {
+ */
+ GLvector4f ObjClean;
+
+- struct radeon_dma_region *aos_components[8];
++ struct radeon_aos aos[8];
+ GLuint nr_aos_components;
+
+ GLuint *Elts;
+
+- struct radeon_dma_region indexed_verts;
+- struct radeon_dma_region obj;
+- struct radeon_dma_region rgba;
+- struct radeon_dma_region spec;
+- struct radeon_dma_region fog;
+- struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
+- struct radeon_dma_region norm;
++ struct radeon_bo *indexed_bo;
++
++ int elt_cmd_offset; /** Offset into the cmdbuf */
++ int elt_cmd_start;
++ int elt_used;
+ };
+
+ /* radeon_swtcl.c
+ */
+-struct radeon_swtcl_info {
+- GLuint RenderIndex;
+- GLuint vertex_size;
++struct r100_swtcl_info {
+ GLuint vertex_format;
+
+- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+- GLuint vertex_attr_count;
+-
+ GLubyte *verts;
+
+ /* Fallback rasterization functions
+@@ -561,10 +366,6 @@ struct radeon_swtcl_info {
+ radeon_line_func draw_line;
+ radeon_tri_func draw_tri;
+
+- GLuint hw_primitive;
+- GLenum render_primitive;
+- GLuint numverts;
+-
+ /**
+ * Offset of the 4UB color data within a hardware (swtcl) vertex.
+ */
+@@ -576,22 +377,9 @@ struct radeon_swtcl_info {
+ GLuint specoffset;
+
+ GLboolean needproj;
+-
+- struct radeon_dma_region indexed_verts;
+ };
+
+-struct radeon_ioctl {
+- GLuint vertex_offset;
+- GLuint vertex_size;
+-};
+
+-#define RADEON_MAX_PRIMS 64
+-
+-struct radeon_prim {
+- GLuint start;
+- GLuint end;
+- GLuint prim;
+-};
+
+ /* A maximum total of 20 elements per vertex: 3 floats for position, 3
+ * floats for normal, 4 floats for color, 4 bytes for secondary color,
+@@ -602,59 +390,18 @@ struct radeon_prim {
+ */
+ #define RADEON_MAX_VERTEX_SIZE 20
+
+-struct radeon_context {
+- GLcontext *glCtx; /* Mesa context */
++struct r100_context {
++ struct radeon_context radeon;
+
+ /* Driver and hardware state management
+ */
+- struct radeon_hw_state hw;
+- struct radeon_state state;
+-
+- /* Texture object bookkeeping
+- */
+- unsigned nr_heaps;
+- driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+- driTextureObject swapped;
+- int texture_depth;
+- float initialMaxAnisotropy;
+-
+- /* Rasterization and vertex state:
+- */
+- GLuint TclFallback;
+- GLuint Fallback;
+- GLuint NewGLState;
+- DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */
++ struct r100_hw_state hw;
++ struct r100_state state;
+
+ /* Vertex buffers
+ */
+ struct radeon_ioctl ioctl;
+- struct radeon_dma dma;
+ struct radeon_store store;
+- /* A full state emit as of the first state emit in the main store, in case
+- * the context is lost.
+- */
+- struct radeon_store backup_store;
+-
+- /* Page flipping
+- */
+- GLuint doPageFlip;
+-
+- /* Busy waiting
+- */
+- GLuint do_usleeps;
+- GLuint do_irqs;
+- GLuint irqsEmitted;
+- drm_radeon_irq_wait_t iw;
+-
+- /* Drawable, cliprect and scissor information
+- */
+- GLuint numClipRects; /* Cliprects for the draw buffer */
+- drm_clip_rect_t *pClipRects;
+- unsigned int lastStamp;
+- GLboolean lost_context;
+- GLboolean save_on_next_emit;
+- radeonScreenPtr radeonScreen; /* Screen private DRI data */
+- drm_radeon_sarea_t *sarea; /* Private SAREA data */
+
+ /* TCL stuff
+ */
+@@ -667,29 +414,13 @@ struct radeon_context {
+ GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
+ GLuint last_ReallyEnabled;
+
+- /* VBI
+- */
+- int64_t swap_ust;
+- int64_t swap_missed_ust;
+-
+- GLuint swap_count;
+- GLuint swap_missed_count;
+-
+ /* radeon_tcl.c
+ */
+ struct radeon_tcl_info tcl;
+
+ /* radeon_swtcl.c
+ */
+- struct radeon_swtcl_info swtcl;
+-
+- /* Mirrors of some DRI state
+- */
+- struct radeon_dri_mirror dri;
+-
+- /* Configuration cache
+- */
+- driOptionCache optionCache;
++ struct r100_swtcl_info swtcl;
+
+ GLboolean using_hyperz;
+ GLboolean texmicrotile;
+@@ -703,23 +434,11 @@ struct radeon_context {
+ GLuint c_textureSwaps;
+ GLuint c_textureBytes;
+ GLuint c_vertexBuffers;
++
+ };
+
+-#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx))
+-
+-static INLINE GLuint radeonPackColor(GLuint cpp,
+- GLubyte r, GLubyte g,
+- GLubyte b, GLubyte a)
+-{
+- switch (cpp) {
+- case 2:
+- return PACK_COLOR_565(r, g, b);
+- case 4:
+- return PACK_COLOR_8888(a, r, g, b);
+- default:
+- return 0;
+- }
+-}
++#define R100_CONTEXT(ctx) ((r100ContextPtr)(ctx->DriverCtx))
++
+
+ #define RADEON_OLD_PACKETS 1
+
+@@ -727,37 +446,11 @@ extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
+ extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
+ __DRIcontextPrivate * driContextPriv,
+ void *sharedContextPrivate);
+-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+- int x, int y, int w, int h);
+ extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ __DRIdrawablePrivate * driDrawPriv,
+ __DRIdrawablePrivate * driReadPriv);
+ extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+
+-/* ================================================================
+- * Debugging:
+- */
+-#define DO_DEBUG 1
+-
+-#if DO_DEBUG
+-extern int RADEON_DEBUG;
+-#else
+-#define RADEON_DEBUG 0
+-#endif
+-
+-#define DEBUG_TEXTURE 0x0001
+-#define DEBUG_STATE 0x0002
+-#define DEBUG_IOCTL 0x0004
+-#define DEBUG_PRIMS 0x0008
+-#define DEBUG_VERTS 0x0010
+-#define DEBUG_FALLBACKS 0x0020
+-#define DEBUG_VFMT 0x0040
+-#define DEBUG_CODEGEN 0x0080
+-#define DEBUG_VERBOSE 0x0100
+-#define DEBUG_DRI 0x0200
+-#define DEBUG_DMA 0x0400
+-#define DEBUG_SANITY 0x0800
+-#define DEBUG_SYNC 0x1000
++
+
+ #endif /* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
new file mode 100644
index 0000000..984725a
@@ -4516,6 +25387,2480 @@ index 0000000..cee3744
+
+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
+#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+index 09acf6b..b5ab923 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast/swrast.h"
+
+ #include "radeon_context.h"
++#include "radeon_common.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_tcl.h"
+@@ -58,75 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define RADEON_IDLE_RETRY 16
+
+
+-static void radeonWaitForIdle( radeonContextPtr rmesa );
+-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
+- const char * caller );
+-
+-static void print_state_atom( struct radeon_state_atom *state )
+-{
+- int i;
+-
+- fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+-
+- if (RADEON_DEBUG & DEBUG_VERBOSE)
+- for (i = 0 ; i < state->cmd_size ; i++)
+- fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+-
+-}
+-
+-static void radeonSaveHwState( radeonContextPtr rmesa )
+-{
+- struct radeon_state_atom *atom;
+- char * dest = rmesa->backup_store.cmd_buf;
+-
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- rmesa->backup_store.cmd_used = 0;
+-
+- foreach( atom, &rmesa->hw.atomlist ) {
+- if ( atom->check( rmesa->glCtx ) ) {
+- int size = atom->cmd_size * 4;
+- memcpy( dest, atom->cmd, size);
+- dest += size;
+- rmesa->backup_store.cmd_used += size;
+- if (RADEON_DEBUG & DEBUG_STATE)
+- print_state_atom( atom );
+- }
+- }
+-
+- assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Returning to radeonEmitState\n");
+-}
+-
+-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+- * we need to unwire our current cmdbuf, hook the one with the saved state in
+- * it, flush it, and then put the current one back. This is so commands at the
+- * start of a cmdbuf can rely on the state being kept from the previous one.
+- */
+-static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+-{
+- GLuint nr_released_bufs;
+- struct radeon_store saved_store;
+-
+- if (rmesa->backup_store.cmd_used == 0)
+- return;
+-
+- if (RADEON_DEBUG & DEBUG_STATE)
+- fprintf(stderr, "Emitting backup state on lost context\n");
+-
+- rmesa->lost_context = GL_FALSE;
+-
+- nr_released_bufs = rmesa->dma.nr_released_bufs;
+- saved_store = rmesa->store;
+- rmesa->dma.nr_released_bufs = 0;
+- rmesa->store = rmesa->backup_store;
+- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+- rmesa->dma.nr_released_bufs = nr_released_bufs;
+- rmesa->store = saved_store;
+-}
+-
+ /* =============================================================
+ * Kernel command buffer handling
+ */
+@@ -134,893 +66,340 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+ /* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+-void radeonSetUpAtomList( radeonContextPtr rmesa )
++void radeonSetUpAtomList( r100ContextPtr rmesa )
+ {
+- int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+- make_empty_list(&rmesa->hw.atomlist);
+- rmesa->hw.atomlist.name = "atom-list";
+-
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
++ int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
++
++ make_empty_list(&rmesa->radeon.hw.atomlist);
++ rmesa->radeon.hw.atomlist.name = "atom-list";
++
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
+ for (i = 0; i < mtu; ++i) {
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
+ }
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
+ for (i = 0; i < 3 + mtu; ++i)
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
+ for (i = 0; i < 8; ++i)
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
+ for (i = 0; i < 6; ++i)
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
+- insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
+-}
+-
+-void radeonEmitState( radeonContextPtr rmesa )
+-{
+- struct radeon_state_atom *atom;
+- char *dest;
+-
+- if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->save_on_next_emit) {
+- radeonSaveHwState(rmesa);
+- rmesa->save_on_next_emit = GL_FALSE;
+- }
+-
+- /* this code used to return here but now it emits zbs */
+-
+- /* To avoid going across the entire set of states multiple times, just check
+- * for enough space for the case of emitting all state, and inline the
+- * radeonAllocCmdBuf code here without all the checks.
+- */
+- radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
+- dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+-
+- /* We always always emit zbs, this is due to a bug found by keithw in
+- the hardware and rediscovered after Erics changes by me.
+- if you ever touch this code make sure you emit zbs otherwise
+- you get tcl lockups on at least M7/7500 class of chips - airlied */
+- rmesa->hw.zbs.dirty=1;
+-
+- if (RADEON_DEBUG & DEBUG_STATE) {
+- foreach(atom, &rmesa->hw.atomlist) {
+- if (atom->dirty || rmesa->hw.all_dirty) {
+- if (atom->check(rmesa->glCtx))
+- print_state_atom(atom);
+- else
+- fprintf(stderr, "skip state %s\n", atom->name);
+- }
+- }
+- }
+-
+- foreach(atom, &rmesa->hw.atomlist) {
+- if (rmesa->hw.all_dirty)
+- atom->dirty = GL_TRUE;
+- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
+- atom->is_tcl)
+- atom->dirty = GL_FALSE;
+- if (atom->dirty) {
+- if (atom->check(rmesa->glCtx)) {
+- int size = atom->cmd_size * 4;
+- memcpy(dest, atom->cmd, size);
+- dest += size;
+- rmesa->store.cmd_used += size;
+- atom->dirty = GL_FALSE;
+- }
+- }
+- }
+-
+- assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
+-
+- rmesa->hw.is_dirty = GL_FALSE;
+- rmesa->hw.all_dirty = GL_FALSE;
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
++ insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
+ }
+
+ /* Fire a section of the retained (indexed_verts) buffer as a regular
+ * primtive.
+ */
+-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
++extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+ GLuint vertex_format,
+ GLuint primitive,
+ GLuint vertex_nr )
+ {
+- drm_radeon_cmd_header_t *cmd;
+-
++ BATCH_LOCALS(&rmesa->radeon);
+
+ assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+- radeonEmitState( rmesa );
++ radeonEmitState(&rmesa->radeon);
+
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
+- rmesa->store.cmd_used/4);
+-
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
+- __FUNCTION__ );
+ #if RADEON_OLD_PACKETS
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
+- cmd[2].i = rmesa->ioctl.vertex_offset;
+- cmd[3].i = vertex_nr;
+- cmd[4].i = vertex_format;
+- cmd[5].i = (primitive |
+- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+-
+- if (RADEON_DEBUG & DEBUG_PRIMS)
+- fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
+- __FUNCTION__,
+- cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+-#else
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
+- cmd[2].i = vertex_format;
+- cmd[3].i = (primitive |
+- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+- RADEON_CP_VC_CNTL_MAOS_ENABLE |
+- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+-
+-
+- if (RADEON_DEBUG & DEBUG_PRIMS)
+- fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
+- __FUNCTION__,
+- cmd[1].i, cmd[2].i, cmd[3].i);
++ BEGIN_BATCH(8);
++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++ } else {
++ OUT_BATCH(rmesa->ioctl.vertex_offset);
++ }
++
++ OUT_BATCH(vertex_nr);
++ OUT_BATCH(vertex_format);
++ OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
++ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
++
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->ioctl.bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++
++ END_BATCH();
++
++#else
++ BEGIN_BATCH(4);
++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
++ OUT_BATCH(vertex_format);
++ OUT_BATCH(primitive |
++ RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++ RADEON_CP_VC_CNTL_MAOS_ENABLE |
++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
++ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
++ END_BATCH();
+ #endif
+ }
+
+-
+-void radeonFlushElts( radeonContextPtr rmesa )
++void radeonFlushElts( GLcontext *ctx )
+ {
+- int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+- int dwords;
+-#if RADEON_OLD_PACKETS
+- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
+-#else
+- int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
+-#endif
+-
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&rmesa->radeon);
++ int nr;
++ uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
++ int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
++
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+- assert( rmesa->dma.flush == radeonFlushElts );
+- rmesa->dma.flush = NULL;
++ assert( rmesa->radeon.dma.flush == radeonFlushElts );
++ rmesa->radeon.dma.flush = NULL;
+
+- /* Cope with odd number of elts:
+- */
+- rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+- dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
++ nr = rmesa->tcl.elt_used;
+
+ #if RADEON_OLD_PACKETS
+- cmd[1] |= (dwords - 3) << 16;
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ dwords -= 2;
++ }
++#endif
++
++#if RADEON_OLD_PACKETS
++ cmd[1] |= (dwords + 3) << 16;
+ cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+ #else
+- cmd[1] |= (dwords - 3) << 16;
++ cmd[1] |= (dwords + 2) << 16;
+ cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+ #endif
+
++ rmesa->radeon.cmdbuf.cs->cdw += dwords;
++ rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
++
++#if RADEON_OLD_PACKETS
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->ioctl.bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ }
++#endif
++
++ END_BATCH();
++
+ if (RADEON_DEBUG & DEBUG_SYNC) {
+ fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+- radeonFinish( rmesa->glCtx );
++ radeonFinish( rmesa->radeon.glCtx );
+ }
+-}
+
++}
+
+-GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
++GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+ GLuint vertex_format,
+ GLuint primitive,
+ GLuint min_nr )
+ {
+- drm_radeon_cmd_header_t *cmd;
+ GLushort *retval;
++ int align_min_nr;
++ BATCH_LOCALS(&rmesa->radeon);
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
++ fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+
+ assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+- radeonEmitState( rmesa );
++ radeonEmitState(&rmesa->radeon);
+
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
+- ELTS_BUFSZ(min_nr),
+- __FUNCTION__ );
++ rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
++
++ /* round up min_nr to align the state */
++ align_min_nr = (min_nr + 1) & ~1;
++
+ #if RADEON_OLD_PACKETS
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
+- cmd[2].i = rmesa->ioctl.vertex_offset;
+- cmd[3].i = 0xffff;
+- cmd[4].i = vertex_format;
+- cmd[5].i = (primitive |
+- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+-
+- retval = (GLushort *)(cmd+6);
+-#else
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
+- cmd[2].i = vertex_format;
+- cmd[3].i = (primitive |
+- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+- RADEON_CP_VC_CNTL_MAOS_ENABLE |
+- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+-
+- retval = (GLushort *)(cmd+4);
++ BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++ } else {
++ OUT_BATCH(rmesa->ioctl.vertex_offset);
++ }
++ OUT_BATCH(0xffff);
++ OUT_BATCH(vertex_format);
++ OUT_BATCH(primitive |
++ RADEON_CP_VC_CNTL_PRIM_WALK_IND |
++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
++
++#else
++ BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
++ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
++ OUT_BATCH(vertex_format);
++ OUT_BATCH(primitive |
++ RADEON_CP_VC_CNTL_PRIM_WALK_IND |
++ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++ RADEON_CP_VC_CNTL_MAOS_ENABLE |
++ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+ #endif
+
+- if (RADEON_DEBUG & DEBUG_PRIMS)
+- fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
+- __FUNCTION__,
+- cmd[1].i, vertex_format, primitive);
+
+- assert(!rmesa->dma.flush);
+- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+- rmesa->dma.flush = radeonFlushElts;
++ rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
++ rmesa->tcl.elt_used = min_nr;
+
+- rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
++ retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
++
++ if (RADEON_DEBUG & DEBUG_PRIMS)
++ fprintf(stderr, "%s: header prim %x \n",
++ __FUNCTION__, primitive);
++
++ assert(!rmesa->radeon.dma.flush);
++ rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
++ rmesa->radeon.dma.flush = radeonFlushElts;
+
+ return retval;
+ }
+
+-
+-
+-void radeonEmitVertexAOS( radeonContextPtr rmesa,
++void radeonEmitVertexAOS( r100ContextPtr rmesa,
+ GLuint vertex_size,
++ struct radeon_bo *bo,
+ GLuint offset )
+ {
+ #if RADEON_OLD_PACKETS
+- rmesa->ioctl.vertex_size = vertex_size;
+ rmesa->ioctl.vertex_offset = offset;
++ rmesa->ioctl.bo = bo;
+ #else
+- drm_radeon_cmd_header_t *cmd;
++ BATCH_LOCALS(&rmesa->radeon);
+
+ if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+ fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
+ __FUNCTION__, vertex_size, offset);
+
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
+- __FUNCTION__ );
++ BEGIN_BATCH(7);
++ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
++ OUT_BATCH(1);
++ OUT_BATCH(vertex_size | (vertex_size << 8));
++ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++ END_BATCH();
+
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
+- cmd[2].i = 1;
+- cmd[3].i = vertex_size | (vertex_size << 8);
+- cmd[4].i = offset;
+ #endif
+ }
+
+
+-void radeonEmitAOS( radeonContextPtr rmesa,
+- struct radeon_dma_region **component,
++void radeonEmitAOS( r100ContextPtr rmesa,
+ GLuint nr,
+ GLuint offset )
+ {
+ #if RADEON_OLD_PACKETS
+ assert( nr == 1 );
+- assert( component[0]->aos_size == component[0]->aos_stride );
+- rmesa->ioctl.vertex_size = component[0]->aos_size;
++ rmesa->ioctl.bo = rmesa->tcl.aos[0].bo;
+ rmesa->ioctl.vertex_offset =
+- (component[0]->aos_start + offset * component[0]->aos_stride * 4);
++ (rmesa->tcl.aos[0].offset + offset * rmesa->tcl.aos[0].stride * 4);
+ #else
+- drm_radeon_cmd_header_t *cmd;
+- int sz = AOS_BUFSZ(nr);
++ BATCH_LOCALS(&rmesa->radeon);
++ uint32_t voffset;
++ // int sz = AOS_BUFSZ(nr);
++ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ int i;
+- int *tmp;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+-
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
+- __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
+- cmd[2].i = nr;
+- tmp = &cmd[0].i;
+- cmd += 3;
+-
+- for (i = 0 ; i < nr ; i++) {
+- if (i & 1) {
+- cmd[0].i |= ((component[i]->aos_stride << 24) |
+- (component[i]->aos_size << 16));
+- cmd[2].i = (component[i]->aos_start +
+- offset * component[i]->aos_stride * 4);
+- cmd += 3;
+- }
+- else {
+- cmd[0].i = ((component[i]->aos_stride << 8) |
+- (component[i]->aos_size << 0));
+- cmd[1].i = (component[i]->aos_start +
+- offset * component[i]->aos_stride * 4);
+- }
+- }
+-
+- if (RADEON_DEBUG & DEBUG_VERTS) {
+- fprintf(stderr, "%s:\n", __FUNCTION__);
+- for (i = 0 ; i < sz ; i++)
+- fprintf(stderr, " %d: %x\n", i, tmp[i]);
+- }
+-#endif
+-}
+-
+-/* using already shifted color_fmt! */
+-void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
+- GLuint color_fmt,
+- GLuint src_pitch,
+- GLuint src_offset,
+- GLuint dst_pitch,
+- GLuint dst_offset,
+- GLint srcx, GLint srcy,
+- GLint dstx, GLint dsty,
+- GLuint w, GLuint h )
+-{
+- drm_radeon_cmd_header_t *cmd;
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+- __FUNCTION__,
+- src_pitch, src_offset, srcx, srcy,
+- dst_pitch, dst_offset, dstx, dsty,
+- w, h);
+-
+- assert( (src_pitch & 63) == 0 );
+- assert( (dst_pitch & 63) == 0 );
+- assert( (src_offset & 1023) == 0 );
+- assert( (dst_offset & 1023) == 0 );
+- assert( w < (1<<16) );
+- assert( h < (1<<16) );
+-
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
+- __FUNCTION__ );
+-
+-
+- cmd[0].i = 0;
+- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+- cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
+- cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+- RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+- RADEON_GMC_BRUSH_NONE |
+- color_fmt |
+- RADEON_GMC_SRC_DATATYPE_COLOR |
+- RADEON_ROP3_S |
+- RADEON_DP_SRC_SOURCE_MEMORY |
+- RADEON_GMC_CLR_CMP_CNTL_DIS |
+- RADEON_GMC_WR_MSK_DIS );
+-
+- cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
+- cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
+- cmd[5].i = (srcx << 16) | srcy;
+- cmd[6].i = (dstx << 16) | dsty; /* dst */
+- cmd[7].i = (w << 16) | h;
+-}
+-
+-
+-void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
+-{
+- drm_radeon_cmd_header_t *cmd;
+-
+- assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
+-
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
+- __FUNCTION__ );
+- cmd[0].i = 0;
+- cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
+- cmd[0].wait.flags = flags;
+-}
+-
+-
+-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
+- const char * caller )
+-{
+- int ret, i;
+- drm_radeon_cmd_buffer_t cmd;
+-
+- if (rmesa->lost_context)
+- radeonBackUpAndEmitLostStateLocked(rmesa);
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL) {
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (RADEON_DEBUG & DEBUG_VERBOSE)
+- for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+- fprintf(stderr, "%d: %x\n", i/4,
+- *(int *)(&rmesa->store.cmd_buf[i]));
+- }
+-
+- if (RADEON_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+- rmesa->dma.nr_released_bufs);
+-
+-
+- if (RADEON_DEBUG & DEBUG_SANITY) {
+- if (rmesa->state.scissor.enabled)
+- ret = radeonSanityCmdBuffer( rmesa,
+- rmesa->state.scissor.numClipRects,
+- rmesa->state.scissor.pClipRects);
+- else
+- ret = radeonSanityCmdBuffer( rmesa,
+- rmesa->numClipRects,
+- rmesa->pClipRects);
+- if (ret) {
+- fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
+- goto out;
++ BEGIN_BATCH(sz+2+(nr * 2));
++ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++ OUT_BATCH(nr);
++
++ if (!rmesa->radeon.radeonScreen->kernel_mm) {
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++ (rmesa->tcl.aos[i].stride << 8) |
++ (rmesa->tcl.aos[i + 1].components << 16) |
++ (rmesa->tcl.aos[i + 1].stride << 24));
++
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[i].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[i+1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+- }
+-
+-
+- cmd.bufsz = rmesa->store.cmd_used;
+- cmd.buf = rmesa->store.cmd_buf;
+-
+- if (rmesa->state.scissor.enabled) {
+- cmd.nbox = rmesa->state.scissor.numClipRects;
+- cmd.boxes = rmesa->state.scissor.pClipRects;
+- } else {
+- cmd.nbox = rmesa->numClipRects;
+- cmd.boxes = rmesa->pClipRects;
+- }
+-
+- ret = drmCommandWrite( rmesa->dri.fd,
+- DRM_RADEON_CMDBUF,
+- &cmd, sizeof(cmd) );
+-
+- if (ret)
+- fprintf(stderr, "drmCommandWrite: %d\n", ret);
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+- radeonWaitForIdleLocked( rmesa );
+- }
+-
+- out:
+- rmesa->store.primnr = 0;
+- rmesa->store.statenr = 0;
+- rmesa->store.cmd_used = 0;
+- rmesa->dma.nr_released_bufs = 0;
+- rmesa->save_on_next_emit = 1;
+-
+- return ret;
+-}
+-
+-
+-/* Note: does not emit any commands to avoid recursion on
+- * radeonAllocCmdBuf.
+- */
+-void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
+-{
+- int ret;
+-
+-
+- LOCK_HARDWARE( rmesa );
+-
+- ret = radeonFlushCmdBufLocked( rmesa, caller );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if (ret) {
+- fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
+- exit(ret);
+- }
+-}
+-
+-/* =============================================================
+- * Hardware vertex buffer handling
+- */
+-
+-
+-void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
+-{
+- struct radeon_dma_buffer *dmabuf;
+- int fd = rmesa->dri.fd;
+- int index = 0;
+- int size = 0;
+- drmDMAReq dma;
+- int ret;
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush) {
+- rmesa->dma.flush( rmesa );
+- }
+-
+- if (rmesa->dma.current.buf)
+- radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-
+- if (rmesa->dma.nr_released_bufs > 4)
+- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+-
+- dma.context = rmesa->dri.hwContext;
+- dma.send_count = 0;
+- dma.send_list = NULL;
+- dma.send_sizes = NULL;
+- dma.flags = 0;
+- dma.request_count = 1;
+- dma.request_size = RADEON_BUFFER_SIZE;
+- dma.request_list = &index;
+- dma.request_sizes = &size;
+- dma.granted_count = 0;
+-
+- LOCK_HARDWARE(rmesa); /* no need to validate */
+-
+- ret = drmDMA( fd, &dma );
+
+- if (ret != 0) {
+- /* Free some up this way?
+- */
+- if (rmesa->dma.nr_released_bufs) {
+- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
++ if (nr & 1) {
++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++ (rmesa->tcl.aos[nr - 1].stride << 8));
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ OUT_BATCH_RELOC(voffset,
++ rmesa->tcl.aos[nr - 1].bo,
++ voffset,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+-
+- if (RADEON_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "Waiting for buffers\n");
+-
+- radeonWaitForIdleLocked( rmesa );
+- ret = drmDMA( fd, &dma );
+-
+- if ( ret != 0 ) {
+- UNLOCK_HARDWARE( rmesa );
+- fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
+- exit( -1 );
++ } else {
++ for (i = 0; i + 1 < nr; i += 2) {
++ OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++ (rmesa->tcl.aos[i].stride << 8) |
++ (rmesa->tcl.aos[i + 1].components << 16) |
++ (rmesa->tcl.aos[i + 1].stride << 24));
++
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ OUT_BATCH(voffset);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ OUT_BATCH(voffset);
+ }
+- }
+-
+- UNLOCK_HARDWARE(rmesa);
+-
+- if (RADEON_DEBUG & DEBUG_DMA)
+- fprintf(stderr, "Allocated buffer %d\n", index);
+-
+- dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
+- dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
+- dmabuf->refcount = 1;
+-
+- rmesa->dma.current.buf = dmabuf;
+- rmesa->dma.current.address = dmabuf->buf->address;
+- rmesa->dma.current.end = dmabuf->buf->total;
+- rmesa->dma.current.start = 0;
+- rmesa->dma.current.ptr = 0;
+-
+- rmesa->c_vertexBuffers++;
+-}
+-
+-void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+- struct radeon_dma_region *region,
+- const char *caller )
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+- if (!region->buf)
+- return;
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- if (--region->buf->refcount == 0) {
+- drm_radeon_cmd_header_t *cmd;
+-
+- if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+- fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+- region->buf->buf->idx);
+
+- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
+- __FUNCTION__ );
+- cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+- cmd->dma.buf_idx = region->buf->buf->idx;
+- FREE(region->buf);
+- rmesa->dma.nr_released_bufs++;
+- }
+-
+- region->buf = NULL;
+- region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current. If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void radeonAllocDmaRegion( radeonContextPtr rmesa,
+- struct radeon_dma_region *region,
+- int bytes,
+- int alignment )
+-{
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- if (region->buf)
+- radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+-
+- alignment--;
+- rmesa->dma.current.start = rmesa->dma.current.ptr =
+- (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+- radeonRefillCurrentDmaRegion( rmesa );
+-
+- region->start = rmesa->dma.current.start;
+- region->ptr = rmesa->dma.current.start;
+- region->end = rmesa->dma.current.start + bytes;
+- region->address = rmesa->dma.current.address;
+- region->buf = rmesa->dma.current.buf;
+- region->buf->refcount++;
+-
+- rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+- rmesa->dma.current.start =
+- rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-}
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t radeonGetLastFrame (radeonContextPtr rmesa)
+-{
+- drm_radeon_getparam_t gp;
+- int ret;
+- uint32_t frame;
+-
+- gp.param = RADEON_PARAM_LAST_FRAME;
+- gp.value = (int *)&frame;
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+- &gp, sizeof(gp) );
+-
+- if ( ret ) {
+- fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-
+- return frame;
+-}
+-
+-static void radeonEmitIrqLocked( radeonContextPtr rmesa )
+-{
+- drm_radeon_irq_emit_t ie;
+- int ret;
+-
+- ie.irq_seq = &rmesa->iw.irq_seq;
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
+- &ie, sizeof(ie) );
+- if ( ret ) {
+- fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-}
+-
+-
+-static void radeonWaitIrq( radeonContextPtr rmesa )
+-{
+- int ret;
+-
+- do {
+- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+- &rmesa->iw, sizeof(rmesa->iw) );
+- } while (ret && (errno == EINTR || errno == EBUSY));
+-
+- if ( ret ) {
+- fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
+- exit(1);
+- }
+-}
+-
+-
+-static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
+-{
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
+-
+- if (rmesa->do_irqs) {
+- if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
+- if (!rmesa->irqsEmitted) {
+- while (radeonGetLastFrame (rmesa) < sarea->last_frame)
+- ;
+- }
+- else {
+- UNLOCK_HARDWARE( rmesa );
+- radeonWaitIrq( rmesa );
+- LOCK_HARDWARE( rmesa );
+- }
+- rmesa->irqsEmitted = 10;
+- }
+-
+- if (rmesa->irqsEmitted) {
+- radeonEmitIrqLocked( rmesa );
+- rmesa->irqsEmitted--;
++ if (nr & 1) {
++ OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++ (rmesa->tcl.aos[nr - 1].stride << 8));
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ OUT_BATCH(voffset);
+ }
+- }
+- else {
+- while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
+- UNLOCK_HARDWARE( rmesa );
+- if (rmesa->do_usleeps)
+- DO_USLEEP( 1 );
+- LOCK_HARDWARE( rmesa );
+- }
+- }
+-}
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+- const drm_clip_rect_t *rect)
+-{
+- radeonContextPtr rmesa;
+- GLint nbox, i, ret;
+- GLboolean missed_target;
+- int64_t ust;
+- __DRIscreenPrivate *psp;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
+-
+- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+- if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+- fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
+- }
+-
+- RADEON_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
+-
+- /* Throttle the frame rate -- only allow one pending swap buffers
+- * request at a time.
+- */
+- radeonWaitForFrameCompletion( rmesa );
+- if (!rect)
+- {
+- UNLOCK_HARDWARE( rmesa );
+- driWaitForVBlank( dPriv, & missed_target );
+- LOCK_HARDWARE( rmesa );
+- }
+-
+- nbox = dPriv->numClipRects; /* must be in locked region */
+-
+- for ( i = 0 ; i < nbox ; ) {
+- GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
+- GLint n = 0;
+-
+- for ( ; i < nr ; i++ ) {
+-
+- *b = box[i];
+-
+- if (rect)
+- {
+- if (rect->x1 > b->x1)
+- b->x1 = rect->x1;
+- if (rect->y1 > b->y1)
+- b->y1 = rect->y1;
+- if (rect->x2 < b->x2)
+- b->x2 = rect->x2;
+- if (rect->y2 < b->y2)
+- b->y2 = rect->y2;
+-
+- if (b->x1 >= b->x2 || b->y1 >= b->y2)
+- continue;
+- }
+-
+- b++;
+- n++;
++ for (i = 0; i + 1 < nr; i += 2) {
++ voffset = rmesa->tcl.aos[i + 0].offset +
++ offset * 4 * rmesa->tcl.aos[i + 0].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[i+0].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
++ voffset = rmesa->tcl.aos[i + 1].offset +
++ offset * 4 * rmesa->tcl.aos[i + 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[i+1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+- rmesa->sarea->nbox = n;
+-
+- if (!n)
+- continue;
+-
+- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+- UNLOCK_HARDWARE( rmesa );
+- exit( 1 );
++ if (nr & 1) {
++ voffset = rmesa->tcl.aos[nr - 1].offset +
++ offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++ rmesa->tcl.aos[nr-1].bo,
++ RADEON_GEM_DOMAIN_GTT,
++ 0, 0);
+ }
+ }
++ END_BATCH();
+
+- UNLOCK_HARDWARE( rmesa );
+- if (!rect)
+- {
+- psp = dPriv->driScreenPriv;
+- rmesa->swap_count++;
+- (*psp->systemTime->getUST)( & ust );
+- if ( missed_target ) {
+- rmesa->swap_missed_count++;
+- rmesa->swap_missed_ust = ust - rmesa->swap_ust;
+- }
+-
+- rmesa->swap_ust = ust;
+- rmesa->hw.all_dirty = GL_TRUE;
+- }
+-}
+-
+-void radeonPageFlip( __DRIdrawablePrivate *dPriv )
+-{
+- radeonContextPtr rmesa;
+- GLint ret;
+- GLboolean missed_target;
+- __DRIscreenPrivate *psp;
+-
+- assert(dPriv);
+- assert(dPriv->driContextPriv);
+- assert(dPriv->driContextPriv->driverPrivate);
+-
+- rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+- psp = dPriv->driScreenPriv;
+-
+- if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+- fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+- rmesa->sarea->pfCurrentPage);
+- }
+-
+- RADEON_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
+-
+- /* Need to do this for the perf box placement:
+- */
+- if (dPriv->numClipRects)
+- {
+- drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
+- b[0] = box[0];
+- rmesa->sarea->nbox = 1;
+- }
+-
+- /* Throttle the frame rate -- only allow a few pending swap buffers
+- * request at a time.
+- */
+- radeonWaitForFrameCompletion( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- driWaitForVBlank( dPriv, & missed_target );
+- if ( missed_target ) {
+- rmesa->swap_missed_count++;
+- (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
+- }
+- LOCK_HARDWARE( rmesa );
+-
+- ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+- exit( 1 );
+- }
+-
+- rmesa->swap_count++;
+- (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
+-
+- /* Get ready for drawing next frame. Update the renderbuffers'
+- * flippedOffset/Pitch fields so we draw into the right place.
+- */
+- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+- rmesa->sarea->pfCurrentPage);
+-
+- radeonUpdateDrawBuffer(rmesa->glCtx);
++#endif
+ }
+
+-
+ /* ================================================================
+ * Buffer clear
+ */
+@@ -1028,9 +407,9 @@ void radeonPageFlip( __DRIdrawablePrivate *dPriv )
+
+ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+- drm_radeon_sarea_t *sarea = rmesa->sarea;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++ drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
+ uint32_t clear;
+ GLuint flags = 0;
+ GLuint color_mask = 0;
+@@ -1042,8 +421,8 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ }
+
+ {
+- LOCK_HARDWARE( rmesa );
+- UNLOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ if ( dPriv->numClipRects == 0 )
+ return;
+ }
+@@ -1067,7 +446,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ mask &= ~BUFFER_BIT_DEPTH;
+ }
+
+- if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
++ if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
+ flags |= RADEON_STENCIL;
+ mask &= ~BUFFER_BIT_STENCIL;
+ }
+@@ -1083,16 +462,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+
+ if (rmesa->using_hyperz) {
+ flags |= RADEON_USE_COMP_ZBUF;
+-/* if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)
++/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL)
+ flags |= RADEON_USE_HIERZ; */
+- if (!(rmesa->state.stencil.hwBuffer) ||
++ if (!(rmesa->radeon.state.stencil.hwBuffer) ||
+ ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+- ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
++ ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+ flags |= RADEON_CLEAR_FASTZ;
+ }
+ }
+
+- LOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ /* compute region after locking: */
+ cx = ctx->DrawBuffer->_Xmin;
+@@ -1112,7 +491,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+
+ gp.param = RADEON_PARAM_LAST_CLEAR;
+ gp.value = (int *)&clear;
+- ret = drmCommandWriteRead( rmesa->dri.fd,
++ ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+ DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+
+ if ( ret ) {
+@@ -1124,20 +503,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ break;
+ }
+
+- if ( rmesa->do_usleeps ) {
+- UNLOCK_HARDWARE( rmesa );
++ if ( rmesa->radeon.do_usleeps ) {
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ DO_USLEEP( 1 );
+- LOCK_HARDWARE( rmesa );
++ LOCK_HARDWARE( &rmesa->radeon );
+ }
+ }
+
+ /* Send current state to the hardware */
+- radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
++ rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+ for ( i = 0 ; i < dPriv->numClipRects ; ) {
+ GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+ drm_clip_rect_t *box = dPriv->pClipRects;
+- drm_clip_rect_t *b = rmesa->sarea->boxes;
++ drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+ drm_radeon_clear_t clear;
+ drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+ GLint n = 0;
+@@ -1172,106 +551,40 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ }
+ }
+
+- rmesa->sarea->nbox = n;
++ rmesa->radeon.sarea->nbox = n;
+
+ clear.flags = flags;
+- clear.clear_color = rmesa->state.color.clear;
+- clear.clear_depth = rmesa->state.depth.clear;
++ clear.clear_color = rmesa->radeon.state.color.clear;
++ clear.clear_depth = rmesa->radeon.state.depth.clear;
+ clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+- clear.depth_mask = rmesa->state.stencil.clear;
++ clear.depth_mask = rmesa->radeon.state.stencil.clear;
+ clear.depth_boxes = depth_boxes;
+
+ n--;
+- b = rmesa->sarea->boxes;
++ b = rmesa->radeon.sarea->boxes;
+ for ( ; n >= 0 ; n-- ) {
+ depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+ depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+ depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
+ depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
+ depth_boxes[n].f[CLEAR_DEPTH] =
+- (float)rmesa->state.depth.clear;
++ (float)rmesa->radeon.state.depth.clear;
+ }
+
+- ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
++ ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+ &clear, sizeof(drm_radeon_clear_t));
+
+ if ( ret ) {
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+ exit( 1 );
+ }
+ }
+
+- UNLOCK_HARDWARE( rmesa );
+- rmesa->hw.all_dirty = GL_TRUE;
++ UNLOCK_HARDWARE( &rmesa->radeon );
++ rmesa->radeon.hw.all_dirty = GL_TRUE;
+ }
+
+-
+-void radeonWaitForIdleLocked( radeonContextPtr rmesa )
+-{
+- int fd = rmesa->dri.fd;
+- int to = 0;
+- int ret, i = 0;
+-
+- rmesa->c_drawWaits++;
+-
+- do {
+- do {
+- ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
+- } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
+- } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
+-
+- if ( ret < 0 ) {
+- UNLOCK_HARDWARE( rmesa );
+- fprintf( stderr, "Error: Radeon timed out... exiting\n" );
+- exit( -1 );
+- }
+-}
+-
+-
+-static void radeonWaitForIdle( radeonContextPtr rmesa )
+-{
+- LOCK_HARDWARE(rmesa);
+- radeonWaitForIdleLocked( rmesa );
+- UNLOCK_HARDWARE(rmesa);
+-}
+-
+-
+-void radeonFlush( GLcontext *ctx )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+-
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
+-
+- radeonEmitState( rmesa );
+-
+- if (rmesa->store.cmd_used)
+- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+-}
+-
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void radeonFinish( GLcontext *ctx )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- radeonFlush( ctx );
+-
+- if (rmesa->do_irqs) {
+- LOCK_HARDWARE( rmesa );
+- radeonEmitIrqLocked( rmesa );
+- UNLOCK_HARDWARE( rmesa );
+- radeonWaitIrq( rmesa );
+- }
+- else
+- radeonWaitForIdle( rmesa );
+-}
+-
+-
+ void radeonInitIoctlFuncs( GLcontext *ctx )
+ {
+ ctx->Driver.Clear = radeonClear;
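+The radeon_ioctl.c hunks above replace the hand-built drm_radeon_cmd_header_t buffers (radeonAllocCmdBuf and friends) with the shared batch macros. A minimal sketch of the emission pattern, mirroring radeonEmitVertexAOS above; the macro definitions themselves come from the common command-buffer code added elsewhere in this patch and are assumed here:
+
+static void example_emit_vbpntr(r100ContextPtr rmesa, struct radeon_bo *bo,
+                                GLuint vertex_size, GLuint offset)
+{
+   BATCH_LOCALS(&rmesa->radeon);      /* declares the locals the OUT_* macros use */
+
+   BEGIN_BATCH(7);                    /* reserve 7 dwords for this section */
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+   OUT_BATCH(1);                                 /* one vertex array */
+   OUT_BATCH(vertex_size | (vertex_size << 8));  /* packed size/stride */
+   OUT_BATCH_RELOC(offset, bo, offset,           /* dword plus relocation */
+                   RADEON_GEM_DOMAIN_GTT, 0, 0);
+   END_BATCH();
+}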
+diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+index 4e3a44d..18805d4 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "main/simple_list.h"
+ #include "radeon_lock.h"
++#include "radeon_bocs_wrapper.h"
+
+-
+-extern void radeonEmitState( radeonContextPtr rmesa );
+-extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
++extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
+ GLuint vertex_size,
++ struct radeon_bo *bo,
+ GLuint offset );
+
+-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
++extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+ GLuint vertex_format,
+ GLuint primitive,
+ GLuint vertex_nr );
+
+-extern void radeonFlushElts( radeonContextPtr rmesa );
++extern void radeonFlushElts( GLcontext *ctx );
++
+
+-extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
++extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+ GLuint vertex_format,
+ GLuint primitive,
+ GLuint min_nr );
+
+-extern void radeonEmitAOS( radeonContextPtr rmesa,
+- struct radeon_dma_region **regions,
++
++extern void radeonEmitAOS( r100ContextPtr rmesa,
+ GLuint n,
+ GLuint offset );
+
+-extern void radeonEmitBlit( radeonContextPtr rmesa,
++extern void radeonEmitBlit( r100ContextPtr rmesa,
+ GLuint color_fmt,
+ GLuint src_pitch,
+ GLuint src_offset,
+@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
+ GLint dstx, GLint dsty,
+ GLuint w, GLuint h );
+
+-extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
+-
+-extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
+-extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
++extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
+
+-extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
+- struct radeon_dma_region *region,
+- int bytes,
+- int alignment );
++extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
+
+-extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+- struct radeon_dma_region *region,
+- const char *caller );
+-
+-extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
+- const drm_clip_rect_t *rect);
+-extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
+ extern void radeonFlush( GLcontext *ctx );
+ extern void radeonFinish( GLcontext *ctx );
+-extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
+-extern void radeonWaitForVBlank( radeonContextPtr rmesa );
+ extern void radeonInitIoctlFuncs( GLcontext *ctx );
+-extern void radeonGetAllParams( radeonContextPtr rmesa );
+-extern void radeonSetUpAtomList( radeonContextPtr rmesa );
++extern void radeonGetAllParams( r100ContextPtr rmesa );
++extern void radeonSetUpAtomList( r100ContextPtr rmesa );
+
+ /* ================================================================
+ * Helper macros:
+@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
+ */
+ #define RADEON_NEWPRIM( rmesa ) \
+ do { \
+- if ( rmesa->dma.flush ) \
+- rmesa->dma.flush( rmesa ); \
++ if ( rmesa->radeon.dma.flush ) \
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
+ } while (0)
+
+ /* Can accomodate several state changes and primitive changes without
+ * actually firing the buffer.
+ */
++
+ #define RADEON_STATECHANGE( rmesa, ATOM ) \
+ do { \
+ RADEON_NEWPRIM( rmesa ); \
+ rmesa->hw.ATOM.dirty = GL_TRUE; \
+- rmesa->hw.is_dirty = GL_TRUE; \
++ rmesa->radeon.hw.is_dirty = GL_TRUE; \
+ } while (0)
+
+-#define RADEON_DB_STATE( ATOM ) \
++#define RADEON_DB_STATE( ATOM ) \
+ memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
+ rmesa->hw.ATOM.cmd_size * 4)
+
+-static INLINE int RADEON_DB_STATECHANGE(
+- radeonContextPtr rmesa,
+- struct radeon_state_atom *atom )
++static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
++ struct radeon_state_atom *atom )
+ {
+ if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+- int *tmp;
++ GLuint *tmp;
+ RADEON_NEWPRIM( rmesa );
+ atom->dirty = GL_TRUE;
+- rmesa->hw.is_dirty = GL_TRUE;
++ rmesa->radeon.hw.is_dirty = GL_TRUE;
+ tmp = atom->cmd;
+ atom->cmd = atom->lastcmd;
+ atom->lastcmd = tmp;
+@@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE(
+ return 0;
+ }
+
+-
+-/* Fire the buffered vertices no matter what.
+- */
+-#define RADEON_FIREVERTICES( rmesa ) \
+-do { \
+- if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \
+- radeonFlush( rmesa->glCtx ); \
+- } \
+-} while (0)
+-
+ /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+ * are available, you will also be adding an rmesa->state.max_state_size because
+ * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
+@@ -167,36 +143,37 @@ do { \
+ #define VBUF_BUFSZ (4 * sizeof(int))
+ #endif
+
+-/* Ensure that a minimum amount of space is available in the command buffer.
+- * This is used to ensure atomicity of state updates with the rendering requests
+- * that rely on them.
+- *
+- * An alternative would be to implement a "soft lock" such that when the buffer
+- * wraps at an inopportune time, we grab the lock, flush the current buffer,
+- * and hang on to the lock until the critical section is finished and we flush
+- * the buffer again and unlock.
+- */
+-static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
+- int bytes )
+-{
+- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+- assert( bytes <= RADEON_CMD_BUF_SZ );
+-}
+
+-/* Alloc space in the command buffer
+- */
+-static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
+- int bytes, const char *where )
++static inline uint32_t cmdpacket3(int cmd_type)
+ {
+- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
++ drm_radeon_cmd_header_t cmd;
++
++ cmd.i = 0;
++ cmd.header.cmd_type = cmd_type;
++
++ return (uint32_t)cmd.i;
+
+- {
+- char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+- rmesa->store.cmd_used += bytes;
+- return head;
+- }
+ }
+
++#define OUT_BATCH_PACKET3(packet, num_extra) do { \
++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } else { \
++ OUT_BATCH(CP_PACKET2); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } \
++ } while(0)
++
++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
++ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
++ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } else { \
++ OUT_BATCH(CP_PACKET2); \
++ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
++ } \
++ } while(0)
++
++
+ #endif /* __RADEON_IOCTL_H__ */
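+The new OUT_BATCH_PACKET3/OUT_BATCH_PACKET3_CLIP wrappers above keep the two submission paths dword-for-dword compatible: without the kernel memory manager the CP packet is preceded by the drm_radeon_cmd_header_t that cmdpacket3() builds, while with kernel_mm it is preceded by a CP_PACKET2 no-op, so a packet always costs the same two header dwords before its payload. For example, the non-OLD_PACKETS draw in radeonEmitVbufPrim fits its four-dword budget on either path (vertex_format and prim_flags stand for whatever the caller computed):
+
+   BEGIN_BATCH(4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);  /* 2 dwords either way */
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(prim_flags);
+   END_BATCH();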
+diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
+index 64bb3ca..9a7e76b 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
++++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
+@@ -41,12 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "main/glheader.h"
+ #include "main/mtypes.h"
+-#include "radeon_context.h"
++#include "main/colormac.h"
++#include "dri_util.h"
++#include "radeon_screen.h"
++#include "radeon_common.h"
+ #include "radeon_lock.h"
+-#include "radeon_tex.h"
+-#include "radeon_state.h"
+-#include "radeon_ioctl.h"
+-
+ #include "drirenderbuffer.h"
+
+ #if DEBUG_LOCKING
+@@ -56,13 +55,28 @@ int prevLockLine = 0;
+
+ /* Turn on/off page flipping according to the flags in the sarea:
+ */
+-static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
++void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+ {
++ int use_back;
++ __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
++ GLframebuffer *fb = drawable->driverPrivate;
++
+ rmesa->doPageFlip = rmesa->sarea->pfState;
+ if (rmesa->glCtx->WinSysDrawBuffer) {
+- driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+- rmesa->sarea->pfCurrentPage);
++ rmesa->vtbl.update_draw_buffer(rmesa->glCtx);
+ }
++
++ use_back = rmesa->glCtx->DrawBuffer ?
++ (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
++ BUFFER_BACK_LEFT) : 1;
++ use_back ^= (rmesa->sarea->pfCurrentPage == 1);
++
++ if (use_back)
++ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ else
++ rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++
++ rmesa->state.depth.rrb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+ }
+
+ /* Update the hardware state. This is called if another context has
+@@ -80,6 +94,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+ __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
+
++ assert(drawable != NULL);
++
+ drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+
+ /* The window might have moved, so we might need to get new clip
+@@ -98,27 +114,11 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+ if (rmesa->lastStamp != drawable->lastStamp) {
+ radeonUpdatePageFlipping(rmesa);
+ radeonSetCliprects(rmesa);
+- radeonUpdateViewportOffset(rmesa->glCtx);
++ rmesa->vtbl.update_viewport_offset(rmesa->glCtx);
+ driUpdateFramebufferSize(rmesa->glCtx, drawable);
+ }
+
+- RADEON_STATECHANGE(rmesa, ctx);
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+- RADEON_COLOR_TILE_ENABLE;
+- } else {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
+- ~RADEON_COLOR_TILE_ENABLE;
+- }
+-
+- if (sarea->ctx_owner != rmesa->dri.hwContext) {
+- int i;
+- sarea->ctx_owner = rmesa->dri.hwContext;
+-
+- for (i = 0; i < rmesa->nr_heaps; i++) {
+- DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
+- }
+- }
++ rmesa->vtbl.get_lock(rmesa);
+
+ rmesa->lost_context = GL_TRUE;
+ }
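+radeonGetLock() and radeonUpdatePageFlipping() now reach chip-specific behaviour through function pointers on the shared context (rmesa->vtbl.update_draw_buffer, update_viewport_offset and get_lock) rather than calling the r100 routines directly. A purely illustrative sketch of how an r100 context might populate that vtable; only the call sites appear in this hunk, so r100_get_lock is hypothetical, while the other two are the existing entry points the old lock code called directly:
+
+static void r100_init_vtbl(radeonContextPtr radeon)
+{
+   radeon->vtbl.get_lock = r100_get_lock;                       /* hypothetical hook */
+   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
+   radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
+}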
+diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
+index 86e96aa..f5ebb8d 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_lock.h
++++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
+@@ -39,8 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * Kevin E. Martin <martin@valinux.com>
+ */
+
+-#ifndef __RADEON_LOCK_H__
+-#define __RADEON_LOCK_H__
++#ifndef COMMON_LOCK_H
++#define COMMON_LOCK_H
++
++#include "main/colormac.h"
++#include "radeon_screen.h"
++#include "radeon_common.h"
+
+ extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+
+@@ -94,19 +98,23 @@ extern int prevLockLine;
+ do { \
+ char __ret = 0; \
+ DEBUG_CHECK_LOCK(); \
+- DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
+- (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \
+- if ( __ret ) \
+- radeonGetLock( (rmesa), 0 ); \
+- DEBUG_LOCK(); \
++ if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \
++ DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \
++ (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \
++ if ( __ret ) \
++ radeonGetLock( (rmesa), 0 ); \
++ } \
++ DEBUG_LOCK(); \
+ } while (0)
+
+ #define UNLOCK_HARDWARE( rmesa ) \
+ do { \
+- DRM_UNLOCK( (rmesa)->dri.fd, \
+- (rmesa)->dri.hwLock, \
+- (rmesa)->dri.hwContext ); \
+- DEBUG_RESET(); \
++ if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \
++ DRM_UNLOCK( (rmesa)->dri.fd, \
++ (rmesa)->dri.hwLock, \
++ (rmesa)->dri.hwContext ); \
++ DEBUG_RESET(); \
++ } \
+ } while (0)
+
+-#endif /* __RADEON_LOCK_H__ */
++#endif
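+LOCK_HARDWARE and UNLOCK_HARDWARE now test radeonScreen->driScreen->dri2.enabled and skip the global DRM lock entirely under DRI2, since DRI2 clients do not use the SAREA hardware lock. Call sites keep the same bracketed shape either way, for example:
+
+   LOCK_HARDWARE(&rmesa->radeon);
+   /* ... touch the SAREA or issue legacy ioctls ... */
+   UNLOCK_HARDWARE(&rmesa->radeon);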
+diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+index de3c3a1..7f5da16 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
++++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+@@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast_setup/swrast_setup.h"
+ #include "math/m_translate.h"
+ #include "tnl/tnl.h"
+-#include "tnl/tcontext.h"
+
+ #include "radeon_context.h"
+ #include "radeon_ioctl.h"
+@@ -49,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_maos.h"
+ #include "radeon_tcl.h"
+
+-#if 0
+-/* Usage:
+- * - from radeon_tcl_render
+- * - call radeonEmitArrays to ensure uptodate arrays in dma
+- * - emit primitives (new type?) which reference the data
+- * -- need to use elts for lineloop, quads, quadstrip/flat
+- * -- other primitives are all well-formed (need tristrip-1,fake-poly)
+- *
+- */
+-static void emit_ubyte_rgba3( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
++static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
++ GLvoid *data, int stride, int count)
+ {
+ int i;
+- radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p\n",
+- __FUNCTION__, count, stride, (void *)out);
+-
+- for (i = 0; i < count; i++) {
+- out->red = *data;
+- out->green = *(data+1);
+- out->blue = *(data+2);
+- out->alpha = 0xFF;
+- out++;
+- data += stride;
+- }
+-}
+-
+-static void emit_ubyte_rgba4( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
++ uint32_t *out;
++ int size = 1;
++ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+ __FUNCTION__, count, stride);
+
+- if (stride == 4)
+- COPY_DWORDS( out, data, count );
+- else
+- for (i = 0; i < count; i++) {
+- *out++ = LE32_TO_CPU(*(int *)data);
+- data += stride;
+- }
+-}
+-
+-
+-static void emit_ubyte_rgba( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int size,
+- int stride,
+- int count )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+-
+- assert (!rvb->buf);
+-
+ if (stride == 0) {
+- radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
++ radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
+ count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = 1;
++ aos->stride = 0;
+ }
+ else {
+- radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 1;
+- rvb->aos_size = 1;
++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
++ aos->stride = size;
+ }
+
+- /* Emit the data
+- */
+- switch (size) {
+- case 3:
+- emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+- break;
+- case 4:
+- emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+- break;
+- default:
+- assert(0);
+- exit(1);
+- break;
+- }
+-}
+-#endif
+-
+-#if defined(USE_X86_ASM)
+-#define COPY_DWORDS( dst, src, nr ) \
+-do { \
+- int __tmp; \
+- __asm__ __volatile__( "rep ; movsl" \
+- : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
+- : "0" (nr), \
+- "D" ((long)dst), \
+- "S" ((long)src) ); \
+-} while (0)
+-#else
+-#define COPY_DWORDS( dst, src, nr ) \
+-do { \
+- int j; \
+- for ( j = 0 ; j < nr ; j++ ) \
+- dst[j] = ((int *)src)[j]; \
+- dst += nr; \
+-} while (0)
+-#endif
+-
+-static void emit_vecfog( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- GLfloat *out;
+-
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ aos->components = size;
++ aos->count = count;
+
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- assert (!rvb->buf);
+-
+- if (stride == 0) {
+- radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+- count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = 1;
+- }
+- else {
+- radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 1;
+- rvb->aos_size = 1;
+- }
+
+ /* Emit the data
+ */
+- out = (GLfloat *)(rvb->address + rvb->start);
++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+ for (i = 0; i < count; i++) {
+ out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
+ out++;
+@@ -210,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx,
+ }
+ }
+
+-static void emit_vec4( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 4)
+- COPY_DWORDS( out, data, count );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out++;
+- data += stride;
+- }
+-}
+-
+-
+-static void emit_vec8( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 8)
+- COPY_DWORDS( out, data, count*2 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out += 2;
+- data += stride;
+- }
+-}
+-
+-static void emit_vec12( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+- __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+- if (stride == 12)
+- COPY_DWORDS( out, data, count*3 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out[2] = *(int *)(data+8);
+- out += 3;
+- data += stride;
+- }
+-}
+-
+-static void emit_vec16( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
+-{
+- int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d stride %d\n",
+- __FUNCTION__, count, stride);
+-
+- if (stride == 16)
+- COPY_DWORDS( out, data, count*4 );
+- else
+- for (i = 0; i < count; i++) {
+- out[0] = *(int *)data;
+- out[1] = *(int *)(data+4);
+- out[2] = *(int *)(data+8);
+- out[3] = *(int *)(data+12);
+- out += 4;
+- data += stride;
+- }
+-}
+-
+-
+-static void emit_vector( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int size,
+- int stride,
+- int count )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- fprintf(stderr, "%s count %d size %d stride %d\n",
+- __FUNCTION__, count, size, stride);
+-
+- assert (!rvb->buf);
+-
+- if (stride == 0) {
+- radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
+- count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = size;
+- }
+- else {
+- radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = size;
+- rvb->aos_size = size;
+- }
+-
+- /* Emit the data
+- */
+- switch (size) {
+- case 1:
+- emit_vec4( ctx, rvb, data, stride, count );
+- break;
+- case 2:
+- emit_vec8( ctx, rvb, data, stride, count );
+- break;
+- case 3:
+- emit_vec12( ctx, rvb, data, stride, count );
+- break;
+- case 4:
+- emit_vec16( ctx, rvb, data, stride, count );
+- break;
+- default:
+- assert(0);
+- exit(1);
+- break;
+- }
+-
+-}
+-
+-
+-
+-static void emit_s0_vec( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
++static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
+ {
+ int i;
+- int *out = (int *)(rvb->address + rvb->start);
+-
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+ __FUNCTION__, count, stride);
+@@ -385,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx,
+ }
+ }
+
+-static void emit_stq_vec( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int stride,
+- int count )
++static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
+ {
+ int i;
+- int *out = (int *)(rvb->address + rvb->start);
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s count %d stride %d\n",
+@@ -410,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
+
+
+
+-static void emit_tex_vector( GLcontext *ctx,
+- struct radeon_dma_region *rvb,
+- char *data,
+- int size,
+- int stride,
+- int count )
++static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
++ GLvoid *data, int size, int stride, int count)
+ {
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ int emitsize;
++ uint32_t *out;
+
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+- assert (!rvb->buf);
+-
+ switch (size) {
+ case 4: emitsize = 3; break;
+ case 3: emitsize = 3; break;
+@@ -433,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
+
+
+ if (stride == 0) {
+- radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
+ count = 1;
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = 0;
+- rvb->aos_size = emitsize;
++ aos->stride = 0;
+ }
+ else {
+- radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
+- rvb->aos_start = GET_START(rvb);
+- rvb->aos_stride = emitsize;
+- rvb->aos_size = emitsize;
++ radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
++ aos->stride = emitsize;
+ }
+
++ aos->components = emitsize;
++ aos->count = count;
+
+ /* Emit the data
+ */
++ out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+ switch (size) {
+ case 1:
+- emit_s0_vec( ctx, rvb, data, stride, count );
++ emit_s0_vec( out, data, stride, count );
+ break;
+ case 2:
+- emit_vec8( ctx, rvb, data, stride, count );
++ radeonEmitVec8( out, data, stride, count );
+ break;
+ case 3:
+- emit_vec12( ctx, rvb, data, stride, count );
++ radeonEmitVec12( out, data, stride, count );
+ break;
+ case 4:
+- emit_stq_vec( ctx, rvb, data, stride, count );
++ emit_stq_vec( out, data, stride, count );
+ break;
+ default:
+ assert(0);
+@@ -477,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx,
+ */
+ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+- struct radeon_dma_region **component = rmesa->tcl.aos_components;
+ GLuint nr = 0;
+ GLuint vfmt = 0;
+ GLuint count = VB->Count;
+@@ -492,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+
+ if (1) {
+ if (!rmesa->tcl.obj.buf)
+- emit_vector( ctx,
+- &rmesa->tcl.obj,
+- (char *)VB->ObjPtr->data,
+- VB->ObjPtr->size,
+- VB->ObjPtr->stride,
+- count);
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->ObjPtr->data,
++ VB->ObjPtr->size,
++ VB->ObjPtr->stride,
++ count);
+
+ switch( VB->ObjPtr->size ) {
+ case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
+@@ -506,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ default:
+ break;
+ }
+- component[nr++] = &rmesa->tcl.obj;
++ nr++;
+ }
+
+
+ if (inputs & VERT_BIT_NORMAL) {
+ if (!rmesa->tcl.norm.buf)
+- emit_vector( ctx,
+- &(rmesa->tcl.norm),
+- (char *)VB->NormalPtr->data,
+- 3,
+- VB->NormalPtr->stride,
+- count);
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->NormalPtr->data,
++ 3,
++ VB->NormalPtr->stride,
++ count);
+
+ vfmt |= RADEON_CP_VC_FRMT_N0;
+- component[nr++] = &rmesa->tcl.norm;
++ nr++;
+ }
+
+ if (inputs & VERT_BIT_COLOR0) {
+@@ -538,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ }
+
+ if (!rmesa->tcl.rgba.buf)
+- emit_vector( ctx,
+- &(rmesa->tcl.rgba),
+- (char *)VB->ColorPtr[0]->data,
+- emitsize,
+- VB->ColorPtr[0]->stride,
+- count);
+-
+-
+- component[nr++] = &rmesa->tcl.rgba;
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->ColorPtr[0]->data,
++ emitsize,
++ VB->ColorPtr[0]->stride,
++ count);
++
++ nr++;
+ }
+
+
+ if (inputs & VERT_BIT_COLOR1) {
+ if (!rmesa->tcl.spec.buf) {
+
+- emit_vector( ctx,
+- &rmesa->tcl.spec,
+- (char *)VB->SecondaryColorPtr[0]->data,
+- 3,
+- VB->SecondaryColorPtr[0]->stride,
+- count);
++ rcommon_emit_vector( ctx,
++ &(rmesa->tcl.aos[nr]),
++ (char *)VB->SecondaryColorPtr[0]->data,
++ 3,
++ VB->SecondaryColorPtr[0]->stride,
++ count);
+ }
+
+ vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
+- component[nr++] = &rmesa->tcl.spec;
++ nr++;
+ }
+
+ /* FIXME: not sure if this is correct. May need to stitch this together with
+@@ -571,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ if (inputs & VERT_BIT_FOG) {
+ if (!rmesa->tcl.fog.buf)
+ emit_vecfog( ctx,
+- &(rmesa->tcl.fog),
++ &(rmesa->tcl.aos[nr]),
+ (char *)VB->FogCoordPtr->data,
+ VB->FogCoordPtr->stride,
+ count);
+
+ vfmt |= RADEON_CP_VC_FRMT_FPFOG;
+- component[nr++] = &rmesa->tcl.fog;
++ nr++;
+ }
+
+
+@@ -588,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ if (inputs & VERT_BIT_TEX(unit)) {
+ if (!rmesa->tcl.tex[unit].buf)
+ emit_tex_vector( ctx,
+- &(rmesa->tcl.tex[unit]),
++ &(rmesa->tcl.aos[nr]),
+ (char *)VB->TexCoordPtr[unit]->data,
+ VB->TexCoordPtr[unit]->size,
+ VB->TexCoordPtr[unit]->stride,
+ count );
++ nr++;
+
+ vfmt |= RADEON_ST_BIT(unit);
+ /* assume we need the 3rd coord if texgen is active for r/q OR at least
+@@ -610,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
+ radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
+ }
+- component[nr++] = &rmesa->tcl.tex[unit];
+ }
+ }
+
+@@ -626,31 +327,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+
+ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+- GLuint unit;
+-
+-#if 0
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- _tnl_print_vert_flags( __FUNCTION__, newinputs );
+-#endif
+-
+- if (newinputs & VERT_BIT_POS)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
+-
+- if (newinputs & VERT_BIT_NORMAL)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
+-
+- if (newinputs & VERT_BIT_COLOR0)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
+-
+- if (newinputs & VERT_BIT_COLOR1)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
+-
+- if (newinputs & VERT_BIT_FOG)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
++ int i;
+
+- for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
+- if (newinputs & VERT_BIT_TEX(unit))
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++ if (rmesa->tcl.aos[i].bo) {
++ radeon_bo_unref(rmesa->tcl.aos[i].bo);
++ rmesa->tcl.aos[i].bo = NULL;
++ }
+ }
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+index 126d072..d468a97 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
++++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
+
+ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+ GLuint req = 0;
+ GLuint unit;
+@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ break;
+
+ if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
+- rmesa->tcl.indexed_verts.buf)
++ rmesa->tcl.aos[0].bo)
+ return;
+
+- if (rmesa->tcl.indexed_verts.buf)
++ if (rmesa->tcl.aos[0].bo)
+ radeonReleaseArrays( ctx, ~0 );
+
+- radeonAllocDmaRegion( rmesa,
+- &rmesa->tcl.indexed_verts,
++ radeonAllocDmaRegion( &rmesa->radeon,
++ &rmesa->tcl.aos[0].bo,
++ &rmesa->tcl.aos[0].offset,
+ VB->Count * setup_tab[i].vertex_size * 4,
+ 4);
+
+@@ -421,15 +422,11 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+
+
+ setup_tab[i].emit( ctx, 0, VB->Count,
+- rmesa->tcl.indexed_verts.address +
+- rmesa->tcl.indexed_verts.start );
++ rmesa->tcl.aos[0].bo->ptr + rmesa->tcl.aos[0].offset);
+
++ // rmesa->tcl.aos[0].size = setup_tab[i].vertex_size;
++ rmesa->tcl.aos[0].stride = setup_tab[i].vertex_size;
+ rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
+- rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+- rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
+- rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
+-
+- rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+ rmesa->tcl.nr_aos_components = 1;
+ }
+
+@@ -437,13 +434,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+
+ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+-
+-#if 0
+- if (RADEON_DEBUG & DEBUG_VERTS)
+- _tnl_print_vert_flags( __FUNCTION__, newinputs );
+-#endif
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
++ int i;
+
+- if (newinputs)
+- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
++ for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++ if (rmesa->tcl.aos[i].bo) {
++ radeon_bo_unref(rmesa->tcl.aos[i].bo);
++ rmesa->tcl.aos[i].bo = NULL;
++ }
++ }
+ }
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
new file mode 100644
index 0000000..3203ee1
@@ -4985,6 +28330,5885 @@ index 0000000..43dfa48
+
+
+#endif /* __RADEON_MIPMAP_TREE_H_ */
+diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
+index 6613757..bbed838 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_sanity.c
++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
+@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
+ }
+
+
+-int radeonSanityCmdBuffer( radeonContextPtr rmesa,
++int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+ int nbox,
+ drm_clip_rect_t *boxes )
+ {
+diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
+index 1ec06bc..f30eb1c 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_sanity.h
++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
+@@ -1,7 +1,7 @@
+ #ifndef RADEON_SANITY_H
+ #define RADEON_SANITY_H
+
+-extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
++extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+ int nbox,
+ drm_clip_rect_t *boxes );
+
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
+index e3afaa9..c591e9f 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
+@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * \author Gareth Hughes <gareth@valinux.com>
+ */
+
++#include <errno.h>
+ #include "main/glheader.h"
+ #include "main/imports.h"
+ #include "main/mtypes.h"
+@@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_chipset.h"
+ #include "radeon_macros.h"
+ #include "radeon_screen.h"
++#include "radeon_common.h"
++#include "radeon_span.h"
+ #if !RADEON_COMMON
+ #include "radeon_context.h"
+-#include "radeon_span.h"
+ #include "radeon_tex.h"
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+-#include "r200_span.h"
+ #include "r200_tex.h"
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+ #include "r300_context.h"
+ #include "r300_fragprog.h"
+ #include "r300_tex.h"
+-#include "radeon_span.h"
+ #endif
+
+ #include "utils.h"
+ #include "vblank.h"
+ #include "drirenderbuffer.h"
+
++#include "radeon_bocs_wrapper.h"
++
+ #include "GL/internal/dri_interface.h"
+
+ /* Radeon configuration
+ */
+ #include "xmlpool.h"
+
++#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
++DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
++ DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
++ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
++DRI_CONF_OPT_END
++
+ #if !RADEON_COMMON /* R100 */
+ PUBLIC const char __driConfigOptions[] =
+ DRI_CONF_BEGIN
+@@ -80,6 +88,7 @@ DRI_CONF_BEGIN
+ DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+ DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
+ DRI_CONF_HYPERZ(false)
++ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_QUALITY
+ DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+@@ -95,7 +104,7 @@ DRI_CONF_BEGIN
+ DRI_CONF_NO_RAST(false)
+ DRI_CONF_SECTION_END
+ DRI_CONF_END;
+-static const GLuint __driNConfigOptions = 14;
++static const GLuint __driNConfigOptions = 15;
+
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+
+@@ -107,6 +116,7 @@ DRI_CONF_BEGIN
+ DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+ DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
+ DRI_CONF_HYPERZ(false)
++ DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_QUALITY
+ DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+@@ -126,7 +136,7 @@ DRI_CONF_BEGIN
+ DRI_CONF_NV_VERTEX_PROGRAM(false)
+ DRI_CONF_SECTION_END
+ DRI_CONF_END;
+-static const GLuint __driNConfigOptions = 16;
++static const GLuint __driNConfigOptions = 17;
+
+ extern const struct dri_extension blend_extensions[];
+ extern const struct dri_extension ARB_vp_extension[];
+@@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
+ DRI_CONF_OPT_END
+
+-#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
+-DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
+- DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
+- DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
+-DRI_CONF_OPT_END
++
+
+ #define DRI_CONF_DISABLE_S3TC(def) \
+ DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
+@@ -206,8 +212,9 @@ DRI_CONF_BEGIN
+ DRI_CONF_END;
+ static const GLuint __driNConfigOptions = 17;
+
++extern const struct dri_extension gl_20_extension[];
++
+ #ifndef RADEON_DEBUG
+-int RADEON_DEBUG = 0;
+
+ static const struct dri_debug_control debug_control[] = {
+ {"fall", DEBUG_FALLBACKS},
+@@ -349,137 +356,17 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
+ { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+ r300SetTexOffset,
+ };
+-#endif
+-
+-/* Create the device specific screen private data struct.
+- */
+-static radeonScreenPtr
+-radeonCreateScreen( __DRIscreenPrivate *sPriv )
+-{
+- radeonScreenPtr screen;
+- RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+- unsigned char *RADEONMMIO;
+- int i;
+- int ret;
+- uint32_t temp;
+-
+- if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
+- fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n");
+- return GL_FALSE;
+- }
+
+- /* Allocate the private area */
+- screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+- if ( !screen ) {
+- __driUtilMessage("%s: Could not allocate memory for screen structure",
+- __FUNCTION__);
+- return NULL;
+- }
+-
+-#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+- RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++static const __DRItexBufferExtension r300TexBufferExtension = {
++ { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
++ r300SetTexBuffer,
++};
+ #endif
+
+- /* parse information in __driConfigOptions */
+- driParseOptionInfo (&screen->optionCache,
+- __driConfigOptions, __driNConfigOptions);
+-
+- /* This is first since which regions we map depends on whether or
+- * not we are using a PCI card.
+- */
+- screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+- {
+- int ret;
+- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
+- &screen->gart_buffer_offset);
+-
+- if (ret) {
+- FREE( screen );
+- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
+- return NULL;
+- }
+-
+- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
+- &screen->gart_base);
+- if (ret) {
+- FREE( screen );
+- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
+- return NULL;
+- }
+-
+- ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
+- &screen->irq);
+- if (ret) {
+- FREE( screen );
+- fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
+- return NULL;
+- }
+- screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+- screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+- screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+- screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+- screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+- screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+- screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+- }
+-
+- screen->mmio.handle = dri_priv->registerHandle;
+- screen->mmio.size = dri_priv->registerSize;
+- if ( drmMap( sPriv->fd,
+- screen->mmio.handle,
+- screen->mmio.size,
+- &screen->mmio.map ) ) {
+- FREE( screen );
+- __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+- return NULL;
+- }
+-
+- RADEONMMIO = screen->mmio.map;
+-
+- screen->status.handle = dri_priv->statusHandle;
+- screen->status.size = dri_priv->statusSize;
+- if ( drmMap( sPriv->fd,
+- screen->status.handle,
+- screen->status.size,
+- &screen->status.map ) ) {
+- drmUnmap( screen->mmio.map, screen->mmio.size );
+- FREE( screen );
+- __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+- return NULL;
+- }
+- screen->scratch = (__volatile__ uint32_t *)
+- ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+-
+- screen->buffers = drmMapBufs( sPriv->fd );
+- if ( !screen->buffers ) {
+- drmUnmap( screen->status.map, screen->status.size );
+- drmUnmap( screen->mmio.map, screen->mmio.size );
+- FREE( screen );
+- __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+- return NULL;
+- }
+-
+- if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+- screen->gartTextures.handle = dri_priv->gartTexHandle;
+- screen->gartTextures.size = dri_priv->gartTexMapSize;
+- if ( drmMap( sPriv->fd,
+- screen->gartTextures.handle,
+- screen->gartTextures.size,
+- (drmAddressPtr)&screen->gartTextures.map ) ) {
+- drmUnmapBufs( screen->buffers );
+- drmUnmap( screen->status.map, screen->status.size );
+- drmUnmap( screen->mmio.map, screen->mmio.size );
+- FREE( screen );
+- __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+- return NULL;
+- }
+-
+- screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+- }
+-
++static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
++{
+ screen->chip_flags = 0;
+- /* XXX: add more chipsets */
+- switch ( dri_priv->deviceID ) {
++ switch ( device_id ) {
+ case PCI_CHIP_RADEON_LY:
+ case PCI_CHIP_RADEON_LZ:
+ case PCI_CHIP_RADEON_QY:
+@@ -683,12 +570,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ screen->chip_family = CHIP_FAMILY_RS400;
+ break;
+
+- case PCI_CHIP_RS600_793F:
+- case PCI_CHIP_RS600_7941:
+- case PCI_CHIP_RS600_7942:
+- screen->chip_family = CHIP_FAMILY_RS600;
+- break;
+-
+ case PCI_CHIP_RS690_791E:
+ case PCI_CHIP_RS690_791F:
+ screen->chip_family = CHIP_FAMILY_RS690;
+@@ -817,9 +698,162 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+
+ default:
+ fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
+- dri_priv->deviceID);
++ device_id);
++ return -1;
++ }
++
++ return 0;
++}
++
++
++/* Create the device specific screen private data struct.
++ */
++static radeonScreenPtr
++radeonCreateScreen( __DRIscreenPrivate *sPriv )
++{
++ radeonScreenPtr screen;
++ RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
++ unsigned char *RADEONMMIO = NULL;
++ int i;
++ int ret;
++ uint32_t temp;
++
++ if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
++ fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n");
++ return GL_FALSE;
++ }
++
++ /* Allocate the private area */
++ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
++ if ( !screen ) {
++ __driUtilMessage("%s: Could not allocate memory for screen structure",
++ __FUNCTION__);
+ return NULL;
+ }
++
++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++ RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++#endif
++
++ /* parse information in __driConfigOptions */
++ driParseOptionInfo (&screen->optionCache,
++ __driConfigOptions, __driNConfigOptions);
++
++ /* This is first since which regions we map depends on whether or
++ * not we are using a PCI card.
++ */
++ screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
++ {
++ int ret;
++
++#ifdef RADEON_PARAM_KERNEL_MM
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
++ &screen->kernel_mm);
++
++ if (ret && ret != -EINVAL) {
++ FREE( screen );
++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret);
++ return NULL;
++ }
++
++ if (ret == -EINVAL)
++ screen->kernel_mm = 0;
++#endif
++
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
++ &screen->gart_buffer_offset);
++
++ if (ret) {
++ FREE( screen );
++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
++ return NULL;
++ }
++
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
++ &screen->gart_base);
++ if (ret) {
++ FREE( screen );
++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
++ return NULL;
++ }
++
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
++ &screen->irq);
++ if (ret) {
++ FREE( screen );
++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
++ return NULL;
++ }
++ screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
++ screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
++ screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
++ screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
++ screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
++ screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
++ screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
++ }
++
++ if (!screen->kernel_mm) {
++ screen->mmio.handle = dri_priv->registerHandle;
++ screen->mmio.size = dri_priv->registerSize;
++ if ( drmMap( sPriv->fd,
++ screen->mmio.handle,
++ screen->mmio.size,
++ &screen->mmio.map ) ) {
++ FREE( screen );
++ __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
++ return NULL;
++ }
++
++ RADEONMMIO = screen->mmio.map;
++
++ screen->status.handle = dri_priv->statusHandle;
++ screen->status.size = dri_priv->statusSize;
++ if ( drmMap( sPriv->fd,
++ screen->status.handle,
++ screen->status.size,
++ &screen->status.map ) ) {
++ drmUnmap( screen->mmio.map, screen->mmio.size );
++ FREE( screen );
++ __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
++ return NULL;
++ }
++ screen->scratch = (__volatile__ uint32_t *)
++ ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
++
++ screen->buffers = drmMapBufs( sPriv->fd );
++ if ( !screen->buffers ) {
++ drmUnmap( screen->status.map, screen->status.size );
++ drmUnmap( screen->mmio.map, screen->mmio.size );
++ FREE( screen );
++ __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
++ return NULL;
++ }
++
++ if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
++ screen->gartTextures.handle = dri_priv->gartTexHandle;
++ screen->gartTextures.size = dri_priv->gartTexMapSize;
++ if ( drmMap( sPriv->fd,
++ screen->gartTextures.handle,
++ screen->gartTextures.size,
++ (drmAddressPtr)&screen->gartTextures.map ) ) {
++ drmUnmapBufs( screen->buffers );
++ drmUnmap( screen->status.map, screen->status.size );
++ drmUnmap( screen->mmio.map, screen->mmio.size );
++ FREE( screen );
++ __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
++ return NULL;
++ }
++
++ screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
++ }
++ }
++
++
++ ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
++ if (ret == -1)
++ return NULL;
++
+ if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
+ sPriv->ddx_version.minor < 2) {
+ fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
+@@ -847,7 +881,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
+ &temp);
+ if (ret) {
+- if (screen->chip_family < CHIP_FAMILY_RS600)
++ if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
+ screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+ else {
+ FREE( screen );
+@@ -858,7 +892,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ screen->fbLocation = (temp & 0xffff) << 16;
+ }
+
+- if (screen->chip_family >= CHIP_FAMILY_R300) {
++ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
+ &temp);
+ if (ret) {
+@@ -949,6 +983,103 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+ }
+
++ if (!screen->kernel_mm) {
++#if !RADEON_COMMON
++ screen->extensions[i++] = &radeonTexOffsetExtension.base;
++#endif
++
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
++ if (IS_R200_CLASS(screen))
++ screen->extensions[i++] = &r200AllocateExtension.base;
++
++ screen->extensions[i++] = &r200texOffsetExtension.base;
++#endif
++
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++ screen->extensions[i++] = &r300texOffsetExtension.base;
++#endif
++ }
++
++ screen->extensions[i++] = NULL;
++ sPriv->extensions = screen->extensions;
++
++ screen->driScreen = sPriv;
++ screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
++ screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
++ screen->sarea_priv_offset);
++
++ if (screen->kernel_mm)
++ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
++ else
++ screen->bom = radeon_bo_manager_legacy_ctor(screen);
++ if (screen->bom == NULL) {
++ free(screen);
++ return NULL;
++ }
++
++ return screen;
++}
++
++static radeonScreenPtr
++radeonCreateScreen2(__DRIscreenPrivate *sPriv)
++{
++ radeonScreenPtr screen;
++ int i;
++ int ret;
++ uint32_t device_id;
++
++ /* Allocate the private area */
++ screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
++ if ( !screen ) {
++ __driUtilMessage("%s: Could not allocate memory for screen structure",
++ __FUNCTION__);
++ fprintf(stderr, "leaving here\n");
++ return NULL;
++ }
++
++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++ RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++#endif
++
++ /* parse information in __driConfigOptions */
++ driParseOptionInfo (&screen->optionCache,
++ __driConfigOptions, __driNConfigOptions);
++
++ screen->kernel_mm = 1;
++ screen->chip_flags = 0;
++
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
++ &screen->irq);
++
++ ret = radeonGetParam( sPriv->fd, RADEON_PARAM_DEVICE_ID,
++ &device_id);
++ if (ret) {
++ FREE( screen );
++ fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
++ return NULL;
++ }
++
++ ret = radeon_set_screen_flags(screen, device_id);
++ if (ret == -1)
++ return NULL;
++
++ if (screen->chip_family <= CHIP_FAMILY_RS200)
++ screen->chip_flags |= RADEON_CLASS_R100;
++ else if (screen->chip_family <= CHIP_FAMILY_RV280)
++ screen->chip_flags |= RADEON_CLASS_R200;
++ else
++ screen->chip_flags |= RADEON_CLASS_R300;
++
++ i = 0;
++ screen->extensions[i++] = &driCopySubBufferExtension.base;
++ screen->extensions[i++] = &driFrameTrackingExtension.base;
++ screen->extensions[i++] = &driReadDrawableExtension;
++
++ if ( screen->irq != 0 ) {
++ screen->extensions[i++] = &driSwapControlExtension.base;
++ screen->extensions[i++] = &driMediaStreamCounterExtension.base;
++ }
++
+ #if !RADEON_COMMON
+ screen->extensions[i++] = &radeonTexOffsetExtension.base;
+ #endif
+@@ -961,14 +1092,19 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ #endif
+
+ #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+- screen->extensions[i++] = &r300texOffsetExtension.base;
++ //screen->extensions[i++] = &r300texOffsetExtension.base;
++ screen->extensions[i++] = &r300TexBufferExtension.base;
+ #endif
+
+ screen->extensions[i++] = NULL;
+ sPriv->extensions = screen->extensions;
+
+ screen->driScreen = sPriv;
+- screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
++ screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
++ if (screen->bom == NULL) {
++ free(screen);
++ return NULL;
++ }
+ return screen;
+ }
+
+@@ -977,23 +1113,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ static void
+ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+ {
+- radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
++ radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
+
+- if (!screen)
+- return;
++ if (!screen)
++ return;
+
+- if ( screen->gartTextures.map ) {
+- drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+- }
+- drmUnmapBufs( screen->buffers );
+- drmUnmap( screen->status.map, screen->status.size );
+- drmUnmap( screen->mmio.map, screen->mmio.size );
++ if (screen->kernel_mm) {
++#ifdef RADEON_BO_TRACK
++ radeon_tracker_print(&screen->bom->tracker, stderr);
++#endif
++ radeon_bo_manager_gem_dtor(screen->bom);
++ } else {
++ radeon_bo_manager_legacy_dtor(screen->bom);
++
++ if ( screen->gartTextures.map ) {
++ drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
++ }
++ drmUnmapBufs( screen->buffers );
++ drmUnmap( screen->status.map, screen->status.size );
++ drmUnmap( screen->mmio.map, screen->mmio.size );
++ }
+
+- /* free all option information */
+- driDestroyOptionInfo (&screen->optionCache);
++ /* free all option information */
++ driDestroyOptionInfo (&screen->optionCache);
+
+- FREE( screen );
+- sPriv->private = NULL;
++ FREE( screen );
++ sPriv->private = NULL;
+ }
+
+
+@@ -1002,15 +1147,102 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+ static GLboolean
+ radeonInitDriver( __DRIscreenPrivate *sPriv )
+ {
+- sPriv->private = (void *) radeonCreateScreen( sPriv );
+- if ( !sPriv->private ) {
+- radeonDestroyScreen( sPriv );
+- return GL_FALSE;
+- }
++ if (sPriv->dri2.enabled) {
++ sPriv->private = (void *) radeonCreateScreen2( sPriv );
++ } else {
++ sPriv->private = (void *) radeonCreateScreen( sPriv );
++ }
++ if ( !sPriv->private ) {
++ radeonDestroyScreen( sPriv );
++ return GL_FALSE;
++ }
+
+- return GL_TRUE;
++ return GL_TRUE;
+ }
+
++static GLboolean
++radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
++ GLenum intFormat, GLuint w, GLuint h)
++{
++ rb->Width = w;
++ rb->Height = h;
++ rb->_ActualFormat = intFormat;
++
++ return GL_TRUE;
++}
++
++
++static struct radeon_renderbuffer *
++radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
++{
++ struct radeon_renderbuffer *ret;
++
++ ret = CALLOC_STRUCT(radeon_renderbuffer);
++ if (!ret)
++ return NULL;
++
++ _mesa_init_renderbuffer(&ret->base, 0);
++
++ /* XXX format junk */
++ switch (format) {
++ case GL_RGB5:
++ ret->base._ActualFormat = GL_RGB5;
++ ret->base._BaseFormat = GL_RGBA;
++ ret->base.RedBits = 5;
++ ret->base.GreenBits = 6;
++ ret->base.BlueBits = 5;
++ ret->base.DataType = GL_UNSIGNED_BYTE;
++ break;
++ case GL_RGBA8:
++ ret->base._ActualFormat = GL_RGBA8;
++ ret->base._BaseFormat = GL_RGBA;
++ ret->base.RedBits = 8;
++ ret->base.GreenBits = 8;
++ ret->base.BlueBits = 8;
++ ret->base.AlphaBits = 8;
++ ret->base.DataType = GL_UNSIGNED_BYTE;
++ break;
++ case GL_STENCIL_INDEX8_EXT:
++ ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
++ ret->base._BaseFormat = GL_STENCIL_INDEX;
++ ret->base.StencilBits = 8;
++ ret->base.DataType = GL_UNSIGNED_BYTE;
++ break;
++ case GL_DEPTH_COMPONENT16:
++ ret->base._ActualFormat = GL_DEPTH_COMPONENT16;
++ ret->base._BaseFormat = GL_DEPTH_COMPONENT;
++ ret->base.DepthBits = 16;
++ ret->base.DataType = GL_UNSIGNED_SHORT;
++ break;
++ case GL_DEPTH_COMPONENT24:
++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++ ret->base._BaseFormat = GL_DEPTH_COMPONENT;
++ ret->base.DepthBits = 24;
++ ret->base.DataType = GL_UNSIGNED_INT;
++ break;
++ case GL_DEPTH24_STENCIL8_EXT:
++ ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++ ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
++ ret->base.DepthBits = 24;
++ ret->base.StencilBits = 8;
++ ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
++ break;
++ default:
++ fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
++ _mesa_delete_renderbuffer(&ret->base);
++ return NULL;
++ }
++
++ ret->dPriv = driDrawPriv;
++ ret->base.InternalFormat = format;
++
++ ret->base.AllocStorage = radeon_alloc_window_storage;
++
++ radeonSetSpanFunctions(ret);
++
++ ret->bo = NULL;
++ return ret;
++}
+
+ /**
+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
+@@ -1026,95 +1258,86 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+ {
+ radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+
+- if (isPixmap) {
+- return GL_FALSE; /* not implemented */
+- }
+- else {
+- const GLboolean swDepth = GL_FALSE;
+- const GLboolean swAlpha = GL_FALSE;
+- const GLboolean swAccum = mesaVis->accumRedBits > 0;
+- const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+- mesaVis->depthBits != 24;
+- struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+-
+- /* front color renderbuffer */
+- {
+- driRenderbuffer *frontRb
+- = driNewRenderbuffer(GL_RGBA,
+- driScrnPriv->pFB + screen->frontOffset,
+- screen->cpp,
+- screen->frontOffset, screen->frontPitch,
+- driDrawPriv);
+- radeonSetSpanFunctions(frontRb, mesaVis);
+- _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+- }
++ const GLboolean swDepth = GL_FALSE;
++ const GLboolean swAlpha = GL_FALSE;
++ const GLboolean swAccum = mesaVis->accumRedBits > 0;
++ const GLboolean swStencil = mesaVis->stencilBits > 0 &&
++ mesaVis->depthBits != 24;
++ GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
++ GLenum depthFormat = GL_NONE;
++ struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
++
++ if (mesaVis->depthBits == 16)
++ depthFormat = GL_DEPTH_COMPONENT16;
++ else if (mesaVis->depthBits == 24)
++ depthFormat = GL_DEPTH_COMPONENT24;
++
++ /* front color renderbuffer */
++ {
++ struct radeon_renderbuffer *front =
++ radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++ _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
++ front->has_surface = 1;
++ }
+
+- /* back color renderbuffer */
+- if (mesaVis->doubleBufferMode) {
+- driRenderbuffer *backRb
+- = driNewRenderbuffer(GL_RGBA,
+- driScrnPriv->pFB + screen->backOffset,
+- screen->cpp,
+- screen->backOffset, screen->backPitch,
+- driDrawPriv);
+- radeonSetSpanFunctions(backRb, mesaVis);
+- _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+- }
++ /* back color renderbuffer */
++ if (mesaVis->doubleBufferMode) {
++ struct radeon_renderbuffer *back =
++ radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++ _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
++ back->has_surface = 1;
++ }
+
+- /* depth renderbuffer */
+- if (mesaVis->depthBits == 16) {
+- driRenderbuffer *depthRb
+- = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
+- driScrnPriv->pFB + screen->depthOffset,
+- screen->cpp,
+- screen->depthOffset, screen->depthPitch,
+- driDrawPriv);
+- radeonSetSpanFunctions(depthRb, mesaVis);
+- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+- depthRb->depthHasSurface = screen->depthHasSurface;
+- }
+- else if (mesaVis->depthBits == 24) {
+- driRenderbuffer *depthRb
+- = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
+- driScrnPriv->pFB + screen->depthOffset,
+- screen->cpp,
+- screen->depthOffset, screen->depthPitch,
+- driDrawPriv);
+- radeonSetSpanFunctions(depthRb, mesaVis);
+- _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+- depthRb->depthHasSurface = screen->depthHasSurface;
+- }
++ /* depth renderbuffer */
++ if (depthFormat != GL_NONE) {
++ struct radeon_renderbuffer *depth =
++ radeon_create_renderbuffer(depthFormat, driDrawPriv);
++ _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
++ depth->has_surface = screen->depthHasSurface;
++ }
+
+- /* stencil renderbuffer */
+- if (mesaVis->stencilBits > 0 && !swStencil) {
+- driRenderbuffer *stencilRb
+- = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
+- driScrnPriv->pFB + screen->depthOffset,
+- screen->cpp,
+- screen->depthOffset, screen->depthPitch,
+- driDrawPriv);
+- radeonSetSpanFunctions(stencilRb, mesaVis);
+- _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+- stencilRb->depthHasSurface = screen->depthHasSurface;
+- }
++ /* stencil renderbuffer */
++ if (mesaVis->stencilBits > 0 && !swStencil) {
++ struct radeon_renderbuffer *stencil =
++ radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv);
++ _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
++ stencil->has_surface = screen->depthHasSurface;
++ }
+
+- _mesa_add_soft_renderbuffers(fb,
+- GL_FALSE, /* color */
+- swDepth,
+- swStencil,
+- swAccum,
+- swAlpha,
+- GL_FALSE /* aux */);
+- driDrawPriv->driverPrivate = (void *) fb;
++ _mesa_add_soft_renderbuffers(fb,
++ GL_FALSE, /* color */
++ swDepth,
++ swStencil,
++ swAccum,
++ swAlpha,
++ GL_FALSE /* aux */);
++ driDrawPriv->driverPrivate = (void *) fb;
+
+- return (driDrawPriv->driverPrivate != NULL);
+- }
++ return (driDrawPriv->driverPrivate != NULL);
+ }
+
+-
+ static void
+ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+ {
++ struct radeon_renderbuffer *rb;
++ GLframebuffer *fb;
++
++ fb = (void*)driDrawPriv->driverPrivate;
++ rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++ if (rb && rb->bo) {
++ radeon_bo_unref(rb->bo);
++ rb->bo = NULL;
++ }
++ rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ if (rb && rb->bo) {
++ radeon_bo_unref(rb->bo);
++ rb->bo = NULL;
++ }
++ rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
++ if (rb && rb->bo) {
++ radeon_bo_unref(rb->bo);
++ rb->bo = NULL;
++ }
+ _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
+ }
+
+@@ -1149,6 +1372,7 @@ static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv)
+
+ #endif
+
++
+ /**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+@@ -1201,18 +1425,109 @@ radeonInitScreen(__DRIscreenPrivate *psp)
+ driInitSingleExtension( NULL, NV_vp_extension );
+ driInitSingleExtension( NULL, ATI_fs_extension );
+ driInitExtensions( NULL, point_extensions, GL_FALSE );
++#elif defined(RADEON_COMMON_FOR_R300)
++ driInitSingleExtension( NULL, gl_20_extension );
+ #endif
+
+ if (!radeonInitDriver(psp))
+ return NULL;
+
++ /* for now fill in all modes */
+ return radeonFillInModes( psp,
+ dri_priv->bpp,
+ (dri_priv->bpp == 16) ? 16 : 24,
+- (dri_priv->bpp == 16) ? 0 : 8,
+- (dri_priv->backOffset != dri_priv->depthOffset) );
++ (dri_priv->bpp == 16) ? 0 : 8, 1);
+ }
++#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
++/**
++ * This is the driver specific part of the createNewScreen entry point.
++ * Called when using DRI2.
++ *
++ * \return the __GLcontextModes supported by this driver
++ */
++static const
++__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
++{
++ GLenum fb_format[3];
++ GLenum fb_type[3];
++ /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
++ * support pageflipping at all.
++ */
++ static const GLenum back_buffer_modes[] = {
++ GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
++ };
++ uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
++ int color;
++ __DRIconfig **configs = NULL;
++
++ /* Calling driInitExtensions here, with a NULL context pointer,
++ * does not actually enable the extensions. It just makes sure
++ * that all the dispatch offsets for all the extensions that
++ * *might* be enables are known. This is needed because the
++ * dispatch offsets need to be known when _mesa_context_create
++ * is called, but we can't enable the extensions until we have a
++ * context pointer.
++ *
++ * Hello chicken. Hello egg. How are you two today?
++ */
++ driInitExtensions( NULL, card_extensions, GL_FALSE );
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
++ driInitExtensions( NULL, blend_extensions, GL_FALSE );
++ driInitSingleExtension( NULL, ARB_vp_extension );
++ driInitSingleExtension( NULL, NV_vp_extension );
++ driInitSingleExtension( NULL, ATI_fs_extension );
++ driInitExtensions( NULL, point_extensions, GL_FALSE );
++#endif
++
++ if (!radeonInitDriver(psp)) {
++ return NULL;
++ }
++ depth_bits[0] = 0;
++ stencil_bits[0] = 0;
++ depth_bits[1] = 16;
++ stencil_bits[1] = 0;
++ depth_bits[2] = 24;
++ stencil_bits[2] = 0;
++ depth_bits[3] = 24;
++ stencil_bits[3] = 8;
++
++ msaa_samples_array[0] = 0;
++
++ fb_format[0] = GL_RGB;
++ fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
++
++ fb_format[1] = GL_BGR;
++ fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
++
++ fb_format[2] = GL_BGRA;
++ fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
++
++ for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
++ __DRIconfig **new_configs;
++
++ new_configs = driCreateConfigs(fb_format[color], fb_type[color],
++ depth_bits,
++ stencil_bits,
++ ARRAY_SIZE(depth_bits),
++ back_buffer_modes,
++ ARRAY_SIZE(back_buffer_modes),
++ msaa_samples_array,
++ ARRAY_SIZE(msaa_samples_array));
++ if (configs == NULL)
++ configs = new_configs;
++ else
++ configs = driConcatConfigs(configs, new_configs);
++ }
++
++ if (configs == NULL) {
++ fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
++ __LINE__);
++ return NULL;
++ }
++
++ return (const __DRIconfig **)configs;
++}
+
+ /**
+ * Get information about previous buffer swaps.
+@@ -1220,11 +1535,7 @@ radeonInitScreen(__DRIscreenPrivate *psp)
+ static int
+ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+ {
+-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
+ radeonContextPtr rmesa;
+-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+- r200ContextPtr rmesa;
+-#endif
+
+ if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
+ || (dPriv->driContextPriv->driverPrivate == NULL)
+@@ -1261,6 +1572,8 @@ const struct __DriverAPIRec driDriverAPI = {
+ .WaitForSBC = NULL,
+ .SwapBuffersMSC = NULL,
+ .CopySubBuffer = radeonCopySubBuffer,
++ /* DRI2 */
++ .InitScreen2 = radeonInitScreen2,
+ };
+ #else
+ const struct __DriverAPIRec driDriverAPI = {
+@@ -1270,14 +1583,16 @@ const struct __DriverAPIRec driDriverAPI = {
+ .DestroyContext = r200DestroyContext,
+ .CreateBuffer = radeonCreateBuffer,
+ .DestroyBuffer = radeonDestroyBuffer,
+- .SwapBuffers = r200SwapBuffers,
+- .MakeCurrent = r200MakeCurrent,
+- .UnbindContext = r200UnbindContext,
++ .SwapBuffers = radeonSwapBuffers,
++ .MakeCurrent = radeonMakeCurrent,
++ .UnbindContext = radeonUnbindContext,
+ .GetSwapInfo = getSwapInfo,
+ .GetDrawableMSC = driDrawableGetMSC32,
+ .WaitForMSC = driWaitForMSC32,
+ .WaitForSBC = NULL,
+ .SwapBuffersMSC = NULL,
+- .CopySubBuffer = r200CopySubBuffer,
++ .CopySubBuffer = radeonCopySubBuffer,
++ .InitScreen2 = radeonInitScreen2,
+ };
+ #endif
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
+index b84c70b..1c0f5bb 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
+@@ -54,7 +54,7 @@ typedef struct {
+ drmAddress map; /* Mapping of the DRM region */
+ } radeonRegionRec, *radeonRegionPtr;
+
+-typedef struct {
++typedef struct radeon_screen {
+ int chip_family;
+ int chip_flags;
+ int cpp;
+@@ -103,9 +103,12 @@ typedef struct {
+ /* Configuration cache with default values for all contexts */
+ driOptionCache optionCache;
+
+- const __DRIextension *extensions[8];
++ const __DRIextension *extensions[16];
+
+ int num_gb_pipes;
++ int kernel_mm;
++ drm_radeon_sarea_t *sarea; /* Private SAREA data */
++ struct radeon_bo_manager *bom;
+ } radeonScreenRec, *radeonScreenPtr;
+
+ #define IS_R100_CLASS(screen) \
+diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
+index 12051ff..49ec2c3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_span.c
++++ b/src/mesa/drivers/dri/radeon/radeon_span.c
+@@ -43,37 +43,168 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/glheader.h"
+ #include "swrast/swrast.h"
+
+-#include "radeon_context.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "radeon_span.h"
+-#include "radeon_tex.h"
+-
+-#include "drirenderbuffer.h"
+
+ #define DBG 0
+
++static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
++ GLint x, GLint y)
++{
++ GLubyte *ptr = rrb->bo->ptr;
++ const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++ GLint offset;
++ GLint nmacroblkpl;
++ GLint nmicroblkpl;
++
++ x += dPriv->x;
++ y += dPriv->y;
++
++ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++ offset = x * rrb->cpp + y * rrb->pitch;
++ } else {
++ offset = 0;
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
++ nmacroblkpl = rrb->pitch >> 5;
++ offset += ((y >> 4) * nmacroblkpl) << 11;
++ offset += ((y & 15) >> 1) << 8;
++ offset += (y & 1) << 4;
++ offset += (x >> 5) << 11;
++ offset += ((x & 31) >> 2) << 5;
++ offset += (x & 3) << 2;
++ } else {
++ nmacroblkpl = rrb->pitch >> 6;
++ offset += ((y >> 3) * nmacroblkpl) << 11;
++ offset += (y & 7) << 8;
++ offset += (x >> 6) << 11;
++ offset += ((x & 63) >> 3) << 5;
++ offset += (x & 7) << 2;
++ }
++ } else {
++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
++ offset += (y * nmicroblkpl) << 5;
++ offset += (x >> 3) << 5;
++ offset += (x & 7) << 2;
++ }
++ }
++ return &ptr[offset];
++}
++
++static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
++ GLint x, GLint y)
++{
++ GLubyte *ptr = rrb->bo->ptr;
++ const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++ GLint offset;
++ GLint nmacroblkpl;
++ GLint nmicroblkpl;
++
++ x += dPriv->x;
++ y += dPriv->y;
++
++ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++ offset = x * rrb->cpp + y * rrb->pitch;
++ } else {
++ offset = 0;
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
++ nmacroblkpl = rrb->pitch >> 6;
++ offset += ((y >> 4) * nmacroblkpl) << 11;
++ offset += ((y & 15) >> 1) << 8;
++ offset += (y & 1) << 4;
++ offset += (x >> 6) << 11;
++ offset += ((x & 63) >> 3) << 5;
++ offset += (x & 7) << 1;
++ } else {
++ nmacroblkpl = rrb->pitch >> 7;
++ offset += ((y >> 3) * nmacroblkpl) << 11;
++ offset += (y & 7) << 8;
++ offset += (x >> 7) << 11;
++ offset += ((x & 127) >> 4) << 5;
++ offset += (x & 15) << 2;
++ }
++ } else {
++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
++ offset += (y * nmicroblkpl) << 5;
++ offset += (x >> 4) << 5;
++ offset += (x & 15) << 2;
++ }
++ }
++ return &ptr[offset];
++}
++
++static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
++ GLint x, GLint y)
++{
++ GLubyte *ptr = rrb->bo->ptr;
++ const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++ GLint offset;
++ GLint microblkxs;
++ GLint macroblkxs;
++ GLint nmacroblkpl;
++ GLint nmicroblkpl;
++
++ x += dPriv->x;
++ y += dPriv->y;
++
++ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++ offset = x * rrb->cpp + y * rrb->pitch;
++ } else {
++ offset = 0;
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
++ microblkxs = 16 / rrb->cpp;
++ macroblkxs = 128 / rrb->cpp;
++ nmacroblkpl = rrb->pitch / macroblkxs;
++ offset += ((y >> 4) * nmacroblkpl) << 11;
++ offset += ((y & 15) >> 1) << 8;
++ offset += (y & 1) << 4;
++ offset += (x / macroblkxs) << 11;
++ offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
++ offset += (x & (microblkxs - 1)) * rrb->cpp;
++ } else {
++ microblkxs = 32 / rrb->cpp;
++ macroblkxs = 256 / rrb->cpp;
++ nmacroblkpl = rrb->pitch / macroblkxs;
++ offset += ((y >> 3) * nmacroblkpl) << 11;
++ offset += (y & 7) << 8;
++ offset += (x / macroblkxs) << 11;
++ offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
++ offset += (x & (microblkxs - 1)) * rrb->cpp;
++ }
++ } else {
++ microblkxs = 32 / rrb->cpp;
++ nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
++ offset += (y * nmicroblkpl) << 5;
++ offset += (x / microblkxs) << 5;
++ offset += (x & (microblkxs - 1)) * rrb->cpp;
++ }
++ }
++ return &ptr[offset];
++}
++
++
+ /*
+ * Note that all information needed to access pixels in a renderbuffer
+ * should be obtained through the gl_renderbuffer parameter, not per-context
+ * information.
+ */
+ #define LOCAL_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
++ struct radeon_renderbuffer *rrb = (void *) rb; \
++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
+ const GLuint bottom = dPriv->h - 1; \
+- GLubyte *buf = (GLubyte *) drb->flippedData \
+- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
+- GLuint p; \
+- (void) p;
++ GLuint p; \
++ (void)p;
+
+ #define LOCAL_DEPTH_VARS \
+- driRenderbuffer *drb = (driRenderbuffer *) rb; \
+- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+- const GLuint bottom = dPriv->h - 1; \
+- GLuint xo = dPriv->x; \
+- GLuint yo = dPriv->y; \
+- GLubyte *buf = (GLubyte *) drb->Base.Data;
++ struct radeon_renderbuffer *rrb = (void *) rb; \
++ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
++ const GLuint bottom = dPriv->h - 1;
+
+ #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+@@ -94,7 +225,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #define TAG(x) radeon##x##_RGB565
+ #define TAG2(x,y) radeon##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
++#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
+ #include "spantmp2.h"
+
+ /* 32 bit, ARGB8888 color spanline and pixel functions
+@@ -104,7 +235,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #define TAG(x) radeon##x##_ARGB8888
+ #define TAG2(x,y) radeon##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
++#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
+ #include "spantmp2.h"
+
+ /* ================================================================
+@@ -121,65 +252,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * too...
+ */
+
+-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 4 * (x + y * pitch);
+- } else {
+- GLuint ba, address = 0; /* a[0..1] = 0 */
+-
+-#ifdef COMPILE_R300
+- ba = (y / 8) * (pitch / 8) + (x / 8);
+-#else
+- ba = (y / 16) * (pitch / 16) + (x / 16);
+-#endif
+-
+- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
+- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
+- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
+- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+-
+- address |= (y & 0x8) << 7; /* a[10] = y[3] */
+- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
+- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+-
+- return address;
+- }
+-}
+-
+-static INLINE GLuint
+-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+- GLuint pitch = drb->pitch;
+- if (drb->depthHasSurface) {
+- return 2 * (x + y * pitch);
+- } else {
+- GLuint ba, address = 0; /* a[0] = 0 */
+-
+- ba = (y / 16) * (pitch / 32) + (x / 32);
+-
+- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
+- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
+- address |= (x & 0x8) << 4; /* a[7] = x[3] */
+- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
+- address |= (y & 0x8) << 7; /* a[10] = y[3] */
+- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
+- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
+-
+- return address;
+- }
+-}
+-
+ /* 16-bit depth buffer functions
+ */
+ #define VALUE_TYPE GLushort
+
+ #define WRITE_DEPTH( _x, _y, d ) \
+- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
++ *(GLushort *)radeon_ptr(rrb, _x, _y) = d
+
+ #define READ_DEPTH( d, _x, _y ) \
+- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
++ d = *(GLushort *)radeon_ptr(rrb, _x, _y)
+
+ #define TAG(x) radeon##x##_z16
+ #include "depthtmp.h"
+@@ -194,35 +275,36 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+ #ifdef COMPILE_R300
+ #define WRITE_DEPTH( _x, _y, d ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \
++ GLuint tmp = *_ptr; \
+ tmp &= 0x000000ff; \
+ tmp |= ((d << 8) & 0xffffff00); \
+- *(GLuint *)(buf + offset) = tmp; \
++ *_ptr = tmp; \
+ } while (0)
+ #else
+ #define WRITE_DEPTH( _x, _y, d ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \
++ GLuint tmp = *_ptr; \
+ tmp &= 0xff000000; \
+ tmp |= ((d) & 0x00ffffff); \
+- *(GLuint *)(buf + offset) = tmp; \
++ *_ptr = tmp; \
+ } while (0)
+ #endif
+
+ #ifdef COMPILE_R300
+ #define READ_DEPTH( d, _x, _y ) \
+ do { \
+- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+- _y + yo )) & 0xffffff00) >> 8; \
++ d = (*(GLuint*)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \
+ }while(0)
+ #else
+ #define READ_DEPTH( d, _x, _y ) \
+- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
+- _y + yo )) & 0x00ffffff;
++ d = *(GLuint*)(radeon_ptr32(rrb, _x, _y )) & 0x00ffffff;
+ #endif
+-
++/*
++ fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
++ d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
++*/
+ #define TAG(x) radeon##x##_z24_s8
+ #include "depthtmp.h"
+
+@@ -235,35 +317,35 @@ do { \
+ #ifdef COMPILE_R300
+ #define WRITE_STENCIL( _x, _y, d ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y); \
++ GLuint tmp = *_ptr; \
+ tmp &= 0xffffff00; \
+ tmp |= (d) & 0xff; \
+- *(GLuint *)(buf + offset) = tmp; \
++ *_ptr = tmp; \
+ } while (0)
+ #else
+ #define WRITE_STENCIL( _x, _y, d ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y); \
++ GLuint tmp = *_ptr; \
+ tmp &= 0x00ffffff; \
+ tmp |= (((d) & 0xff) << 24); \
+- *(GLuint *)(buf + offset) = tmp; \
++ *_ptr = tmp; \
+ } while (0)
+ #endif
+
+ #ifdef COMPILE_R300
+ #define READ_STENCIL( d, _x, _y ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \
++ GLuint tmp = *_ptr; \
+ d = tmp & 0x000000ff; \
+ } while (0)
+ #else
+ #define READ_STENCIL( d, _x, _y ) \
+ do { \
+- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
+- GLuint tmp = *(GLuint *)(buf + offset); \
++ GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y ); \
++ GLuint tmp = *_ptr; \
+ d = (tmp & 0xff000000) >> 24; \
+ } while (0)
+ #endif
+@@ -271,20 +353,60 @@ do { \
+ #define TAG(x) radeon##x##_z24_s8
+ #include "stenciltmp.h"
+
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above). WaitForIdle() is the main
+- * culprit.
+- */
++
++static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
++{
++ struct radeon_renderbuffer *rrb = (void*)rb;
++ int r;
++
++ if (rrb->bo) {
++ r = radeon_bo_map(rrb->bo, write);
++ if (r) {
++ fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
++ __FUNCTION__, r);
++ }
++ }
++}
++
++static void unmap_buffer(struct gl_renderbuffer *rb)
++{
++ struct radeon_renderbuffer *rrb = (void*)rb;
++
++ if (rrb->bo) {
++ radeon_bo_unmap(rrb->bo);
++ }
++}
+
+ static void radeonSpanRenderStart(GLcontext * ctx)
+ {
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-#ifdef COMPILE_R300
+- r300ContextPtr r300 = (r300ContextPtr) rmesa;
+- R300_FIREVERTICES(r300);
+-#else
+- RADEON_FIREVERTICES(rmesa);
+-#endif
++ int i;
++
++ radeon_firevertices(rmesa);
++
++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
++ if (ctx->Texture.Unit[i]._ReallyEnabled)
++ ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
++ }
++
++ /* color draw buffers */
++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
++ map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
++ }
++
++ map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
++
++ if (ctx->DrawBuffer->_DepthBuffer) {
++ map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
++ }
++ if (ctx->DrawBuffer->_StencilBuffer)
++ map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
++
++ /* The locking and wait for idle should really only be needed in classic mode.
++ * In a future memory manager based implementation, this should become
++ * unnecessary due to the fact that mapping our buffers, textures, etc.
++ * should implicitly wait for any previous rendering commands that must
++ * be waited on. */
+ LOCK_HARDWARE(rmesa);
+ radeonWaitForIdleLocked(rmesa);
+ }
+@@ -292,8 +414,25 @@ static void radeonSpanRenderStart(GLcontext * ctx)
+ static void radeonSpanRenderFinish(GLcontext * ctx)
+ {
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ int i;
+ _swrast_flush(ctx);
+ UNLOCK_HARDWARE(rmesa);
++
++ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
++ if (ctx->Texture.Unit[i]._ReallyEnabled)
++ ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
++ }
++
++ /* color draw buffers */
++ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
++ unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
++
++ unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
++
++ if (ctx->DrawBuffer->_DepthBuffer)
++ unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
++ if (ctx->DrawBuffer->_StencilBuffer)
++ unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ }
+
+ void radeonInitSpanFuncs(GLcontext * ctx)
+@@ -307,20 +446,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
+ /**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ */
+-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
++void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
+ {
+- if (drb->Base.InternalFormat == GL_RGBA) {
+- if (vis->redBits == 5 && vis->greenBits == 6
+- && vis->blueBits == 5) {
+- radeonInitPointers_RGB565(&drb->Base);
+- } else {
+- radeonInitPointers_ARGB8888(&drb->Base);
+- }
+- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+- radeonInitDepthPointers_z16(&drb->Base);
+- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+- radeonInitDepthPointers_z24_s8(&drb->Base);
+- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+- radeonInitStencilPointers_z24_s8(&drb->Base);
++ if (rrb->base.InternalFormat == GL_RGB5) {
++ radeonInitPointers_RGB565(&rrb->base);
++ } else if (rrb->base.InternalFormat == GL_RGBA8) {
++ radeonInitPointers_ARGB8888(&rrb->base);
++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
++ radeonInitDepthPointers_z16(&rrb->base);
++ } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
++ radeonInitDepthPointers_z24_s8(&rrb->base);
++ } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
++ radeonInitStencilPointers_z24_s8(&rrb->base);
+ }
+ }
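
The span rewrite above drops the tiled radeon_mba_* address helpers in favour of radeon_ptr()/radeon_ptr32() accessors into a mapped renderbuffer BO, and brackets swrast span rendering with explicit map/unmap of every renderbuffer and texture that will be touched. A minimal sketch of that pattern follows, assuming a linear (untiled) surface and a `ptr` field exposed by the mapped BO; the real accessors also account for tiling and the buffer's base offset.

/* Hypothetical linear accessor in the spirit of radeon_ptr(); the real
 * radeon_span.c versions also handle macro/micro tiling. */
static inline GLubyte *linear_ptr(struct radeon_renderbuffer *rrb,
                                  GLint x, GLint y)
{
    /* rrb->bo->ptr is assumed to be the CPU address after radeon_bo_map() */
    return (GLubyte *) rrb->bo->ptr + y * rrb->pitch + x * rrb->cpp;
}

/* CPU span access is only valid while the BO is mapped: */
if (radeon_bo_map(rrb->bo, 1) == 0) {            /* 1 = map for writing   */
    *(GLushort *) linear_ptr(rrb, x, y) = depth; /* e.g. WRITE_DEPTH z16  */
    radeon_bo_unmap(rrb->bo);
}
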
+diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
+index 9abe086..dd44ab5 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_span.h
++++ b/src/mesa/drivers/dri/radeon/radeon_span.h
+@@ -42,9 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #ifndef __RADEON_SPAN_H__
+ #define __RADEON_SPAN_H__
+
+-#include "drirenderbuffer.h"
+-
+ extern void radeonInitSpanFuncs(GLcontext * ctx);
+-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
+
++extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+ #endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
+index 32bcff3..5fffa28 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state.c
++++ b/src/mesa/drivers/dri/radeon/radeon_state.c
+@@ -62,7 +62,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
+
+ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+ GLubyte refByte;
+
+@@ -106,7 +106,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+ static void radeonBlendEquationSeparate( GLcontext *ctx,
+ GLenum modeRGB, GLenum modeA )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
+ GLboolean fallback = GL_FALSE;
+
+@@ -147,7 +147,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
+ GLenum sfactorRGB, GLenum dfactorRGB,
+ GLenum sfactorA, GLenum dfactorA )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
+ ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
+ GLboolean fallback = GL_FALSE;
+@@ -257,7 +257,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
+
+ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ RADEON_STATECHANGE( rmesa, ctx );
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
+@@ -293,7 +293,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+
+ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ RADEON_STATECHANGE( rmesa, ctx );
+
+ if ( ctx->Depth.Mask ) {
+@@ -305,16 +305,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+
+ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+ RADEON_DEPTH_FORMAT_MASK);
+
+ switch ( format ) {
+ case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
+- rmesa->state.depth.clear = d * 0x0000ffff;
++ rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+ break;
+ case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
+- rmesa->state.depth.clear = d * 0x00ffffff;
++ rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+ break;
+ }
+ }
+@@ -327,7 +327,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+
+ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ union { int i; float f; } c, d;
+ GLchan col[4];
+
+@@ -406,109 +406,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+ }
+ }
+
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-
+-static GLboolean intersect_rect( drm_clip_rect_t *out,
+- drm_clip_rect_t *a,
+- drm_clip_rect_t *b )
+-{
+- *out = *a;
+- if ( b->x1 > out->x1 ) out->x1 = b->x1;
+- if ( b->y1 > out->y1 ) out->y1 = b->y1;
+- if ( b->x2 < out->x2 ) out->x2 = b->x2;
+- if ( b->y2 < out->y2 ) out->y2 = b->y2;
+- if ( out->x1 >= out->x2 ) return GL_FALSE;
+- if ( out->y1 >= out->y2 ) return GL_FALSE;
+- return GL_TRUE;
+-}
+-
+-
+-void radeonRecalcScissorRects( radeonContextPtr rmesa )
+-{
+- drm_clip_rect_t *out;
+- int i;
+-
+- /* Grow cliprect store?
+- */
+- if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+- while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+- rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */
+- rmesa->state.scissor.numAllocedClipRects *= 2;
+- }
+-
+- if (rmesa->state.scissor.pClipRects)
+- FREE(rmesa->state.scissor.pClipRects);
+-
+- rmesa->state.scissor.pClipRects =
+- MALLOC( rmesa->state.scissor.numAllocedClipRects *
+- sizeof(drm_clip_rect_t) );
+-
+- if ( rmesa->state.scissor.pClipRects == NULL ) {
+- rmesa->state.scissor.numAllocedClipRects = 0;
+- return;
+- }
+- }
+-
+- out = rmesa->state.scissor.pClipRects;
+- rmesa->state.scissor.numClipRects = 0;
+-
+- for ( i = 0 ; i < rmesa->numClipRects ; i++ ) {
+- if ( intersect_rect( out,
+- &rmesa->pClipRects[i],
+- &rmesa->state.scissor.rect ) ) {
+- rmesa->state.scissor.numClipRects++;
+- out++;
+- }
+- }
+-}
+-
+-
+-static void radeonUpdateScissor( GLcontext *ctx )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+- if ( rmesa->dri.drawable ) {
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-
+- int x = ctx->Scissor.X;
+- int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+- int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
+- int h = dPriv->h - ctx->Scissor.Y - 1;
+-
+- rmesa->state.scissor.rect.x1 = x + dPriv->x;
+- rmesa->state.scissor.rect.y1 = y + dPriv->y;
+- rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
+- rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
+-
+- radeonRecalcScissorRects( rmesa );
+- }
+-}
+-
+-
+-static void radeonScissor( GLcontext *ctx,
+- GLint x, GLint y, GLsizei w, GLsizei h )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+- if ( ctx->Scissor.Enabled ) {
+- RADEON_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */
+- radeonUpdateScissor( ctx );
+- }
+-
+-}
+-
+-
+ /* =============================================================
+ * Culling
+ */
+
+ static void radeonCullFace( GLcontext *ctx, GLenum unused )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+ GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
+
+@@ -545,7 +449,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
+
+ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ RADEON_STATECHANGE( rmesa, set );
+ rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+@@ -570,7 +474,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+ */
+ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ RADEON_STATECHANGE( rmesa, lin );
+ RADEON_STATECHANGE( rmesa, set );
+@@ -587,7 +491,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+
+ static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ RADEON_STATECHANGE( rmesa, lin );
+ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
+@@ -602,8 +506,8 @@ static void radeonColorMask( GLcontext *ctx,
+ GLboolean r, GLboolean g,
+ GLboolean b, GLboolean a )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ ctx->Color.ColorMask[RCOMP],
+ ctx->Color.ColorMask[GCOMP],
+ ctx->Color.ColorMask[BCOMP],
+@@ -623,8 +527,8 @@ static void radeonColorMask( GLcontext *ctx,
+ static void radeonPolygonOffset( GLcontext *ctx,
+ GLfloat factor, GLfloat units )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- float_ui32_type constant = { units * rmesa->state.depth.scale };
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ float_ui32_type constant = { units * rmesa->radeon.state.depth.scale };
+ float_ui32_type factoru = { factor };
+
+ RADEON_STATECHANGE( rmesa, zbs );
+@@ -634,7 +538,7 @@ static void radeonPolygonOffset( GLcontext *ctx,
+
+ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint i;
+ drm_radeon_stipple_t stipple;
+
+@@ -646,27 +550,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+
+ /* TODO: push this into cmd mechanism
+ */
+- RADEON_FIREVERTICES( rmesa );
+- LOCK_HARDWARE( rmesa );
++ radeon_firevertices(&rmesa->radeon);
++ LOCK_HARDWARE( &rmesa->radeon );
+
+ /* FIXME: Use window x,y offsets into stipple RAM.
+ */
+ stipple.mask = rmesa->state.stipple.mask;
+- drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
++ drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE,
+ &stipple, sizeof(drm_radeon_stipple_t) );
+- UNLOCK_HARDWARE( rmesa );
++ UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+
+ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
+
+ /* Can't generally do unfilled via tcl, but some good special
+ * cases work.
+ */
+ TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ radeonChooseRenderState( ctx );
+ radeonChooseVertexState( ctx );
+ }
+@@ -686,7 +590,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+ */
+ static void radeonUpdateSpecular( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+ GLuint flag = 0;
+
+@@ -757,7 +661,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
+
+ /* Update vertex/render formats
+ */
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ radeonChooseRenderState( ctx );
+ radeonChooseVertexState( ctx );
+ }
+@@ -774,7 +678,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
+ */
+ static void update_global_ambient( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ float *fcmd = (float *)RADEON_DB_STATE( glt );
+
+ /* Need to do more if both emmissive & ambient are PREMULT:
+@@ -809,7 +713,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
+ /* fprintf(stderr, "%s\n", __FUNCTION__); */
+
+ if (l->Enabled) {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
+
+ COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
+@@ -849,7 +753,7 @@ static void check_twoside_fallback( GLcontext *ctx )
+
+ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+ light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+@@ -913,7 +817,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+
+ void radeonUpdateMaterial( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+ GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+ GLuint mask = ~0;
+@@ -978,7 +882,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
+ */
+ static void update_light( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ /* Have to check these, or have an automatic shortcircuit mechanism
+ * to remove noop statechanges. (Or just do a better job on the
+@@ -1043,7 +947,7 @@ static void update_light( GLcontext *ctx )
+ static void radeonLightfv( GLcontext *ctx, GLenum light,
+ GLenum pname, const GLfloat *params )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLint p = light - GL_LIGHT0;
+ struct gl_light *l = &ctx->Light.Light[p];
+ GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+@@ -1164,7 +1068,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
+ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+ const GLfloat *param )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ switch (pname) {
+ case GL_LIGHT_MODEL_AMBIENT:
+@@ -1188,7 +1092,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+
+ check_twoside_fallback( ctx );
+
+- if (rmesa->TclFallback) {
++ if (rmesa->radeon.TclFallback) {
+ radeonChooseRenderState( ctx );
+ radeonChooseVertexState( ctx );
+ }
+@@ -1205,7 +1109,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+
+ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+
+ s &= ~(RADEON_DIFFUSE_SHADE_MASK |
+@@ -1244,7 +1148,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+ {
+ GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ RADEON_STATECHANGE( rmesa, ucp[p] );
+@@ -1256,7 +1160,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+
+ static void radeonUpdateClipPlanes( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint p;
+
+ for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+@@ -1281,7 +1185,7 @@ static void
+ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+ GLint ref, GLuint mask )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
+ ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
+
+@@ -1325,7 +1229,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+ static void
+ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ RADEON_STATECHANGE( rmesa, msk );
+ rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
+@@ -1336,7 +1240,7 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+ GLenum zfail, GLenum zpass )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
+ and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
+@@ -1349,7 +1253,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+ GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
+ GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
+
+- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
+ tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
+ tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
+ tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
+@@ -1455,9 +1359,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+
+ static void radeonClearStencil( GLcontext *ctx, GLint s )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+- rmesa->state.stencil.clear =
++ rmesa->radeon.state.stencil.clear =
+ ((GLuint) (ctx->Stencil.Clear & 0xff) |
+ (0xff << RADEON_STENCIL_MASK_SHIFT) |
+ ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
+@@ -1481,20 +1385,20 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
+ */
+ void radeonUpdateWindow( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+- GLfloat xoffset = (GLfloat)dPriv->x;
+- GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
++ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ float_ui32_type sx = { v[MAT_SX] };
+ float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+ float_ui32_type sy = { - v[MAT_SY] };
+ float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+- float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
+- float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
++ float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
++ float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
+
+- RADEON_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ RADEON_STATECHANGE( rmesa, vpt );
+
+ rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
+@@ -1524,8 +1428,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
+
+ void radeonUpdateViewportOffset( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+ GLfloat xoffset = (GLfloat)dPriv->x;
+ GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+@@ -1555,8 +1459,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
+ RADEON_STIPPLE_Y_OFFSET_MASK);
+
+ /* add magic offsets, then invert */
+- stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
+- sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
++ stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
++ sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
+ & RADEON_STIPPLE_COORD_MASK);
+
+ m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+@@ -1580,20 +1484,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
+
+ static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLubyte c[4];
+ CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
+ CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
+ CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
+ CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
+- rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
++ rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ c[0], c[1], c[2], c[3] );
+ }
+
+
+ static void radeonRenderMode( GLcontext *ctx, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
+ }
+
+@@ -1619,7 +1523,7 @@ static GLuint radeon_rop_tab[] = {
+
+ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint rop = (GLuint)opcode - GL_CLEAR;
+
+ ASSERT( rop < 16 );
+@@ -1630,66 +1534,17 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+
+
+ /**
+- * Set up the cliprects for either front or back-buffer drawing.
+- */
+-void radeonSetCliprects( radeonContextPtr rmesa )
+-{
+- __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+- __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+- GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
+- GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
+-
+- if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+- /* Can't ignore 2d windows if we are page flipping.
+- */
+- if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
+- rmesa->numClipRects = drawable->numClipRects;
+- rmesa->pClipRects = drawable->pClipRects;
+- }
+- else {
+- rmesa->numClipRects = drawable->numBackClipRects;
+- rmesa->pClipRects = drawable->pBackClipRects;
+- }
+- }
+- else {
+- /* front buffer (or none, or multiple buffers */
+- rmesa->numClipRects = drawable->numClipRects;
+- rmesa->pClipRects = drawable->pClipRects;
+- }
+-
+- if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
+- _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
+- drawable->w, drawable->h);
+- draw_fb->Initialized = GL_TRUE;
+- }
+-
+- if (drawable != readable) {
+- if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
+- _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
+- readable->w, readable->h);
+- read_fb->Initialized = GL_TRUE;
+- }
+- }
+-
+- if (rmesa->state.scissor.enabled)
+- radeonRecalcScissorRects( rmesa );
+-
+- rmesa->lastStamp = drawable->lastStamp;
+-}
+-
+-
+-/**
+ * Called via glDrawBuffer.
+ */
+ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+ if (RADEON_DEBUG & DEBUG_DRI)
+ fprintf(stderr, "%s %s\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr( mode ));
+
+- RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */
++ radeon_firevertices(&rmesa->radeon); /* don't pipeline cliprect changes */
+
+ if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+ /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+@@ -1707,8 +1562,9 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+ return;
+ }
+
+- radeonSetCliprects( rmesa );
+-
++ radeonSetCliprects( &rmesa->radeon );
++ if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled)
++ radeonUpdatePageFlipping(&rmesa->radeon);
+ /* We'll set the drawing engine's offset/pitch parameters later
+ * when we update other state.
+ */
+@@ -1726,7 +1582,7 @@ static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+
+ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint p, flag;
+
+ if ( RADEON_DEBUG & DEBUG_STATE )
+@@ -1821,10 +1677,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+ RADEON_STATECHANGE(rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+ } else {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+ }
+ break;
+
+@@ -1971,13 +1827,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+ }
+
+ case GL_SCISSOR_TEST:
+- RADEON_FIREVERTICES( rmesa );
+- rmesa->state.scissor.enabled = state;
++ radeon_firevertices(&rmesa->radeon);
++ rmesa->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor( ctx );
+ break;
+
+ case GL_STENCIL_TEST:
+- if ( rmesa->state.stencil.hwBuffer ) {
++ if ( rmesa->radeon.state.stencil.hwBuffer ) {
+ RADEON_STATECHANGE( rmesa, ctx );
+ if ( state ) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE;
+@@ -2010,7 +1866,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+
+ static void radeonLightingSpaceChange( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLboolean tmp;
+ RADEON_STATECHANGE( rmesa, tcl );
+
+@@ -2039,7 +1895,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
+ */
+
+
+-void radeonUploadTexMatrix( radeonContextPtr rmesa,
++void radeonUploadTexMatrix( r100ContextPtr rmesa,
+ int unit, GLboolean swapcols )
+ {
+ /* Here's how this works: on r100, only 3 tex coords can be submitted, so the
+@@ -2065,7 +1921,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
+ int idx = TEXMAT_0 + unit;
+ float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
+ int i;
+- struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
++ struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
+ GLfloat *src = rmesa->tmpmat[unit].m;
+
+ rmesa->TexMatColSwap &= ~(1 << unit);
+@@ -2119,7 +1975,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
+ }
+
+
+-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
++static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
+ {
+ float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+ int i;
+@@ -2135,7 +1991,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+ RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+ }
+
+-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
++static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
+ {
+ float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+ memcpy(dest, src, 16*sizeof(float));
+@@ -2145,7 +2001,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+
+ static void update_texturematrix( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
+ GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+ int unit;
+@@ -2217,43 +2073,32 @@ static void update_texturematrix( GLcontext *ctx )
+ void
+ radeonUpdateDrawBuffer(GLcontext *ctx)
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+- driRenderbuffer *drb;
++ struct radeon_renderbuffer *rrb;
+
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+- /* draw to front */
+- drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+- }
+- else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+- /* draw to back */
+- drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+- }
+- else {
+- /* drawing to multiple buffers, or none */
+- return;
++ /* draw to front */
++ rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++ } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
++ /* draw to back */
++ rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++ } else {
++ /* drawing to multiple buffers, or none */
++ return;
+ }
+
+- assert(drb);
+- assert(drb->flippedPitch);
++ assert(rrb);
++ assert(rrb->pitch);
+
+ RADEON_STATECHANGE( rmesa, ctx );
+-
+- /* Note: we used the (possibly) page-flipped values */
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+- = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
+- & RADEON_COLOROFFSET_MASK);
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+- }
+ }
+
+
+ void radeonValidateState( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- GLuint new_state = rmesa->NewGLState;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ GLuint new_state = rmesa->radeon.NewGLState;
+
+ if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+ radeonUpdateDrawBuffer(ctx);
+@@ -2261,7 +2106,7 @@ void radeonValidateState( GLcontext *ctx )
+
+ if (new_state & _NEW_TEXTURE) {
+ radeonUpdateTextureState( ctx );
+- new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
++ new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+ }
+
+ /* Need an event driven matrix update?
+@@ -2295,7 +2140,7 @@ void radeonValidateState( GLcontext *ctx )
+ }
+
+
+- rmesa->NewGLState = 0;
++ rmesa->radeon.NewGLState = 0;
+ }
+
+
+@@ -2306,7 +2151,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+ _vbo_InvalidateState( ctx, new_state );
+ _tnl_InvalidateState( ctx, new_state );
+ _ae_invalidate_state( ctx, new_state );
+- RADEON_CONTEXT(ctx)->NewGLState |= new_state;
++ R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
+ }
+
+
+@@ -2330,15 +2175,15 @@ static GLboolean check_material( GLcontext *ctx )
+
+ static void radeonWrapRunPipeline( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLboolean has_material;
+
+ if (0)
+- fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
++ fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+
+ /* Validate state:
+ */
+- if (rmesa->NewGLState)
++ if (rmesa->radeon.NewGLState)
+ radeonValidateState( ctx );
+
+ has_material = (ctx->Light.Enabled && check_material( ctx ));
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
+index 2171879..17c2b11 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state.h
++++ b/src/mesa/drivers/dri/radeon/radeon_state.h
+@@ -39,22 +39,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ #include "radeon_context.h"
+
+-extern void radeonInitState( radeonContextPtr rmesa );
++extern void radeonInitState( r100ContextPtr rmesa );
+ extern void radeonInitStateFuncs( GLcontext *ctx );
+
+ extern void radeonUpdateMaterial( GLcontext *ctx );
+
+-extern void radeonSetCliprects( radeonContextPtr rmesa );
+-extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
+ extern void radeonUpdateViewportOffset( GLcontext *ctx );
+ extern void radeonUpdateWindow( GLcontext *ctx );
+ extern void radeonUpdateDrawBuffer( GLcontext *ctx );
+-extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
++extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
+ int unit, GLboolean swapcols );
+
+ extern void radeonValidateState( GLcontext *ctx );
+
+-extern void radeonPrintDirty( radeonContextPtr rmesa,
++extern void radeonPrintDirty( r100ContextPtr rmesa,
+ const char *msg );
+
+
+@@ -62,7 +60,7 @@ extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+ #define FALLBACK( rmesa, bit, mode ) do { \
+ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
+ __FUNCTION__, bit, mode ); \
+- radeonFallback( rmesa->glCtx, bit, mode ); \
++ radeonFallback( rmesa->radeon.glCtx, bit, mode ); \
+ } while (0)
+
+
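
The bulk of the radeon_state.c and radeon_state.h hunks above are one mechanical refactor: r100-specific code now casts with R100_CONTEXT() and reaches shared, chip-independent state through an embedded `radeon` member, so rmesa->state.* becomes rmesa->radeon.state.* and so on. Below is a rough sketch of the layering this implies; any field or definition not visible in the hunks above is an assumption, not the driver's actual declaration.

/* Chip-independent state shared by the radeon drivers (sketch only): */
struct radeon_context_sketch {
    GLcontext *glCtx;
    GLuint NewGLState;
    GLuint TclFallback;
    struct {
        struct { GLuint clear; GLuint roundEnable; } color;
        struct { GLuint clear; GLfloat scale; } depth;
        struct { GLuint clear; GLboolean hwBuffer; } stencil;
        struct { GLboolean enabled; } scissor;
    } state;
    /* radeonScreen, dri info, hw.atomlist, hw.max_state_size, ... */
};

/* The r100 driver wraps it, so the same context can be viewed both ways: */
struct r100_context_sketch {
    struct radeon_context_sketch radeon;  /* must be the first member */
    /* r100-only data: hw atoms (ctx, tcl, lit[], tex[], ...), tmpmat[], ... */
};

/* #define R100_CONTEXT(ctx) ((r100ContextPtr)(ctx)->DriverCtx)  (assumed) */
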
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
+index 57dc380..7ff0eb4 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
++++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
+@@ -38,39 +38,156 @@
+ #include "swrast_setup/swrast_setup.h"
+
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_state.h"
+ #include "radeon_tcl.h"
+ #include "radeon_tex.h"
+ #include "radeon_swtcl.h"
+
++#include "../r200/r200_reg.h"
++
+ #include "xmlpool.h"
+
++/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
++ * 1.3 cmdbuffers allow all previous state to be updated as well as
++ * the tcl scalar and vector areas.
++ */
++static struct {
++ int start;
++ int len;
++ const char *name;
++} packet[RADEON_MAX_STATE_PACKETS] = {
++ {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
++ {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
++ {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
++ {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
++ {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
++ {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
++ {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
++ {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
++ {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
++ {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
++ {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
++ {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
++ {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
++ {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
++ {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
++ {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
++ {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
++ {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
++ {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
++ {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
++ {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
++ "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
++ {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
++ {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
++ {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
++ {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
++ {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
++ {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
++ {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
++ {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
++ {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
++ {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
++ {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
++ {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
++ {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
++ {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
++ {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
++ {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
++ {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
++ {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
++ {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
++ {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
++ {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
++ {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
++ {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
++ {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
++ {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
++ {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
++ {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
++ {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
++ {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
++ "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
++ {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
++ {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
++ {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
++ {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
++ {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
++ {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
++ {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
++ {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
++ {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
++ {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
++ {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
++ "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
++ {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
++ {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
++ {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
++ {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
++ {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
++ {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
++ {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
++ {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
++ {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
++ {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
++ {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
++ {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
++ {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
++ {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
++ {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
++ {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
++ {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
++ {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
++ {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
++ {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
++ {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
++ {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
++ {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
++ {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
++ {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
++ {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
++ {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
++ {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
++ {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
++ {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
++ {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
++ {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
++ {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
++ {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
++};
++
+ /* =============================================================
+ * State initialization
+ */
+
+-void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
++void radeonPrintDirty( r100ContextPtr rmesa, const char *msg )
+ {
+ struct radeon_state_atom *l;
+
+ fprintf(stderr, msg);
+ fprintf(stderr, ": ");
+
+- foreach(l, &rmesa->hw.atomlist) {
+- if (l->dirty || rmesa->hw.all_dirty)
++ foreach(l, &rmesa->radeon.hw.atomlist) {
++ if (l->dirty || rmesa->radeon.hw.all_dirty)
+ fprintf(stderr, "%s, ", l->name);
+ }
+
+ fprintf(stderr, "\n");
+ }
+
+-static int cmdpkt( int id )
++static int cmdpkt( r100ContextPtr rmesa, int id )
+ {
+ drm_radeon_cmd_header_t h;
+- h.i = 0;
+- h.packet.cmd_type = RADEON_CMD_PACKET;
+- h.packet.packet_id = id;
++
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ return CP_PACKET0(packet[id].start, packet[id].len - 1);
++ } else {
++ h.i = 0;
++ h.packet.cmd_type = RADEON_CMD_PACKET;
++ h.packet.packet_id = id;
++ }
+ return h.i;
+ }
+
+@@ -96,17 +213,17 @@ static int cmdscl( int offset, int stride, int count )
+ return h.i;
+ }
+
+-#define CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx ) \
+-{ \
+- return FLAG; \
++#define CHECK( NM, FLAG ) \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
++{ \
++ return FLAG ? atom->cmd_size : 0; \
+ }
+
+ #define TCL_CHECK( NM, FLAG ) \
+-static GLboolean check_##NM( GLcontext *ctx ) \
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+ { \
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
+- return !rmesa->TclFallback && (FLAG); \
++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
++ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \
+ }
+
+
+@@ -146,42 +263,290 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
+ CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
+ CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
+
++#define OUT_VEC(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ h.i = hdr; \
++ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
++ OUT_BATCH(0); \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
++ OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
++ OUT_BATCH_TABLE((data), h.vectors.count); \
++ } while(0)
++
++#define OUT_SCL(hdr, data) do { \
++ drm_radeon_cmd_header_t h; \
++ h.i = hdr; \
++ OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
++ OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
++ OUT_BATCH_TABLE((data), h.scalars.count); \
++ } while(0)
++
++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 2;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_SCL(atom->cmd[0], atom->cmd+1);
++ END_BATCH();
++}
+
+
+-/* Initialize the context's hardware state.
+- */
+-void radeonInitState( radeonContextPtr rmesa )
++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+- GLcontext *ctx = rmesa->glCtx;
+- GLuint color_fmt, depth_fmt, i;
+- GLint drawPitch, drawOffset;
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 4;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[0], atom->cmd+1);
++ END_BATCH();
++}
+
+- switch ( rmesa->radeonScreen->cpp ) {
+- case 2:
+- color_fmt = RADEON_COLOR_FORMAT_RGB565;
+- break;
+- case 4:
+- color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
+- break;
+- default:
+- fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+- exit( -1 );
++
++static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ uint32_t dwords = atom->cmd_size;
++
++ dwords += 6;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
++ OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
++ END_BATCH();
++}
++
++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ struct radeon_renderbuffer *rrb;
++ uint32_t cbpitch;
++ uint32_t zbpitch, depth_fmt;
++ uint32_t dwords = atom->cmd_size;
++
++ /* output the first 7 bytes of context */
++ BEGIN_BATCH_NO_AUTOSTATE(dwords + 4);
++ OUT_BATCH_TABLE(atom->cmd, 5);
++
++ rrb = radeon_get_depthbuffer(&r100->radeon);
++ if (!rrb) {
++ OUT_BATCH(0);
++ OUT_BATCH(0);
++ } else {
++ zbpitch = (rrb->pitch / rrb->cpp);
++ if (r100->using_hyperz)
++ zbpitch |= RADEON_DEPTH_HYPERZ;
++
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ OUT_BATCH(zbpitch);
++ if (rrb->cpp == 4)
++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
++ else
++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
++ }
++
++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++ OUT_BATCH(atom->cmd[CTX_CMD_1]);
++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++
++ rrb = radeon_get_colorbuffer(&r100->radeon);
++ if (!rrb || !rrb->bo) {
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++ OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
++ } else {
++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
++ if (rrb->cpp == 4)
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
++ else
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
++
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++ }
++
++ OUT_BATCH(atom->cmd[CTX_CMD_2]);
++
++ if (!rrb || !rrb->bo) {
++ OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
++ } else {
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++ cbpitch |= RADEON_COLOR_TILE_ENABLE;
++ OUT_BATCH(cbpitch);
++ }
++
++ END_BATCH();
++}
++
++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ struct radeon_renderbuffer *rrb, *drb;
++ uint32_t cbpitch = 0;
++ uint32_t zbpitch = 0;
++ uint32_t dwords = atom->cmd_size;
++ uint32_t depth_fmt;
++
++ rrb = radeon_get_colorbuffer(&r100->radeon);
++ if (!rrb || !rrb->bo) {
++ fprintf(stderr, "no rrb\n");
++ return;
++ }
++
++ atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
++ if (rrb->cpp == 4)
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
++ else
++ atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
++
++ cbpitch = (rrb->pitch / rrb->cpp);
++ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++ cbpitch |= R200_COLOR_TILE_ENABLE;
++
++ drb = radeon_get_depthbuffer(&r100->radeon);
++ if (drb) {
++ zbpitch = (drb->pitch / drb->cpp);
++ if (drb->cpp == 4)
++ depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
++ else
++ depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
++ atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
++
++ }
++
++ /* output the first 7 bytes of context */
++ if (drb)
++ dwords += 4;
++ if (rrb)
++ dwords += 4;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++
++ /* In the CS case we need to split this up */
++ OUT_BATCH(CP_PACKET0(packet[0].start, 3));
++ OUT_BATCH_TABLE((atom->cmd + 1), 4);
++
++ if (drb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
++ OUT_BATCH(zbpitch);
++ }
++
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
++ OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++ OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
++ OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++ OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++ if (rrb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
++ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+- rmesa->state.color.clear = 0x00000000;
++ if (rrb) {
++ OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
++ OUT_BATCH(cbpitch);
++ }
++
++ // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
++ // OUT_BATCH_TABLE((atom->cmd + 14), 4);
++ // }
++
++ END_BATCH();
++}
++
++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ uint32_t dwords = atom->cmd_size;
++ int i = atom->idx, j;
++ radeonTexObj *t = r100->state.texture.unit[i].texobj;
++ radeon_mipmap_level *lvl;
++
++ if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
++ return;
++
++ if (!t)
++ return;
++
++ if (!t->mt)
++ return;
++
++ BEGIN_BATCH_NO_AUTOSTATE(dwords + 10);
++ OUT_BATCH_TABLE(atom->cmd, 3);
++ lvl = &t->mt->levels[0];
++ for (j = 0; j < 5; j++) {
++ OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
++ RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ }
++ END_BATCH();
++}
++
++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++ r100ContextPtr r100 = R100_CONTEXT(ctx);
++ BATCH_LOCALS(&r100->radeon);
++ uint32_t dwords = atom->cmd_size;
++ int i = atom->idx;
++ radeonTexObj *t = r100->state.texture.unit[i].texobj;
++ radeon_mipmap_level *lvl;
++
++ if (t && t->mt && !t->image_override)
++ dwords += 2;
++ BEGIN_BATCH_NO_AUTOSTATE(dwords);
++ OUT_BATCH_TABLE(atom->cmd, 3);
++ if (t && t->mt && !t->image_override) {
++ if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
++ lvl = &t->mt->levels[0];
++ OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
++ RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ } else {
++ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
++ RADEON_GEM_DOMAIN_VRAM, 0, 0);
++ }
++ } else if (!t) {
++ /* workaround for old CS mechanism */
++ OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
++ // OUT_BATCH(r100->radeon.radeonScreen);
++ } else if (t->image_override)
++ OUT_BATCH(t->override_offset);
++
++ OUT_BATCH_TABLE((atom->cmd+4), 5);
++ END_BATCH();
++}
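
The emit callbacks above (scl_emit, vec_emit, lit_emit, ctx_emit, ctx_emit_cs, cube_emit, tex_emit) all follow the same small batch API: reserve space, stream the atom's dwords, and emit relocations for buffer objects whose GPU addresses are only known at submission time. Below is a toy model of those helpers, purely to show the shape; the real macros also handle autostate emission, command-stream growth and error checking.

struct radeon_bo;                      /* opaque here; owned by the BO layer */

struct toy_cs {
    uint32_t buf[2048];
    int cdw;                           /* current dword count */
};

static inline void toy_out(struct toy_cs *cs, uint32_t dw)
{
    cs->buf[cs->cdw++] = dw;           /* what OUT_BATCH boils down to */
}

static inline void toy_out_table(struct toy_cs *cs, const uint32_t *tbl, int n)
{
    while (n--)
        toy_out(cs, *tbl++);           /* OUT_BATCH_TABLE */
}

static inline void toy_out_reloc(struct toy_cs *cs, uint32_t offset,
                                 struct radeon_bo *bo, uint32_t domains)
{
    toy_out(cs, offset);               /* placeholder dword (often 0)...      */
    (void)bo; (void)domains;           /* ...plus a relocation record so the  */
                                       /* kernel can patch in the BO's final  */
                                       /* VRAM/GTT address at submit time;    */
    /* recording the relocation itself is elided in this sketch */
}
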
++
++/* Initialize the context's hardware state.
++ */
++void radeonInitState( r100ContextPtr rmesa )
++{
++ GLcontext *ctx = rmesa->radeon.glCtx;
++ GLuint i;
++
++ rmesa->radeon.state.color.clear = 0x00000000;
+
+ switch ( ctx->Visual.depthBits ) {
+ case 16:
+- rmesa->state.depth.clear = 0x0000ffff;
+- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+- depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+- rmesa->state.stencil.clear = 0x00000000;
++ rmesa->radeon.state.depth.clear = 0x0000ffff;
++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
++ rmesa->radeon.state.stencil.clear = 0x00000000;
+ break;
+ case 24:
+- rmesa->state.depth.clear = 0x00ffffff;
+- rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+- depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+- rmesa->state.stencil.clear = 0xffff0000;
++ rmesa->radeon.state.depth.clear = 0x00ffffff;
++ rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
++ rmesa->radeon.state.stencil.clear = 0xffff0000;
+ break;
+ default:
+ fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+@@ -190,37 +555,37 @@ void radeonInitState( radeonContextPtr rmesa )
+ }
+
+ /* Only have hw stencil when depth buffer is 24 bits deep */
+- rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
++ rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+ ctx->Visual.depthBits == 24 );
+
+- rmesa->Fallback = 0;
++ rmesa->radeon.Fallback = 0;
+
+- if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+- drawOffset = rmesa->radeonScreen->backOffset;
+- drawPitch = rmesa->radeonScreen->backPitch;
+- } else {
+- drawOffset = rmesa->radeonScreen->frontOffset;
+- drawPitch = rmesa->radeonScreen->frontPitch;
+- }
+
+- rmesa->hw.max_state_size = 0;
++ rmesa->radeon.hw.max_state_size = 0;
+
+-#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
++#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX ) \
+ do { \
+ rmesa->hw.ATOM.cmd_size = SZ; \
+- rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
+- rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
+- rmesa->hw.ATOM.name = NM; \
++ rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
++ rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
++ rmesa->hw.ATOM.name = NM; \
+ rmesa->hw.ATOM.is_tcl = FLAG; \
+ rmesa->hw.ATOM.check = check_##CHK; \
+- rmesa->hw.ATOM.dirty = GL_TRUE; \
+- rmesa->hw.max_state_size += SZ * sizeof(int); \
++ rmesa->hw.ATOM.dirty = GL_TRUE; \
++ rmesa->hw.ATOM.idx = IDX; \
++ rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
+ } while (0)
+-
+-
++
++#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
++ ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
++
+ /* Allocate state buffers:
+ */
+ ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
++ if (rmesa->radeon.radeonScreen->kernel_mm)
++ rmesa->hw.ctx.emit = ctx_emit_cs;
++ else
++ rmesa->hw.ctx.emit = ctx_emit;
+ ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+ ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+ ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+@@ -233,20 +598,25 @@ void radeonInitState( radeonContextPtr rmesa )
+ ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+ ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+ ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+- ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+- ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+- ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
+- if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
++ ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
++ ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
++ ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2 );
++
++ for (i = 0; i < 3; i++)
++ rmesa->hw.tex[i].emit = tex_emit;
++ if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+ {
+- ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+- ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+- ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
++ ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
++ ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
++ ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
++ for (i = 0; i < 3; i++)
++ rmesa->hw.cube[i].emit = cube_emit;
+ }
+ else
+ {
+- ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+- ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+- ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
++ ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
++ ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
++ ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ }
+ ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+ ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+@@ -268,43 +638,43 @@ void radeonInitState( radeonContextPtr rmesa )
+ ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+ ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+ ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+- ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
+- ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
+- ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
++ ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
++ ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
++ ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
+
+ radeonSetUpAtomList( rmesa );
+
+ /* Fill in the packet headers:
+ */
+- rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+- rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+- rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+- rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+- rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+- rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+- rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+- rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+- rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
+- rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
+- rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
+- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
+- rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
+- rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
+- rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
+- rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+- rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
+- rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
+- rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
+- rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
+- rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+- rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
++ rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
++ rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
++ rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
++ rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
++ rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
++ rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
++ rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
++ rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
++ rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
++ rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
++ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
++ rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
++ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
++ rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
++ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
++ rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
++ rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
++ rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
++ rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
++ rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
++ rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
++ rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
++ rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
++ rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+ rmesa->hw.mtl.cmd[MTL_CMD_0] =
+- cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+- rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
+- rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
+- rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
++ cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
++ rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
++ rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
++ rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
+ rmesa->hw.grd.cmd[GRD_CMD_0] =
+ cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+ rmesa->hw.fog.cmd[FOG_CMD_0] =
+@@ -331,6 +701,22 @@ void radeonInitState( radeonContextPtr rmesa )
+ cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
+ }
+
++ if (rmesa->radeon.radeonScreen->kernel_mm) {
++ rmesa->hw.grd.emit = scl_emit;
++ rmesa->hw.fog.emit = vec_emit;
++ rmesa->hw.glt.emit = vec_emit;
++ rmesa->hw.eye.emit = vec_emit;
++
++ for (i = 0; i <= 6; i++)
++ rmesa->hw.mat[i].emit = vec_emit;
++
++ for (i = 0; i < 8; i++)
++ rmesa->hw.lit[i].emit = lit_emit;
++
++ for (i = 0; i < 6; i++)
++ rmesa->hw.ucp[i].emit = vec_emit;
++ }
++
+ rmesa->last_ReallyEnabled = -1;
+
+ /* Initial Harware state:
+@@ -352,19 +738,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ RADEON_SRC_BLEND_GL_ONE |
+ RADEON_DST_BLEND_GL_ZERO );
+
+- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+- rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
+-
+- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
+- ((rmesa->radeonScreen->depthPitch &
+- RADEON_DEPTHPITCH_MASK) |
+- RADEON_DEPTH_ENDIAN_NO_SWAP);
+-
+- if (rmesa->using_hyperz)
+- rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ;
+-
+- rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+- RADEON_Z_TEST_LESS |
++ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
+ RADEON_STENCIL_TEST_ALWAYS |
+ RADEON_STENCIL_FAIL_KEEP |
+ RADEON_STENCIL_ZPASS_KEEP |
+@@ -374,7 +748,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ if (rmesa->using_hyperz) {
+ rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
+ RADEON_Z_DECOMPRESSION_ENABLE;
+- if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ /* works for q3, but slight rendering errors with glxgears ? */
+ /* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+ /* need this otherwise get lots of lockups with q3 ??? */
+@@ -386,10 +760,9 @@ void radeonInitState( radeonContextPtr rmesa )
+ RADEON_ANTI_ALIAS_NONE);
+
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
+- color_fmt |
+ RADEON_ZBLOCK16);
+
+- switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
++ switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+ case DRI_CONF_DITHER_XERRORDIFFRESET:
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
+ break;
+@@ -397,30 +770,17 @@ void radeonInitState( radeonContextPtr rmesa )
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
+ break;
+ }
+- if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
++ if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+ DRI_CONF_ROUND_ROUND )
+- rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
++ rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
+ else
+- rmesa->state.color.roundEnable = 0;
+- if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
++ rmesa->radeon.state.color.roundEnable = 0;
++ if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+ DRI_CONF_COLOR_REDUCTION_DITHER )
+ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
+ else
+- rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
++ rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
+- rmesa->radeonScreen->fbLocation)
+- & RADEON_COLOROFFSET_MASK);
+-
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
+- RADEON_COLORPITCH_MASK) |
+- RADEON_COLOR_ENDIAN_NO_SWAP);
+-
+-
+- /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
+- if (rmesa->sarea->tiling_enabled) {
+- rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+- }
+
+ rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
+ RADEON_BFACE_SOLID |
+@@ -444,7 +804,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ RADEON_VC_NO_SWAP;
+ #endif
+
+- if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++ if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+ rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
+ }
+
+@@ -491,8 +851,8 @@ void radeonInitState( radeonContextPtr rmesa )
+ (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+ /* Initialize the texture offset to the start of the card texture heap */
+- rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ // rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
++ // rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+
+ rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+ rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =
+@@ -513,15 +873,15 @@ void radeonInitState( radeonContextPtr rmesa )
+
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
+- rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++ rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ }
+
+ /* Can only add ST1 at the time of doing some multitex but can keep
+@@ -613,5 +973,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+ rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+
+- rmesa->hw.all_dirty = GL_TRUE;
++ rmesa->radeon.hw.all_dirty = GL_TRUE;
++
++ rcommonInitCmdBuf(&rmesa->radeon);
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+index ebea1fe..af933a3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_tcl.h"
+
+
+-static void flush_last_swtcl_prim( radeonContextPtr rmesa );
+-
+ /* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */
+ /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
+ #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */
+@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa );
+
+ #define EMIT_ATTR( ATTR, STYLE, F0 ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ fmt_0 |= F0; \
+ } while (0)
+
+ #define EMIT_PAD( N ) \
+ do { \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
+- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
+- rmesa->swtcl.vertex_attr_count++; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
++ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
++ rmesa->radeon.swtcl.vertex_attr_count++; \
+ } while (0)
+
+ static GLuint radeon_cp_vc_frmts[3][2] =
+@@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
+
+ static void radeonSetVertexFormat( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ DECLARE_RENDERINPUTS(index_bitset);
+@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
+ }
+
+ assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+- rmesa->swtcl.vertex_attr_count = 0;
++ rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+ /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+ * build up a hardware vertex.
+@@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx )
+ }
+ }
+
+- if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
++ if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+ fmt_0 != rmesa->swtcl.vertex_format) {
+ RADEON_NEWPRIM(rmesa);
+ rmesa->swtcl.vertex_format = fmt_0;
+- rmesa->swtcl.vertex_size =
++ rmesa->radeon.swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+- rmesa->swtcl.vertex_attrs,
+- rmesa->swtcl.vertex_attr_count,
++ rmesa->radeon.swtcl.vertex_attrs,
++ rmesa->radeon.swtcl.vertex_attr_count,
+ NULL, 0 );
+- rmesa->swtcl.vertex_size /= 4;
+- RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
++ rmesa->radeon.swtcl.vertex_size /= 4;
++ RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+ if (RADEON_DEBUG & DEBUG_VERTS)
+ fprintf( stderr, "%s: vertex_size= %d floats\n",
+- __FUNCTION__, rmesa->swtcl.vertex_size);
++ __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
+ }
+ }
+
+
+ static void radeonRenderStart( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+
+ radeonSetVertexFormat( ctx );
+
+- if (rmesa->dma.flush != 0 &&
+- rmesa->dma.flush != flush_last_swtcl_prim)
+- rmesa->dma.flush( rmesa );
++ if (rmesa->radeon.dma.flush != 0 &&
++ rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
++ rmesa->radeon.dma.flush( ctx );
+ }
+
+
+@@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx )
+ */
+ void radeonChooseVertexState( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+ GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+@@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx )
+ * rasterization fallback. As this function will be called again when we
+ * leave a rasterization fallback, we can just skip it for now.
+ */
+- if (rmesa->Fallback != 0)
++ if (rmesa->radeon.Fallback != 0)
+ return;
+
+ /* HW perspective divide is a win, but tiny vertex formats are a
+@@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx )
+ }
+ }
+
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( radeonContextPtr rmesa )
++void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+ {
+- if (RADEON_DEBUG & DEBUG_IOCTL)
+- fprintf(stderr, "%s\n", __FUNCTION__);
+-
+- rmesa->dma.flush = NULL;
+-
+- if (rmesa->dma.current.buf) {
+- struct radeon_dma_region *current = &rmesa->dma.current;
+- GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
+- current->buf->buf->idx * RADEON_BUFFER_SIZE +
+- current->start);
+-
+- assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+- assert (current->start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- current->ptr);
++ rcommonEnsureCmdBufSpace(&rmesa->radeon,
++ rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++ __FUNCTION__);
+
+- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+- radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
+- rmesa->hw.max_state_size + VBUF_BUFSZ );
+
+- radeonEmitVertexAOS( rmesa,
+- rmesa->swtcl.vertex_size,
+- current_offset);
++ radeonEmitState(&rmesa->radeon);
++ radeonEmitVertexAOS( rmesa,
++ rmesa->radeon.swtcl.vertex_size,
++ rmesa->radeon.dma.current,
++ current_offset);
+
+- radeonEmitVbufPrim( rmesa,
+- rmesa->swtcl.vertex_format,
+- rmesa->swtcl.hw_primitive,
+- rmesa->swtcl.numverts);
+- }
++
++ radeonEmitVbufPrim( rmesa,
++ rmesa->swtcl.vertex_format,
++ rmesa->radeon.swtcl.hw_primitive,
++ rmesa->radeon.swtcl.numverts);
+
+- rmesa->swtcl.numverts = 0;
+- current->start = current->ptr;
+- }
+ }
+
+-
+-/* Alloc space in the current dma region.
+- */
+-static INLINE void *
+-radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
+-{
+- GLuint bytes = vsize * nverts;
+-
+- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
+- radeonRefillCurrentDmaRegion( rmesa );
+-
+- if (!rmesa->dma.flush) {
+- rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+- rmesa->dma.flush = flush_last_swtcl_prim;
+- }
+-
+- assert( vsize == rmesa->swtcl.vertex_size * 4 );
+- assert( rmesa->dma.flush == flush_last_swtcl_prim );
+- assert (rmesa->dma.current.start +
+- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+- rmesa->dma.current.ptr);
+-
+-
+- {
+- GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+- rmesa->dma.current.ptr += bytes;
+- rmesa->swtcl.numverts += nverts;
+- return head;
+- }
+-
+-}
+-
+-
+ /*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers. Use strip/fan hardware primitives where possible.
+@@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
+ };
+
+ static INLINE void
+-radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
++radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
+ {
+ RADEON_NEWPRIM( rmesa );
+- rmesa->swtcl.hw_primitive = hw_prim[prim];
+- assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
++ rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
++ // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
+ }
+
+-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+ #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+ #define FLUSH() RADEON_NEWPRIM( rmesa )
+-#define GET_CURRENT_VB_MAX_VERTS() \
+- (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
++#define GET_CURRENT_VB_MAX_VERTS() 10\
++// (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
+ #define GET_SUBSEQUENT_VB_MAX_VERTS() \
+- ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
++ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
+ #define ALLOC_VERTS( nr ) \
+- radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
++ rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
+ #define EMIT_VERTS( ctx, j, nr, buf ) \
+ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
+
+@@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+ static GLboolean radeon_run_render( GLcontext *ctx,
+ struct tnl_pipeline_stage *stage )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ tnl_render_func *tab = TAG(render_tab_verts);
+ GLuint i;
+
+- if (rmesa->swtcl.indexed_verts.buf)
+- RELEASE_ELT_VERTS();
+-
+- if (rmesa->swtcl.RenderIndex != 0 ||
++ if (rmesa->radeon.swtcl.RenderIndex != 0 ||
+ !radeon_dma_validate_render( ctx, VB ))
+ return GL_TRUE;
+
+@@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
+
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+-#define CTX_ARG radeonContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
++#define CTX_ARG r100ContextPtr rmesa
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS \
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
+- const char *radeonverts = (char *)rmesa->swtcl.verts;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
++ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
+ #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
+ #define VERTEX radeonVertex
+ #undef TAG
+@@ -560,7 +504,7 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
+
+ #define VERT_SET_RGBA( v, c ) \
+ do { \
+@@ -606,7 +550,7 @@ do { \
+ #undef INIT
+
+ #define LOCAL_VARS(n) \
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
+ GLuint color[n], spec[n]; \
+ GLuint coloroffset = rmesa->swtcl.coloroffset; \
+ GLuint specoffset = rmesa->swtcl.specoffset; \
+@@ -617,7 +561,7 @@ do { \
+ ***********************************************************************/
+
+ #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -673,9 +617,9 @@ static void init_rast_tab( void )
+ } while (0)
+ #undef LOCAL_VARS
+ #define LOCAL_VARS \
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \
+- const GLuint vertsize = rmesa->swtcl.vertex_size; \
+- const char *radeonverts = (char *)rmesa->swtcl.verts; \
++ r100ContextPtr rmesa = R100_CONTEXT(ctx); \
++ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
++ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \
+ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
+ const GLboolean stipple = ctx->Line.StippleFlag; \
+ (void) elt; (void) stipple;
+@@ -700,17 +644,17 @@ static void init_rast_tab( void )
+ void radeonChooseRenderState( GLcontext *ctx )
+ {
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint index = 0;
+ GLuint flags = ctx->_TriangleCaps;
+
+- if (!rmesa->TclFallback || rmesa->Fallback)
++ if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
+ return;
+
+ if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
+ if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT;
+
+- if (index != rmesa->swtcl.RenderIndex) {
++ if (index != rmesa->radeon.swtcl.RenderIndex) {
+ tnl->Driver.Render.Points = rast_tab[index].points;
+ tnl->Driver.Render.Line = rast_tab[index].line;
+ tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx )
+ tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ }
+
+- rmesa->swtcl.RenderIndex = index;
++ rmesa->radeon.swtcl.RenderIndex = index;
+ }
+ }
+
+@@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx )
+
+ static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+- if (rmesa->swtcl.hw_primitive != hwprim) {
++ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ RADEON_NEWPRIM( rmesa );
+- rmesa->swtcl.hw_primitive = hwprim;
++ rmesa->radeon.swtcl.hw_primitive = hwprim;
+ }
+ }
+
+ static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- rmesa->swtcl.render_primitive = prim;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ rmesa->radeon.swtcl.render_primitive = prim;
+ if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
+ }
+@@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx )
+
+ static void radeonResetLineStipple( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ RADEON_STATECHANGE( rmesa, lin );
+ }
+
+@@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit)
+
+ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+- GLuint oldfallback = rmesa->Fallback;
++ GLuint oldfallback = rmesa->radeon.Fallback;
+
+ if (mode) {
+- rmesa->Fallback |= bit;
++ rmesa->radeon.Fallback |= bit;
+ if (oldfallback == 0) {
+- RADEON_FIREVERTICES( rmesa );
++ radeon_firevertices(&rmesa->radeon);
+ TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+ _swsetup_Wakeup( ctx );
+- rmesa->swtcl.RenderIndex = ~0;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
+ if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+ fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+ bit, getFallbackString(bit));
+@@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ }
+ }
+ else {
+- rmesa->Fallback &= ~bit;
++ rmesa->radeon.Fallback &= ~bit;
+ if (oldfallback == bit) {
+ _swrast_flush( ctx );
+ tnl->Driver.Render.Start = radeonRenderStart;
+@@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+
+ tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+ TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+- if (rmesa->TclFallback) {
+- /* These are already done if rmesa->TclFallback goes to
++ if (rmesa->radeon.TclFallback) {
++ /* These are already done if rmesa->radeon.TclFallback goes to
+ * zero above. But not if it doesn't (RADEON_NO_TCL for
+ * example?)
+ */
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+- RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
++ RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+ radeonChooseVertexState( ctx );
+ radeonChooseRenderState( ctx );
+ }
+@@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ void radeonInitSwtcl( GLcontext *ctx )
+ {
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ static int firsttime = 1;
+
+ if (firsttime) {
+@@ -872,18 +816,15 @@ void radeonInitSwtcl( GLcontext *ctx )
+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ RADEON_MAX_TNL_VERTEX_SIZE);
+
+- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+- rmesa->swtcl.RenderIndex = ~0;
+- rmesa->swtcl.render_primitive = GL_TRIANGLES;
+- rmesa->swtcl.hw_primitive = 0;
++ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++ rmesa->radeon.swtcl.RenderIndex = ~0;
++ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++ rmesa->radeon.swtcl.hw_primitive = 0;
+ }
+
+
+ void radeonDestroySwtcl( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+- if (rmesa->swtcl.indexed_verts.buf)
+- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+- __FUNCTION__ );
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+index e485052..3ada989 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+@@ -63,5 +63,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
+
+ extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+-
++extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
+index 779e9ae..5887ab3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
+@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/tnl.h"
+ #include "tnl/t_pipeline.h"
+
++#include "radeon_common.h"
+ #include "radeon_context.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+@@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = {
+ };
+
+
+-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+ #define ELT_TYPE GLushort
+
+ #define ELT_INIT(prim, hw_prim) \
+@@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = {
+
+ #define RESET_STIPPLE() do { \
+ RADEON_STATECHANGE( rmesa, lin ); \
+- radeonEmitState( rmesa ); \
++ radeonEmitState(&rmesa->radeon); \
+ } while (0)
+
+ #define AUTO_STIPPLE( mode ) do { \
+@@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = {
+ else \
+ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
+ ~RADEON_LINE_PATTERN_AUTO_RESET; \
+- radeonEmitState( rmesa ); \
++ radeonEmitState(&rmesa->radeon); \
+ } while (0)
+
+
+
+ #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr )
+
+-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
++static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
+ {
+- if (rmesa->dma.flush)
+- rmesa->dma.flush( rmesa );
++ if (rmesa->radeon.dma.flush)
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+- radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+- rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
++ rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) +
++ AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
+
+- radeonEmitAOS( rmesa,
+- rmesa->tcl.aos_components,
+- rmesa->tcl.nr_aos_components, 0 );
++ radeonEmitAOS( rmesa,
++ rmesa->tcl.nr_aos_components, 0 );
+
+- return radeonAllocEltsOpenEnded( rmesa,
+- rmesa->tcl.vertex_format,
+- rmesa->tcl.hw_primitive, nr );
++ return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
++ rmesa->tcl.hw_primitive, nr );
+ }
+
+-#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa )
++#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa )
+
+
+
+@@ -174,14 +173,14 @@ static void radeonEmitPrim( GLcontext *ctx,
+ GLuint start,
+ GLuint count)
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++ r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ radeonTclPrimitive( ctx, prim, hwprim );
+
+- radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+- rmesa->hw.max_state_size + VBUF_BUFSZ );
++ rcommonEnsureCmdBufSpace( &rmesa->radeon,
++ AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
++ rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
+
+ radeonEmitAOS( rmesa,
+- rmesa->tcl.aos_components,
+ rmesa->tcl.nr_aos_components,
+ start );
+
+@@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx,
+ GLenum prim,
+ int hw_prim )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint se_cntl;
+ GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+
+@@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+ struct tnl_pipeline_stage *stage )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
+@@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+
+ /* TODO: separate this from the swtnl pipeline
+ */
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ return GL_TRUE; /* fallback to software t&l */
+
+ if (VB->Count == 0)
+@@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
+
+ static void transition_to_swtnl( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ GLuint se_cntl;
+
+@@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx )
+
+ static void transition_to_hwtnl( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+
+@@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx )
+
+ tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+
+- if ( rmesa->dma.flush )
+- rmesa->dma.flush( rmesa );
++ if ( rmesa->radeon.dma.flush )
++ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+- rmesa->dma.flush = NULL;
++ rmesa->radeon.dma.flush = NULL;
+ rmesa->swtcl.vertex_format = 0;
+
+- if (rmesa->swtcl.indexed_verts.buf)
+- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+- __FUNCTION__ );
++ // if (rmesa->swtcl.indexed_verts.buf)
++ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
++ // __FUNCTION__ );
+
+ if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "Radeon end tcl fallback\n");
+@@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit)
+
+ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- GLuint oldfallback = rmesa->TclFallback;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ GLuint oldfallback = rmesa->radeon.TclFallback;
+
+ if (mode) {
+- rmesa->TclFallback |= bit;
++ rmesa->radeon.TclFallback |= bit;
+ if (oldfallback == 0) {
+ if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "Radeon begin tcl fallback %s\n",
+@@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ }
+ }
+ else {
+- rmesa->TclFallback &= ~bit;
++ rmesa->radeon.TclFallback &= ~bit;
+ if (oldfallback == bit) {
+ if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "Radeon end tcl fallback %s\n",
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
+index b0aec21..2dfb504 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
++++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
+@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/texobj.h"
+
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_swtcl.h"
+@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+ {
+ GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
+
++ /* Force revalidation to account for switches from/to mipmapping. */
++ t->validated = GL_FALSE;
++
+ t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
+
+ /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
+- if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
++ if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
+ switch ( minf ) {
+ case GL_NEAREST:
+ case GL_NEAREST_MIPMAP_NEAREST:
+@@ -244,433 +248,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
+ t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+ }
+
+-
+-/**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
+-{
+- radeonTexObjPtr t;
+-
+- t = CALLOC_STRUCT( radeon_tex_obj );
+- texObj->DriverData = t;
+- if ( t != NULL ) {
+- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
+- }
+-
+- /* Initialize non-image-dependent parts of the state:
+- */
+- t->base.tObj = texObj;
+- t->border_fallback = GL_FALSE;
+-
+- t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+- t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+- RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+-
+- make_empty_list( & t->base );
+-
+- radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
+- radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+- radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+- radeonSetTexBorderColor( t, texObj->_BorderChan );
+- }
+-
+- return t;
+-}
+-
+-
+-static const struct gl_texture_format *
+-radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+- GLenum format, GLenum type )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- const GLboolean do32bpt =
+- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+- const GLboolean force16bpt =
+- ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+- (void) format;
+-
+- switch ( internalFormat ) {
+- case 4:
+- case GL_RGBA:
+- case GL_COMPRESSED_RGBA:
+- switch ( type ) {
+- case GL_UNSIGNED_INT_10_10_10_2:
+- case GL_UNSIGNED_INT_2_10_10_10_REV:
+- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- default:
+- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
+- }
+-
+- case 3:
+- case GL_RGB:
+- case GL_COMPRESSED_RGB:
+- switch ( type ) {
+- case GL_UNSIGNED_SHORT_4_4_4_4:
+- case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+- return _dri_texformat_argb4444;
+- case GL_UNSIGNED_SHORT_5_5_5_1:
+- case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+- return _dri_texformat_argb1555;
+- case GL_UNSIGNED_SHORT_5_6_5:
+- case GL_UNSIGNED_SHORT_5_6_5_REV:
+- return _dri_texformat_rgb565;
+- default:
+- return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+- }
+-
+- case GL_RGBA8:
+- case GL_RGB10_A2:
+- case GL_RGBA12:
+- case GL_RGBA16:
+- return !force16bpt ?
+- _dri_texformat_argb8888 : _dri_texformat_argb4444;
+-
+- case GL_RGBA4:
+- case GL_RGBA2:
+- return _dri_texformat_argb4444;
+-
+- case GL_RGB5_A1:
+- return _dri_texformat_argb1555;
+-
+- case GL_RGB8:
+- case GL_RGB10:
+- case GL_RGB12:
+- case GL_RGB16:
+- return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-
+- case GL_RGB5:
+- case GL_RGB4:
+- case GL_R3_G3_B2:
+- return _dri_texformat_rgb565;
+-
+- case GL_ALPHA:
+- case GL_ALPHA4:
+- case GL_ALPHA8:
+- case GL_ALPHA12:
+- case GL_ALPHA16:
+- case GL_COMPRESSED_ALPHA:
+- return _dri_texformat_a8;
+-
+- case 1:
+- case GL_LUMINANCE:
+- case GL_LUMINANCE4:
+- case GL_LUMINANCE8:
+- case GL_LUMINANCE12:
+- case GL_LUMINANCE16:
+- case GL_COMPRESSED_LUMINANCE:
+- return _dri_texformat_l8;
+-
+- case 2:
+- case GL_LUMINANCE_ALPHA:
+- case GL_LUMINANCE4_ALPHA4:
+- case GL_LUMINANCE6_ALPHA2:
+- case GL_LUMINANCE8_ALPHA8:
+- case GL_LUMINANCE12_ALPHA4:
+- case GL_LUMINANCE12_ALPHA12:
+- case GL_LUMINANCE16_ALPHA16:
+- case GL_COMPRESSED_LUMINANCE_ALPHA:
+- return _dri_texformat_al88;
+-
+- case GL_INTENSITY:
+- case GL_INTENSITY4:
+- case GL_INTENSITY8:
+- case GL_INTENSITY12:
+- case GL_INTENSITY16:
+- case GL_COMPRESSED_INTENSITY:
+- return _dri_texformat_i8;
+-
+- case GL_YCBCR_MESA:
+- if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+- type == GL_UNSIGNED_BYTE)
+- return &_mesa_texformat_ycbcr;
+- else
+- return &_mesa_texformat_ycbcr_rev;
+-
+- case GL_RGB_S3TC:
+- case GL_RGB4_S3TC:
+- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgb_dxt1;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+- return &_mesa_texformat_rgba_dxt1;
+-
+- case GL_RGBA_S3TC:
+- case GL_RGBA4_S3TC:
+- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+- return &_mesa_texformat_rgba_dxt3;
+-
+- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+- return &_mesa_texformat_rgba_dxt5;
+-
+- default:
+- _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
+- return NULL;
+- }
+-
+- return NULL; /* never get here */
+-}
+-
+-
+-static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint border,
+- GLenum format, GLenum type, const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+- return;
+- }
+- }
+-
+- /* Note, this will call ChooseTextureFormat */
+- _mesa_store_teximage1d(ctx, target, level, internalFormat,
+- width, border, format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset,
+- GLsizei width,
+- GLenum format, GLenum type,
+- const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+- format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLenum format, GLenum type, const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if ( t != NULL ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+- return;
+- }
+- }
+-
+- /* Note, this will call ChooseTextureFormat */
+- _mesa_store_teximage2d(ctx, target, level, internalFormat,
+- width, height, border, format, type, pixels,
+- &ctx->Unpack, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset,
+- GLsizei width, GLsizei height,
+- GLenum format, GLenum type,
+- const GLvoid *pixels,
+- const struct gl_pixelstore_attrib *packing,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+- return;
+- }
+- }
+-
+- _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+- height, format, type, pixels, packing, texObj,
+- texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint internalFormat,
+- GLint width, GLint height, GLint border,
+- GLsizei imageSize, const GLvoid *data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- if ( t != NULL ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+- return;
+- }
+- }
+-
+- /* Note, this will call ChooseTextureFormat */
+- _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
+- height, border, imageSize, data, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+- GLint xoffset, GLint yoffset,
+- GLsizei width, GLsizei height,
+- GLenum format,
+- GLsizei imageSize, const GLvoid *data,
+- struct gl_texture_object *texObj,
+- struct gl_texture_image *texImage )
+-{
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
+- GLuint face;
+-
+-
+- /* which cube face or ordinary 2D image */
+- switch (target) {
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+- face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+- ASSERT(face < 6);
+- break;
+- default:
+- face = 0;
+- }
+-
+- assert( t ); /* this _should_ be true */
+- if ( t ) {
+- driSwapOutTextureObject( t );
+- }
+- else {
+- t = (driTextureObject *) radeonAllocTexObj( texObj );
+- if (!t) {
+- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
+- return;
+- }
+- }
+-
+- _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+- height, format, imageSize, data, texObj, texImage);
+-
+- t->dirty_images[face] |= (1 << level);
+-}
+-
+ #define SCALED_FLOAT_TO_BYTE( x, scale ) \
+ (((GLuint)((255.0F / scale) * (x))) / 2)
+
+ static void radeonTexEnv( GLcontext *ctx, GLenum target,
+ GLenum pname, const GLfloat *param )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint unit = ctx->Texture.CurrentUnit;
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+@@ -701,7 +285,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
+ * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
+ * [0.0,4.0] to [0,127].
+ */
+- min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
++ min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
+ 0.0 : -1.0;
+ bias = CLAMP( *param, min, 4.0 );
+ if ( bias == 0 ) {
+@@ -734,7 +318,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat *params )
+ {
+- radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
++ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+ fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+@@ -762,57 +346,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
+ case GL_TEXTURE_MAX_LEVEL:
+ case GL_TEXTURE_MIN_LOD:
+ case GL_TEXTURE_MAX_LOD:
++
+ /* This isn't the most efficient solution but there doesn't appear to
+ * be a nice alternative. Since there's no LOD clamping,
+ * we just have to rely on loading the right subset of mipmap levels
+ * to simulate a clamped LOD.
+ */
+- driSwapOutTextureObject( (driTextureObject *) t );
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
++ t->validated = GL_FALSE;
++ }
+ break;
+
+ default:
+ return;
+ }
+-
+- /* Mark this texobj as dirty (one bit per tex unit)
+- */
+- t->dirty_state = TEX_ALL;
+-}
+-
+-
+-static void radeonBindTexture( GLcontext *ctx, GLenum target,
+- struct gl_texture_object *texObj )
+-{
+- if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+- fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
+- ctx->Texture.CurrentUnit );
+- }
+-
+- assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
+- target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
+- (texObj->DriverData != NULL) );
+ }
+
+-
+ static void radeonDeleteTexture( GLcontext *ctx,
+ struct gl_texture_object *texObj )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- driTextureObject * t = (driTextureObject *) texObj->DriverData;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ radeonTexObj* t = radeon_tex_obj(texObj);
++ int i;
+
+ if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+ fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+ _mesa_lookup_enum_by_nr( texObj->Target ) );
+ }
+
+- if ( t != NULL ) {
+- if ( rmesa ) {
+- RADEON_FIREVERTICES( rmesa );
+- }
+-
+- driDestroyTextureObject( t );
++ if ( rmesa ) {
++ radeon_firevertices(&rmesa->radeon);
++ for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
++ if ( t == rmesa->state.texture.unit[i].texobj ) {
++ rmesa->state.texture.unit[i].texobj = NULL;
++ rmesa->hw.tex[i].dirty = GL_FALSE;
++ rmesa->hw.cube[i].dirty = GL_FALSE;
++ }
++ }
+ }
+
++ if (t->mt) {
++ radeon_miptree_unreference(t->mt);
++ t->mt = 0;
++ }
+ /* Free mipmap images and the texture object itself */
+ _mesa_delete_texture_object(ctx, texObj);
+ }
+@@ -832,7 +410,7 @@ static void radeonTexGen( GLcontext *ctx,
+ GLenum pname,
+ const GLfloat *params )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLuint unit = ctx->Texture.CurrentUnit;
+ rmesa->recheck_texgen[unit] = GL_TRUE;
+ }
+@@ -846,17 +424,27 @@ static void radeonTexGen( GLcontext *ctx,
+ static struct gl_texture_object *
+ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- struct gl_texture_object *obj;
+- obj = _mesa_new_texture_object(ctx, name, target);
+- if (!obj)
+- return NULL;
+- obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+- radeonAllocTexObj( obj );
+- return obj;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++
++ _mesa_initialize_texture_object(&t->base, name, target);
++ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++ t->border_fallback = GL_FALSE;
++
++ t->pp_txfilter = RADEON_BORDER_MODE_OGL;
++ t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
++ RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
++
++ radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
++ radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
++ radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
++ radeonSetTexBorderColor( t, t->base._BorderChan );
++ return &t->base;
+ }
+
+
++
+ void radeonInitTextureFuncs( struct dd_function_table *functions )
+ {
+ functions->ChooseTextureFormat = radeonChooseTextureFormat;
+@@ -864,11 +452,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
+ functions->TexImage2D = radeonTexImage2D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
++ functions->GetTexImage = radeonGetTexImage;
++ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+
+ functions->NewTextureObject = radeonNewTextureObject;
+- functions->BindTexture = radeonBindTexture;
++ // functions->BindTexture = radeonBindTexture;
+ functions->DeleteTexture = radeonDeleteTexture;
+- functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexEnv = radeonTexEnv;
+ functions->TexParameter = radeonTexParameter;
+@@ -877,5 +466,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+
++ functions->GenerateMipmap = radeonGenerateMipmap;
++
++ functions->NewTextureImage = radeonNewTextureImage;
++ functions->FreeTexImageData = radeonFreeTexImageData;
++ functions->MapTexture = radeonMapTexture;
++ functions->UnmapTexture = radeonUnmapTexture;
++
+ driInitTextureFormats();
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
+index 8000880..8c2f9be 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
++++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
+@@ -43,10 +43,10 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+
+ extern void radeonUpdateTextureState( GLcontext *ctx );
+
+-extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
++extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
+ GLuint face );
+
+-extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
++extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
+
+ extern void radeonInitTextureFuncs( struct dd_function_table *functions );
+
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
+deleted file mode 100644
+index 5f7bbe6..0000000
+--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
++++ /dev/null
+@@ -1,404 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+- VA Linux Systems Inc., Fremont, California.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- * Kevin E. Martin <martin@valinux.com>
+- * Gareth Hughes <gareth@valinux.com>
+- *
+- */
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/macros.h"
+-
+-#include "radeon_context.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_tex.h"
+-
+-#include <unistd.h> /* for usleep() */
+-
+-
+-/**
+- * Destroy any device-dependent state associated with the texture. This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void
+-radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
+-{
+- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
+- }
+-
+- if ( rmesa != NULL ) {
+- unsigned i;
+-
+-
+- for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
+- if ( t == rmesa->state.texture.unit[i].texobj ) {
+- rmesa->state.texture.unit[i].texobj = NULL;
+- }
+- }
+- }
+-}
+-
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-
+-static void radeonUploadRectSubImage( radeonContextPtr rmesa,
+- radeonTexObjPtr t,
+- struct gl_texture_image *texImage,
+- GLint x, GLint y,
+- GLint width, GLint height )
+-{
+- const struct gl_texture_format *texFormat = texImage->TexFormat;
+- int blit_format, dstPitch, done;
+-
+- switch ( texFormat->TexelBytes ) {
+- case 1:
+- blit_format = RADEON_GMC_DST_8BPP_CI;
+- break;
+- case 2:
+- blit_format = RADEON_GMC_DST_16BPP;
+- break;
+- case 4:
+- blit_format = RADEON_GMC_DST_32BPP;
+- break;
+- default:
+- fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n",
+- texFormat->TexelBytes);
+- return;
+- }
+-
+- t->image[0][0].data = texImage->Data;
+-
+- /* Currently don't need to cope with small pitches.
+- */
+- width = texImage->Width;
+- height = texImage->Height;
+- dstPitch = t->pp_txpitch + 32;
+-
+- { /* FIXME: prefer GART-texturing if possible */
+- /* Data not in GART memory, or bad pitch.
+- */
+- for (done = 0; done < height ; ) {
+- struct radeon_dma_region region;
+- int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
+- int src_pitch;
+- char *tex;
+-
+- src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+- tex = (char *)texImage->Data + done * src_pitch;
+-
+- memset(&region, 0, sizeof(region));
+- radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
+-
+- /* Copy texdata to dma:
+- */
+- if (0)
+- fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+- __FUNCTION__, src_pitch, dstPitch);
+-
+- if (src_pitch == dstPitch) {
+- memcpy( region.address + region.start, tex, lines * src_pitch );
+- }
+- else {
+- char *buf = region.address + region.start;
+- int i;
+- for (i = 0 ; i < lines ; i++) {
+- memcpy( buf, tex, src_pitch );
+- buf += dstPitch;
+- tex += src_pitch;
+- }
+- }
+-
+- radeonEmitWait( rmesa, RADEON_WAIT_3D );
+-
+-
+-
+- /* Blit to framebuffer
+- */
+- radeonEmitBlit( rmesa,
+- blit_format,
+- dstPitch, GET_START( &region ),
+- dstPitch, t->bufAddr,
+- 0, 0,
+- 0, done,
+- width, lines );
+-
+- radeonEmitWait( rmesa, RADEON_WAIT_2D );
+-
+- radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+- done += lines;
+- }
+- }
+-}
+-
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
+- GLint hwlevel,
+- GLint x, GLint y, GLint width, GLint height,
+- GLuint face )
+-{
+- struct gl_texture_image *texImage = NULL;
+- GLuint offset;
+- GLint imageWidth, imageHeight;
+- GLint ret;
+- drm_radeon_texture_t tex;
+- drm_radeon_tex_image_t tmp;
+- const int level = hwlevel + t->base.firstLevel;
+-
+- if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+- fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+- __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
+- }
+-
+- ASSERT(face < 6);
+-
+- /* Ensure we have a valid texture to upload */
+- if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+- _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+- return;
+- }
+-
+- texImage = t->base.tObj->Image[face][level];
+-
+- if ( !texImage ) {
+- if ( RADEON_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+- return;
+- }
+- if ( !texImage->Data ) {
+- if ( RADEON_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+- return;
+- }
+-
+-
+- if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- assert(level == 0);
+- assert(hwlevel == 0);
+- if ( RADEON_DEBUG & DEBUG_TEXTURE )
+- fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+- radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+- return;
+- }
+-
+- imageWidth = texImage->Width;
+- imageHeight = texImage->Height;
+-
+- offset = t->bufAddr + t->base.totalSize * face / 6;
+-
+- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+- GLint imageX = 0;
+- GLint imageY = 0;
+- GLint blitX = t->image[face][hwlevel].x;
+- GLint blitY = t->image[face][hwlevel].y;
+- GLint blitWidth = t->image[face][hwlevel].width;
+- GLint blitHeight = t->image[face][hwlevel].height;
+- fprintf( stderr, " upload image: %d,%d at %d,%d\n",
+- imageWidth, imageHeight, imageX, imageY );
+- fprintf( stderr, " upload blit: %d,%d at %d,%d\n",
+- blitWidth, blitHeight, blitX, blitY );
+- fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n",
+- (GLuint)offset, hwlevel, level );
+- }
+-
+- t->image[face][hwlevel].data = texImage->Data;
+-
+- /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+- * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+- * We used to use 1, 2 and 4-byte texels and used to use the texture
+- * width to dictate the blit width - but that won't work for compressed
+- * textures. (Brian)
+- * NOTE: can't do that with texture tiling. (sroland)
+- */
+- tex.offset = offset;
+- tex.image = &tmp;
+- /* copy (x,y,width,height,data) */
+- memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
+-
+- if (texImage->TexFormat->TexelBytes) {
+- /* use multi-byte upload scheme */
+- tex.height = imageHeight;
+- tex.width = imageWidth;
+- tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+- tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+- tex.offset += tmp.x & ~1023;
+- tmp.x = tmp.x % 1024;
+- if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+- /* need something like "tiled coordinates" ? */
+- tmp.y = tmp.x / (tex.pitch * 128) * 2;
+- tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+- tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+- }
+- else {
+- tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+- }
+- if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
+- (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
+- /* radeon switches off macro tiling for small textures/mipmaps it seems */
+- tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+- }
+- }
+- else {
+- /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
+- needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+- /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
+- so the kernel module reads the right amount of data. */
+- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+- tex.pitch = (BLIT_WIDTH_BYTES / 64);
+- tex.height = (imageHeight + 3) / 4;
+- tex.width = (imageWidth + 3) / 4;
+- switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
+- case RADEON_TXFORMAT_DXT1:
+- tex.width *= 8;
+- break;
+- case RADEON_TXFORMAT_DXT23:
+- case RADEON_TXFORMAT_DXT45:
+- tex.width *= 16;
+- break;
+- }
+- }
+-
+- LOCK_HARDWARE( rmesa );
+- do {
+- ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+- &tex, sizeof(drm_radeon_texture_t) );
+- } while ( ret == -EAGAIN );
+-
+- UNLOCK_HARDWARE( rmesa );
+-
+- if ( ret ) {
+- fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+- fprintf( stderr, " offset=0x%08x\n",
+- offset );
+- fprintf( stderr, " image width=%d height=%d\n",
+- imageWidth, imageHeight );
+- fprintf( stderr, " blit width=%d height=%d data=%p\n",
+- t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+- t->image[face][hwlevel].data );
+- exit( 1 );
+- }
+-}
+-
+-
+-/**
+- * Upload the texture images associated with texture \a t. This might
+- * require the allocation of texture memory.
+- *
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded. Zero for non-cube maps.
+- */
+-
+-int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
+-{
+- int numLevels;
+-
+- if ( !t || t->base.totalSize == 0 || t->image_override )
+- return 0;
+-
+- if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+- fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+- (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
+- t->base.firstLevel, t->base.lastLevel );
+- }
+-
+- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+- radeonFinish( rmesa->glCtx );
+- }
+-
+- LOCK_HARDWARE( rmesa );
+-
+- if ( t->base.memBlock == NULL ) {
+- int heap;
+-
+- heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+- (driTextureObject *) t );
+- if ( heap == -1 ) {
+- UNLOCK_HARDWARE( rmesa );
+- return -1;
+- }
+-
+- /* Set the base offset of the texture image */
+- t->bufAddr = rmesa->radeonScreen->texOffset[heap]
+- + t->base.memBlock->ofs;
+- t->pp_txoffset = t->bufAddr;
+-
+- if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+- /* hope it's safe to add that here... */
+- t->pp_txoffset |= t->tile_bits;
+- }
+-
+- /* Mark this texobj as dirty on all units:
+- */
+- t->dirty_state = TEX_ALL;
+- }
+-
+-
+- /* Let the world know we've used this memory recently.
+- */
+- driUpdateTextureLRU( (driTextureObject *) t );
+- UNLOCK_HARDWARE( rmesa );
+-
+-
+- /* Upload any images that are new */
+- if (t->base.dirty_images[face]) {
+- int i;
+- for ( i = 0 ; i < numLevels ; i++ ) {
+- if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+- uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+- t->image[face][i].height, face );
+- }
+- }
+- t->base.dirty_images[face] = 0;
+- }
+-
+- if (RADEON_DEBUG & DEBUG_SYNC) {
+- fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+- radeonFinish( rmesa->glCtx );
+- }
+-
+- return 0;
+-}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
+index 1e2f654..6a34f1e 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
++++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
+@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/enums.h"
+
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_swtcl.h"
+@@ -75,10 +76,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
+ && (tx_table[f].format != 0xffffffff) )
+
+-static const struct {
++struct tx_table {
+ GLuint format, filter;
+-}
+-tx_table[] =
++};
++
++static const struct tx_table tx_table[] =
+ {
+ _ALPHA(RGBA8888),
+ _ALPHA_REV(RGBA8888),
+@@ -111,252 +113,6 @@ tx_table[] =
+ #undef _ALPHA
+ #undef _INVALID
+
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here
+- * too.
+- *
+- * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- * hardware state.
+- */
+-static void radeonSetTexImages( radeonContextPtr rmesa,
+- struct gl_texture_object *tObj )
+-{
+- radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
+- const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+- GLint curOffset, blitWidth;
+- GLint i, texelBytes;
+- GLint numLevels;
+- GLint log2Width, log2Height, log2Depth;
+-
+- /* Set the hardware texture format
+- */
+- if ( !t->image_override ) {
+- t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+- RADEON_TXFORMAT_ALPHA_IN_MAP);
+- t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
+-
+- if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+- t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
+- t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
+- }
+- else {
+- _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+- return;
+- }
+- }
+-
+- texelBytes = baseImage->TexFormat->TexelBytes;
+-
+- /* Compute which mipmap levels we really want to send to the hardware.
+- */
+-
+- if (tObj->Target != GL_TEXTURE_CUBE_MAP)
+- driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+- else {
+- /* r100 can't handle mipmaps for cube/3d textures, so don't waste
+- memory for them */
+- t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
+- }
+- log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+- log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+- log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+- numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+- assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+-
+- /* Calculate mipmap offsets and dimensions for blitting (uploading)
+- * The idea is that we lay out the mipmap levels within a block of
+- * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+- */
+- curOffset = 0;
+- blitWidth = BLIT_WIDTH_BYTES;
+- t->tile_bits = 0;
+-
+- /* figure out if this texture is suitable for tiling. */
+- if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+- if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+- /* allow 32 (bytes) x 1 mip (which will use two times the space
+- the non-tiled version would use) max if base texture is large enough */
+- if ((numLevels == 1) ||
+- (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+- (baseImage->Width * texelBytes > 64)) ||
+- ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+- /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+- weird: X2 + OPT: 32bit correct, 16bit completely hosed
+- X2: 32bit correct, 16bit correct
+- OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+- t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+- }
+- }
+- if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+- /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+- in the case if height is smaller than 16 (not 100% sure), as does the r200,
+- so need to disable macro tiling in that case */
+- if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+- t->tile_bits |= RADEON_TXO_MACRO_TILE;
+- }
+- }
+- }
+-
+- for (i = 0; i < numLevels; i++) {
+- const struct gl_texture_image *texImage;
+- GLuint size;
+-
+- texImage = tObj->Image[0][i + t->base.firstLevel];
+- if ( !texImage )
+- break;
+-
+- /* find image size in bytes */
+- if (texImage->IsCompressed) {
+- /* need to calculate the size AFTER padding even though the texture is
+- submitted without padding.
+- Only handle pot textures currently - don't know if npot is even possible,
+- size calculation would certainly need (trivial) adjustments.
+- Align (and later pad) to 32byte, not sure what that 64byte blit width is
+- good for? */
+- if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
+- /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+- if ((texImage->Width + 3) < 8) /* width one block */
+- size = texImage->CompressedSize * 4;
+- else if ((texImage->Width + 3) < 16)
+- size = texImage->CompressedSize * 2;
+- else size = texImage->CompressedSize;
+- }
+- else /* DXT3/5, 16 bytes per block */
+- if ((texImage->Width + 3) < 8)
+- size = texImage->CompressedSize * 2;
+- else size = texImage->CompressedSize;
+- }
+- else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+- size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+- }
+- else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+- /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+- though the actual offset may be different (if texture is less than
+- 32 bytes width) to the untiled case */
+- int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+- size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- }
+- else {
+- int w = (texImage->Width * texelBytes + 31) & ~31;
+- size = w * texImage->Height * texImage->Depth;
+- blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+- }
+- assert(size > 0);
+-
+- /* Align to 32-byte offset. It is faster to do this unconditionally
+- * (no branch penalty).
+- */
+-
+- curOffset = (curOffset + 0x1f) & ~0x1f;
+-
+- if (texelBytes) {
+- t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+- t->image[0][i].y = 0;
+- t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+- t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+- }
+- else {
+- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+- t->image[0][i].height = size / t->image[0][i].width;
+- }
+-
+-#if 0
+- /* for debugging only and only applicable to non-rectangle targets */
+- assert(size % t->image[0][i].width == 0);
+- assert(t->image[0][i].x == 0
+- || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+-#endif
+-
+- if (0)
+- fprintf(stderr,
+- "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+- i, texImage->Width, texImage->Height,
+- t->image[0][i].x, t->image[0][i].y,
+- t->image[0][i].width, t->image[0][i].height, size, curOffset);
+-
+- curOffset += size;
+-
+- }
+-
+- /* Align the total size of texture memory block.
+- */
+- t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+- /* Setup remaining cube face blits, if needed */
+- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- const GLuint faceSize = t->base.totalSize;
+- GLuint face;
+- /* reuse face 0 x/y/width/height - just update the offset when uploading */
+- for (face = 1; face < 6; face++) {
+- for (i = 0; i < numLevels; i++) {
+- t->image[face][i].x = t->image[0][i].x;
+- t->image[face][i].y = t->image[0][i].y;
+- t->image[face][i].width = t->image[0][i].width;
+- t->image[face][i].height = t->image[0][i].height;
+- }
+- }
+- t->base.totalSize = 6 * faceSize; /* total texmem needed */
+- }
+-
+- /* Hardware state:
+- */
+- t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+- t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
+-
+- t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+- RADEON_TXFORMAT_HEIGHT_MASK |
+- RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+- RADEON_TXFORMAT_F5_WIDTH_MASK |
+- RADEON_TXFORMAT_F5_HEIGHT_MASK);
+- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+- (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+-
+- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- assert(log2Width == log2Height);
+- t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+- (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+- (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+- t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+- (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+- (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+- (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+- (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+- (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+- (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+- (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+- }
+-
+- t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
+- ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
+-
+- /* Only need to round to nearest 32 for textures, but the blitter
+- * requires 64-byte aligned pitches, and we may/may not need the
+- * blitter. NPOT only!
+- */
+- if ( !t->image_override ) {
+- if (baseImage->IsCompressed)
+- t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+- else
+- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
+- t->pp_txpitch -= 32;
+- }
+-
+- t->dirty_state = TEX_ALL;
+-
+- /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
+-}
+-
+-
+-
+ /* ================================================================
+ * Texture combine functions
+ */
+@@ -503,7 +259,7 @@ do { \
+
+ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ GLuint color_combine, alpha_combine;
+ const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
+@@ -846,22 +602,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+ {
+- radeonContextPtr rmesa = pDRICtx->driverPrivate;
++ r100ContextPtr rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+- _mesa_lookup_texture(rmesa->glCtx, texname);
+- radeonTexObjPtr t;
++ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
++ radeonTexObjPtr t = radeon_tex_obj(tObj);
+
+ if (tObj == NULL)
+ return;
+
+- t = (radeonTexObjPtr) tObj->DriverData;
+-
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+-
+- t->pp_txoffset = offset;
++
++ t->bo = NULL;
++ t->override_offset = offset;
+ t->pp_txpitch = pitch - 32;
+
+ switch (depth) {
+@@ -901,12 +656,58 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ RADEON_TXFORMAT_NON_POWER2)
+
+
+-static void import_tex_obj_state( radeonContextPtr rmesa,
++static void disable_tex_obj_state( r100ContextPtr rmesa,
++ int unit )
++{
++ /* do not use RADEON_DB_STATE to avoid stale texture caches */
++ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++ GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
++ GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
++
++ RADEON_STATECHANGE( rmesa, tex[unit] );
++
++ RADEON_STATECHANGE( rmesa, tcl );
++ rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
++ RADEON_Q_BIT(unit));
++
++ if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
++ TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
++ rmesa->recheck_texgen[unit] = GL_TRUE;
++ }
++
++ if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
++ /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
++ cubic_map bit on unit 2 when the unit is disabled, otherwise every
++ 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
++ units, better be safe than sorry though).*/
++ RADEON_STATECHANGE( rmesa, tex[unit] );
++ rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
++ }
++
++ {
++ GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
++ GLuint tmp = rmesa->TexGenEnabled;
++
++ rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
++ rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
++ rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
++ rmesa->TexGenNeedNormals[unit] = 0;
++ rmesa->TexGenEnabled |=
++ (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
++
++ if (tmp != rmesa->TexGenEnabled) {
++ rmesa->recheck_texgen[unit] = GL_TRUE;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
++ }
++ }
++}
++
++static void import_tex_obj_state( r100ContextPtr rmesa,
+ int unit,
+ radeonTexObjPtr texobj )
+ {
+ /* do not use RADEON_DB_STATE to avoid stale texture caches */
+- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+ GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+
+ RADEON_STATECHANGE( rmesa, tex[unit] );
+@@ -915,10 +716,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+ cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+ cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+ cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+
+- if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
++ if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+ GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
+ txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
+ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
+@@ -928,22 +728,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+ else {
+ se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
+
+- if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+- int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+- GLuint bytesPerFace = texobj->base.totalSize / 6;
+- ASSERT(texobj->base.totalSize % 6 == 0);
++ if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
++ uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+
+ RADEON_STATECHANGE( rmesa, cube[unit] );
+ cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+- /* dont know if this setup conforms to OpenGL..
+- * at least it matches the behavior of mesa software renderer
+- */
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
+- cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
+- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
++ /* state filled out in the cube_emit */
+ }
+ }
+
+@@ -952,13 +742,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+ rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+ }
+
+- texobj->dirty_state &= ~(1<<unit);
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+
+
+-
+-
+-static void set_texgen_matrix( radeonContextPtr rmesa,
++static void set_texgen_matrix( r100ContextPtr rmesa,
+ GLuint unit,
+ const GLfloat *s_plane,
+ const GLfloat *t_plane,
+@@ -986,14 +774,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
+ rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
+
+ rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+
+ /* Returns GL_FALSE if fallback required.
+ */
+ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+ GLuint tmp = rmesa->TexGenEnabled;
+@@ -1094,283 +882,185 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+ }
+
+ if (tmp != rmesa->TexGenEnabled) {
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+
+ return GL_TRUE;
+ }
+
+-
+-static void disable_tex( GLcontext *ctx, int unit )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+- if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
+- /* Texture unit disabled */
+- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+- /* The old texture is no longer bound to this texture unit.
+- * Mark it as such.
+- */
+-
+- rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+- rmesa->state.texture.unit[unit].texobj = NULL;
+- }
+-
+- RADEON_STATECHANGE( rmesa, ctx );
+- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &=
+- ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
+-
+- RADEON_STATECHANGE( rmesa, tcl );
+- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
+- RADEON_Q_BIT(unit));
+-
+- if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+- TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+- rmesa->recheck_texgen[unit] = GL_TRUE;
+- }
+-
+- if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
+- /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
+- cubic_map bit on unit 2 when the unit is disabled, otherwise every
+- 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
+- units, better be safe than sorry though).*/
+- RADEON_STATECHANGE( rmesa, tex[unit] );
+- rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
+- }
+-
+- {
+- GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+- GLuint tmp = rmesa->TexGenEnabled;
+-
+- rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+- rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+- rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+- rmesa->TexGenNeedNormals[unit] = 0;
+- rmesa->TexGenEnabled |=
+- (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+-
+- if (tmp != rmesa->TexGenEnabled) {
+- rmesa->recheck_texgen[unit] = GL_TRUE;
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+- }
+- }
+- }
+-}
+-
+-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-
+- /* Need to load the 2d images associated with this unit.
+- */
+- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+- t->base.dirty_images[0] = ~0;
+- }
+-
+- ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+-
+- if ( t->base.dirty_images[0] ) {
+- RADEON_FIREVERTICES( rmesa );
+- radeonSetTexImages( rmesa, tObj );
+- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+- if ( !t->base.memBlock && !t->image_override )
+- return GL_FALSE;
+- }
+-
+- return GL_TRUE;
+-}
+-
+-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
++/**
++ * Compute the cached hardware register values for the given texture object.
++ *
++ * \param rmesa Context pointer
++ * \param t the r300 texture object
++ */
++static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+- GLuint face;
+-
+- /* Need to load the 2d images associated with this unit.
+- */
+- if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+- t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+- for (face = 0; face < 6; face++)
+- t->base.dirty_images[face] = ~0;
+- }
++ const struct gl_texture_image *firstImage;
++ GLint log2Width, log2Height, log2Depth, texelBytes;
+
+- ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
++ firstImage = t->base.Image[0][t->mt->firstLevel];
+
+- if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
+- t->base.dirty_images[2] || t->base.dirty_images[3] ||
+- t->base.dirty_images[4] || t->base.dirty_images[5] ) {
+- /* flush */
+- RADEON_FIREVERTICES( rmesa );
+- /* layout memory space, once for all faces */
+- radeonSetTexImages( rmesa, tObj );
++ if (firstImage->Border > 0) {
++ fprintf(stderr, "%s: border\n", __FUNCTION__);
++ return GL_FALSE;
+ }
+
+- /* upload (per face) */
+- for (face = 0; face < 6; face++) {
+- if (t->base.dirty_images[face]) {
+- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
++ log2Width = firstImage->WidthLog2;
++ log2Height = firstImage->HeightLog2;
++ log2Depth = firstImage->DepthLog2;
++ texelBytes = firstImage->TexFormat->TexelBytes;
++
++ if (!t->image_override) {
++ if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++ const struct tx_table *table = tx_table;
++
++ t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
++ RADEON_TXFORMAT_ALPHA_IN_MAP);
++ t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
++
++ t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
++ t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
++ } else {
++ _mesa_problem(NULL, "unexpected texture format in %s",
++ __FUNCTION__);
++ return GL_FALSE;
+ }
+ }
+-
+- if ( !t->base.memBlock ) {
+- /* texmem alloc failed, use s/w fallback */
+- return GL_FALSE;
++
++ t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
++ t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
++
++ t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
++ RADEON_TXFORMAT_HEIGHT_MASK |
++ RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
++ RADEON_TXFORMAT_F5_WIDTH_MASK |
++ RADEON_TXFORMAT_F5_HEIGHT_MASK);
++ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
++ (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
++
++ t->tile_bits = 0;
++
++ if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
++ ASSERT(log2Width == log2Height);
++ t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
++ (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
++ /* don't think we need this bit, if it exists at all - fglrx does not set it */
++ (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
++ t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
++ (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
++ (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
++ (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
++ (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
++ (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
++ (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
++ (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+ }
+
+- return GL_TRUE;
+-}
++ t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
++ | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
+
+-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+-{
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-
+- if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
+- t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+- t->base.dirty_images[0] = ~0;
++ if ( !t->image_override ) {
++ if (firstImage->IsCompressed)
++ t->pp_txpitch = (firstImage->Width + 63) & ~(63);
++ else
++ t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
++ t->pp_txpitch -= 32;
+ }
+
+- ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+- if ( t->base.dirty_images[0] ) {
+- RADEON_FIREVERTICES( rmesa );
+- radeonSetTexImages( rmesa, tObj );
+- radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+- if ( !t->base.memBlock &&
+- !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) {
+- fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
+- return GL_FALSE;
+- }
++ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
++ t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+ }
+
+ return GL_TRUE;
+ }
+
+-
+-static GLboolean update_tex_common( GLcontext *ctx, int unit )
++static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+- struct gl_texture_object *tObj = texUnit->_Current;
+- radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+- GLenum format;
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
++ radeonTexObj *t = radeon_tex_obj(texObj);
++ int ret;
+
+- /* Fallback if there's a texture border */
+- if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+- fprintf(stderr, "%s: border\n", __FUNCTION__);
++ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
+- }
++
++ ret = setup_hardware_state(rmesa, t, unit);
++ if (ret == GL_FALSE)
++ return GL_FALSE;
++
+ /* yuv conversion only works in first unit */
+ if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
+ return GL_FALSE;
+
+- /* Update state if this is a different texture object to last
+- * time.
+- */
+- if ( rmesa->state.texture.unit[unit].texobj != t ) {
+- if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+- /* The old texture is no longer bound to this texture unit.
+- * Mark it as such.
+- */
+-
+- rmesa->state.texture.unit[unit].texobj->base.bound &=
+- ~(1UL << unit);
+- }
++ RADEON_STATECHANGE( rmesa, ctx );
++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
++ (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+
+- rmesa->state.texture.unit[unit].texobj = t;
+- t->base.bound |= (1UL << unit);
+- t->dirty_state |= 1<<unit;
+- driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+- }
++ RADEON_STATECHANGE( rmesa, tcl );
++ rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
+
++ rmesa->recheck_texgen[unit] = GL_TRUE;
+
+- /* Newly enabled?
+- */
+- if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
+- RADEON_STATECHANGE( rmesa, ctx );
+- rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
+- (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+-
+- RADEON_STATECHANGE( rmesa, tcl );
+-
+- rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
+-
+- rmesa->recheck_texgen[unit] = GL_TRUE;
+- }
+-
+- if (t->dirty_state & (1<<unit)) {
+- import_tex_obj_state( rmesa, unit, t );
+- /* may need to update texture matrix (for texrect adjustments) */
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+- }
++ import_tex_obj_state( rmesa, unit, t );
+
+ if (rmesa->recheck_texgen[unit]) {
+ GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+ TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+ rmesa->recheck_texgen[unit] = 0;
+- rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++ rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+
+- format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+- if ( rmesa->state.texture.unit[unit].format != format ||
+- rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
+- rmesa->state.texture.unit[unit].format = format;
+- rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
+- if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+- return GL_FALSE;
+- }
++ if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
++ return GL_FALSE;
+ }
+-
+ FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
++
++ t->validated = GL_TRUE;
+ return !t->border_fallback;
+ }
+
+-
+-
+ static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+ {
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+- if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
+- return (enable_tex_rect( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+- else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+- return (enable_tex_2d( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
+- }
+- else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
+- return (enable_tex_cube( ctx, unit ) &&
+- update_tex_common( ctx, unit ));
++
++ if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
++ return GL_FALSE;
+ }
+- else if ( texUnit->_ReallyEnabled ) {
+- return GL_FALSE;
++
++ if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
++ /* disable the unit */
++ disable_tex_obj_state(rmesa, unit);
++ return GL_TRUE;
+ }
+- else {
+- disable_tex( ctx, unit );
+- return GL_TRUE;
++
++ if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
++ _mesa_warning(ctx,
++ "failed to validate texture for unit %d.\n",
++ unit);
++ rmesa->state.texture.unit[unit].texobj = NULL;
++ return GL_FALSE;
+ }
++ rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
++ return GL_TRUE;
+ }
+
+ void radeonUpdateTextureState( GLcontext *ctx )
+ {
+- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ GLboolean ok;
+
++ /* set the ctx all textures off */
++ RADEON_STATECHANGE( rmesa, ctx );
++ rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
++
+ ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
+ radeonUpdateTextureUnit( ctx, 1 ) &&
+ radeonUpdateTextureUnit( ctx, 2 ));
+
+ FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
+
+- if (rmesa->TclFallback)
++ if (rmesa->radeon.TclFallback)
+ radeonChooseVertexState( ctx );
+ }
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
new file mode 100644
index 0000000..63680b4
@@ -6081,6 +35305,349 @@ index 0000000..d90fda7
+ struct gl_texture_image *texImage);
+
+#endif
---
-1.6.0.3
-
+diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+index 596a8aa..0df634b 100644
+--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
++++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+@@ -2031,6 +2031,9 @@
+ #define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00
+ #define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00
+ #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
++#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400
++#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500
++#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600
+ #define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100
+ #define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200
+ #define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300
+diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
+index dae576a..1c02617 100644
+--- a/src/mesa/main/enable.c
++++ b/src/mesa/main/enable.c
+@@ -922,10 +922,13 @@ _mesa_set_enable(GLcontext *ctx, GLenum cap, GLboolean state)
+ return;
+ FLUSH_VERTICES(ctx, _NEW_STENCIL);
+ ctx->Stencil.TestTwoSide = state;
+- if (state)
++ if (state) {
++ ctx->Stencil._BackFace = 2;
+ ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL;
+- else
++ } else {
++ ctx->Stencil._BackFace = 1;
+ ctx->_TriangleCaps &= ~DD_TRI_TWOSTENCIL;
++ }
+ break;
+
+ #if FEATURE_ARB_fragment_program
+diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
+index e1008d7..a42c446 100644
+--- a/src/mesa/main/getstring.c
++++ b/src/mesa/main/getstring.c
+@@ -82,7 +82,16 @@ compute_version(const GLcontext *ctx)
+ ctx->Extensions.ARB_vertex_shader &&
+ ctx->Extensions.ARB_fragment_shader &&
+ ctx->Extensions.ARB_texture_non_power_of_two &&
+- ctx->Extensions.EXT_blend_equation_separate);
++ ctx->Extensions.EXT_blend_equation_separate &&
++
++ /* Technically, 2.0 requires the functionality
++ * of the EXT version. Enable 2.0 if either
++ * extension is available, and assume that a
++ * driver that only exposes the ATI extension
++ * will fallback to software when necessary.
++ */
++ (ctx->Extensions.EXT_stencil_two_side
++ || ctx->Extensions.ATI_separate_stencil));
+ const GLboolean ver_2_1 = (ver_2_0 &&
+ ctx->Extensions.ARB_shading_language_120 &&
+ ctx->Extensions.EXT_pixel_buffer_object &&
+diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
+index 2014745..144c61e 100644
+--- a/src/mesa/main/mtypes.h
++++ b/src/mesa/main/mtypes.h
+@@ -1108,20 +1108,34 @@ struct gl_scissor_attrib
+
+ /**
+ * Stencil attribute group (GL_STENCIL_BUFFER_BIT).
++ *
++ * Three sets of stencil data are tracked so that OpenGL 2.0,
++ * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported
++ * simultaneously. In each of the stencil state arrays, element 0 corresponds
++ * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 /
++ * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the
++ * GL_EXT_stencil_two_side GL_BACK state.
++ *
++ * The derived value \c _BackFace is either 1 or 2 depending on whether or
++ * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled.
++ *
++ * The derived value \c _TestTwoSide is set when the front-face and back-face
++ * stencil state are different.
+ */
+ struct gl_stencil_attrib
+ {
+ GLboolean Enabled; /**< Enabled flag */
+ GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */
+- GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 1) */
++ GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */
+ GLboolean _TestTwoSide;
+- GLenum Function[2]; /**< Stencil function */
+- GLenum FailFunc[2]; /**< Fail function */
+- GLenum ZPassFunc[2]; /**< Depth buffer pass function */
+- GLenum ZFailFunc[2]; /**< Depth buffer fail function */
+- GLint Ref[2]; /**< Reference value */
+- GLuint ValueMask[2]; /**< Value mask */
+- GLuint WriteMask[2]; /**< Write mask */
++ GLubyte _BackFace;
++ GLenum Function[3]; /**< Stencil function */
++ GLenum FailFunc[3]; /**< Fail function */
++ GLenum ZPassFunc[3]; /**< Depth buffer pass function */
++ GLenum ZFailFunc[3]; /**< Depth buffer fail function */
++ GLint Ref[3]; /**< Reference value */
++ GLuint ValueMask[3]; /**< Value mask */
++ GLuint WriteMask[3]; /**< Write mask */
+ GLuint Clear; /**< Clear value */
+ };
+
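The new doc comment on gl_stencil_attrib is the key to the stencil.c and swrast hunks that follow: slot 0 is GL_FRONT, slot 1 is the OpenGL 2.0 / GL_ATI_separate_stencil back state, slot 2 is the GL_EXT_stencil_two_side back state, and the derived _BackFace picks between 1 and 2. A small standalone sketch of that indexing — the helper and the stand-in array are illustrative, not Mesa API:

#include <stdio.h>

/* Which of the three stencil-state slots holds the back-face state:
 * slot 1 for the GL 2.0 / ATI_separate_stencil path, slot 2 when
 * GL_STENCIL_TEST_TWO_SIDE_EXT is enabled (mirrors _BackFace). */
static int back_face_index(int ext_two_side_enabled)
{
    return ext_two_side_enabled ? 2 : 1;
}

int main(void)
{
    int function[3] = { 10, 11, 12 };   /* stand-ins for Function[0..2] */

    printf("GL 2.0 back state: slot %d (value %d)\n",
           back_face_index(0), function[back_face_index(0)]);
    printf("EXT two-side back state: slot %d (value %d)\n",
           back_face_index(1), function[back_face_index(1)]);
    return 0;
}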
+diff --git a/src/mesa/main/stencil.c b/src/mesa/main/stencil.c
+index 2a4c38b..b4ea997 100644
+--- a/src/mesa/main/stencil.c
++++ b/src/mesa/main/stencil.c
+@@ -27,21 +27,6 @@
+ * \file stencil.c
+ * Stencil operations.
+ *
+- * Note: There's an incompatibility between GL_EXT_stencil_two_side and
+- * OpenGL 2.0's two-sided stencil feature.
+- *
+- * With GL_EXT_stencil_two_side, calling glStencilOp/Func/Mask() only the
+- * front OR back face state (as set by glActiveStencilFaceEXT) is set.
+- *
+- * But with OpenGL 2.0, calling glStencilOp/Func/Mask() sets BOTH the
+- * front AND back state.
+- *
+- * So either we advertise the GL_EXT_stencil_two_side extension, or OpenGL
+- * 2.0, but not both.
+- *
+- * Also, note that GL_ATI_separate_stencil is different as well:
+- * glStencilFuncSeparateATI(GLenum frontfunc, GLenum backfunc, ...) vs.
+- * glStencilFuncSeparate(GLenum face, GLenum func, ...).
+ */
+
+
+@@ -198,6 +183,7 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ const GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
++ const GLint face = ctx->Stencil.ActiveFace;
+ ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+ if (!validate_stencil_func(ctx, func)) {
+@@ -207,9 +193,7 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
+
+ ref = CLAMP( ref, 0, stencilMax );
+
+- if (ctx->Extensions.EXT_stencil_two_side) {
+- /* only set active face state */
+- const GLint face = ctx->Stencil.ActiveFace;
++ if (face != 0) {
+ if (ctx->Stencil.Function[face] == func &&
+ ctx->Stencil.ValueMask[face] == mask &&
+ ctx->Stencil.Ref[face] == ref)
+@@ -218,9 +202,12 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
+ ctx->Stencil.Function[face] = func;
+ ctx->Stencil.Ref[face] = ref;
+ ctx->Stencil.ValueMask[face] = mask;
+- if (ctx->Driver.StencilFuncSeparate) {
+- ctx->Driver.StencilFuncSeparate(ctx, face ? GL_BACK : GL_FRONT,
+- func, ref, mask);
++
++ /* Only propagate the change to the driver if EXT_stencil_two_side
++ * is enabled.
++ */
++ if (ctx->Driver.StencilFuncSeparate && ctx->Stencil.TestTwoSide) {
++ ctx->Driver.StencilFuncSeparate(ctx, GL_BACK, func, ref, mask);
+ }
+ }
+ else {
+@@ -237,7 +224,9 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
+ ctx->Stencil.Ref[0] = ctx->Stencil.Ref[1] = ref;
+ ctx->Stencil.ValueMask[0] = ctx->Stencil.ValueMask[1] = mask;
+ if (ctx->Driver.StencilFuncSeparate) {
+- ctx->Driver.StencilFuncSeparate(ctx, GL_FRONT_AND_BACK,
++ ctx->Driver.StencilFuncSeparate(ctx,
++ ((ctx->Stencil.TestTwoSide)
++ ? GL_FRONT : GL_FRONT_AND_BACK),
+ func, ref, mask);
+ }
+ }
+@@ -259,17 +248,23 @@ void GLAPIENTRY
+ _mesa_StencilMask( GLuint mask )
+ {
+ GET_CURRENT_CONTEXT(ctx);
++ const GLint face = ctx->Stencil.ActiveFace;
++
+ ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+- if (ctx->Extensions.EXT_stencil_two_side) {
+- /* only set active face state */
+- const GLint face = ctx->Stencil.ActiveFace;
++ if (face != 0) {
++ /* Only modify the EXT_stencil_two_side back-face state.
++ */
+ if (ctx->Stencil.WriteMask[face] == mask)
+ return;
+ FLUSH_VERTICES(ctx, _NEW_STENCIL);
+ ctx->Stencil.WriteMask[face] = mask;
+- if (ctx->Driver.StencilMaskSeparate) {
+- ctx->Driver.StencilMaskSeparate(ctx, face ? GL_BACK : GL_FRONT, mask);
++
++ /* Only propagate the change to the driver if EXT_stencil_two_side
++ * is enabled.
++ */
++ if (ctx->Driver.StencilMaskSeparate && ctx->Stencil.TestTwoSide) {
++ ctx->Driver.StencilMaskSeparate(ctx, GL_BACK, mask);
+ }
+ }
+ else {
+@@ -280,7 +275,10 @@ _mesa_StencilMask( GLuint mask )
+ FLUSH_VERTICES(ctx, _NEW_STENCIL);
+ ctx->Stencil.WriteMask[0] = ctx->Stencil.WriteMask[1] = mask;
+ if (ctx->Driver.StencilMaskSeparate) {
+- ctx->Driver.StencilMaskSeparate(ctx, GL_FRONT_AND_BACK, mask);
++ ctx->Driver.StencilMaskSeparate(ctx,
++ ((ctx->Stencil.TestTwoSide)
++ ? GL_FRONT : GL_FRONT_AND_BACK),
++ mask);
+ }
+ }
+ }
+@@ -304,6 +302,8 @@ void GLAPIENTRY
+ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass)
+ {
+ GET_CURRENT_CONTEXT(ctx);
++ const GLint face = ctx->Stencil.ActiveFace;
++
+ ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+ if (!validate_stencil_op(ctx, fail)) {
+@@ -319,9 +319,8 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass)
+ return;
+ }
+
+- if (ctx->Extensions.EXT_stencil_two_side) {
++ if (face != 0) {
+ /* only set active face state */
+- const GLint face = ctx->Stencil.ActiveFace;
+ if (ctx->Stencil.ZFailFunc[face] == zfail &&
+ ctx->Stencil.ZPassFunc[face] == zpass &&
+ ctx->Stencil.FailFunc[face] == fail)
+@@ -330,9 +329,12 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass)
+ ctx->Stencil.ZFailFunc[face] = zfail;
+ ctx->Stencil.ZPassFunc[face] = zpass;
+ ctx->Stencil.FailFunc[face] = fail;
+- if (ctx->Driver.StencilOpSeparate) {
+- ctx->Driver.StencilOpSeparate(ctx, face ? GL_BACK : GL_FRONT,
+- fail, zfail, zpass);
++
++ /* Only propagate the change to the driver if EXT_stencil_two_side
++ * is enabled.
++ */
++ if (ctx->Driver.StencilOpSeparate && ctx->Stencil.TestTwoSide) {
++ ctx->Driver.StencilOpSeparate(ctx, GL_BACK, fail, zfail, zpass);
+ }
+ }
+ else {
+@@ -349,7 +351,9 @@ _mesa_StencilOp(GLenum fail, GLenum zfail, GLenum zpass)
+ ctx->Stencil.ZPassFunc[0] = ctx->Stencil.ZPassFunc[1] = zpass;
+ ctx->Stencil.FailFunc[0] = ctx->Stencil.FailFunc[1] = fail;
+ if (ctx->Driver.StencilOpSeparate) {
+- ctx->Driver.StencilOpSeparate(ctx, GL_FRONT_AND_BACK,
++ ctx->Driver.StencilOpSeparate(ctx,
++ ((ctx->Stencil.TestTwoSide)
++ ? GL_FRONT : GL_FRONT_AND_BACK),
+ fail, zfail, zpass);
+ }
+ }
+@@ -372,7 +376,7 @@ _mesa_ActiveStencilFaceEXT(GLenum face)
+
+ if (face == GL_FRONT || face == GL_BACK) {
+ FLUSH_VERTICES(ctx, _NEW_STENCIL);
+- ctx->Stencil.ActiveFace = (face == GL_FRONT) ? 0 : 1;
++ ctx->Stencil.ActiveFace = (face == GL_FRONT) ? 0 : 2;
+ }
+ else {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glActiveStencilFaceEXT(face)");
+@@ -513,19 +517,16 @@ _mesa_StencilMaskSeparate(GLenum face, GLuint mask)
+ void
+ _mesa_update_stencil(GLcontext *ctx)
+ {
+- if (ctx->Extensions.EXT_stencil_two_side) {
+- ctx->Stencil._TestTwoSide = ctx->Stencil.TestTwoSide;
+- }
+- else {
+- ctx->Stencil._TestTwoSide =
+- (ctx->Stencil.Function[0] != ctx->Stencil.Function[1] ||
+- ctx->Stencil.FailFunc[0] != ctx->Stencil.FailFunc[1] ||
+- ctx->Stencil.ZPassFunc[0] != ctx->Stencil.ZPassFunc[1] ||
+- ctx->Stencil.ZFailFunc[0] != ctx->Stencil.ZFailFunc[1] ||
+- ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] ||
+- ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1] ||
+- ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]);
+- }
++ const GLint face = ctx->Stencil._BackFace;
++
++ ctx->Stencil._TestTwoSide =
++ (ctx->Stencil.Function[0] != ctx->Stencil.Function[face] ||
++ ctx->Stencil.FailFunc[0] != ctx->Stencil.FailFunc[face] ||
++ ctx->Stencil.ZPassFunc[0] != ctx->Stencil.ZPassFunc[face] ||
++ ctx->Stencil.ZFailFunc[0] != ctx->Stencil.ZFailFunc[face] ||
++ ctx->Stencil.Ref[0] != ctx->Stencil.Ref[face] ||
++ ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[face] ||
++ ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[face]);
+ }
+
+
+@@ -544,17 +545,24 @@ _mesa_init_stencil(GLcontext *ctx)
+ ctx->Stencil.ActiveFace = 0; /* 0 = GL_FRONT, 1 = GL_BACK */
+ ctx->Stencil.Function[0] = GL_ALWAYS;
+ ctx->Stencil.Function[1] = GL_ALWAYS;
++ ctx->Stencil.Function[2] = GL_ALWAYS;
+ ctx->Stencil.FailFunc[0] = GL_KEEP;
+ ctx->Stencil.FailFunc[1] = GL_KEEP;
++ ctx->Stencil.FailFunc[2] = GL_KEEP;
+ ctx->Stencil.ZPassFunc[0] = GL_KEEP;
+ ctx->Stencil.ZPassFunc[1] = GL_KEEP;
++ ctx->Stencil.ZPassFunc[2] = GL_KEEP;
+ ctx->Stencil.ZFailFunc[0] = GL_KEEP;
+ ctx->Stencil.ZFailFunc[1] = GL_KEEP;
++ ctx->Stencil.ZFailFunc[2] = GL_KEEP;
+ ctx->Stencil.Ref[0] = 0;
+ ctx->Stencil.Ref[1] = 0;
++ ctx->Stencil.Ref[2] = 0;
+ ctx->Stencil.ValueMask[0] = ~0U;
+ ctx->Stencil.ValueMask[1] = ~0U;
++ ctx->Stencil.ValueMask[2] = ~0U;
+ ctx->Stencil.WriteMask[0] = ~0U;
+ ctx->Stencil.WriteMask[1] = ~0U;
++ ctx->Stencil.WriteMask[2] = ~0U;
+ ctx->Stencil.Clear = 0;
+ }
+diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
+index c925922..2e84dde 100644
+--- a/src/mesa/swrast/s_stencil.c
++++ b/src/mesa/swrast/s_stencil.c
+@@ -997,10 +997,12 @@ stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face )
+ GLboolean
+ _swrast_stencil_and_ztest_span(GLcontext *ctx, SWspan *span)
+ {
++ const GLuint face = (span->facing == 0) ? 0 : ctx->Stencil._BackFace;
++
+ if (span->arrayMask & SPAN_XY)
+- return stencil_and_ztest_pixels(ctx, span, span->facing);
++ return stencil_and_ztest_pixels(ctx, span, face);
+ else
+- return stencil_and_ztest_span(ctx, span, span->facing);
++ return stencil_and_ztest_span(ctx, span, face);
+ }
+
+
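The swrast hunk above is where the derived _BackFace value is finally consumed: back-facing spans now index the stencil arrays through it rather than using span->facing directly. A toy sketch of that selection, with made-up values that are not taken from the patch:

#include <stdio.h>

/* Pick the stencil-state slot for a span: front-facing spans always use
 * slot 0, back-facing spans use whatever _BackFace resolved to (1 or 2). */
static unsigned stencil_face(unsigned span_facing, unsigned back_face)
{
    return (span_facing == 0) ? 0u : back_face;
}

int main(void)
{
    printf("front-facing span        -> slot %u\n", stencil_face(0, 2));
    printf("back-facing, EXT path    -> slot %u\n", stencil_face(1, 2));
    printf("back-facing, GL 2.0 path -> slot %u\n", stencil_face(1, 1));
    return 0;
}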