
#ifdef HAVE_CONFIG_H
#include <kdrive-config.h>
#endif
#include "fbdev.h"
#include <sys/ioctl.h>

#include <errno.h>
/*
#include    "kaa.h"

#include    "gcstruct.h"
#include    "scrnintstr.h"
#include    "pixmapstr.h"
#include    "regionstr.h"
#include    "mistruct.h"
#include    "dixfontstr.h"
#include    "fb.h"
#include    "migc.h"
#include    "miline.h"
#include <sys/ioctl.h>
#include <errno.h>
*/
#include "pxa3xx-accel.h"
#include "fbdev.h"

#define DEBUG

/*
 * dbg(): printf-style diagnostic written to stderr and flushed immediately.
 *
 * Both variants expand to exactly one statement (do { } while (0)), so the
 * macro is safe as the body of an unbraced if/else; the flush can no longer
 * escape the condition guarding the print.
 */
#ifdef DEBUG
#define dbg(format, arg...)				\
	do {						\
		fprintf(stderr, format , ## arg);	\
		fflush(stderr);				\
	} while (0)
#else
#define dbg(format, arg...)	do { } while (0)
#endif

/* The single driver-wide accelerator state. NULL until pxa3xxGFXInit()
 * allocates it; pxa3xxGFXFini() frees it and resets it to NULL. */
static struct pxa3xxAccelPriv *globalPriv = NULL;  /* FIXME: Make dynamic */

/* Marker value compared against table entries by CheckComposite(). */
#define	RAST_OP_INVALID		0xFFFF


/*
 * Translation table from X11 raster ops to pxa3xx GC raster-op codes.
 * The index is the position of the X11 function named in each entry's
 * comment (0 = GXclear ... 15 = GXset).
 */
static const unsigned short pxa3xx_gc_raster_op[] = {
	[0x0] = 0x00,		/* GXclear:        0 */
	[0x1] = 0x88,		/* GXand:          src AND dst */
	[0x2] = 0x44,		/* GXandReverse:   src AND NOT dst */
	[0x3] = 0xCC,		/* GXcopy:         src */
	[0x4] = 0x22,		/* GXandInverted:  (NOT src) AND dst */
	[0x5] = 0xAA,		/* GXnoop:         dst */
	[0x6] = 0x66,		/* GXxor:          src XOR dst */
	[0x7] = 0xEE,		/* GXor:           src OR dst */
	[0x8] = 0x11,		/* GXnor:          (NOT src) AND (NOT dst) */
	[0x9] = 0x99,		/* GXequiv:        (NOT src) XOR dst */
	[0xA] = 0x55,		/* GXinvert:       NOT dst */
	[0xB] = 0xDD,		/* GXorReverse:    src OR (NOT dst) */
	[0xC] = 0x33,		/* GXcopyInverted: NOT src */
	[0xD] = 0xBB,		/* GXorInverted:   (NOT src) OR dst */
	[0xE] = 0x77,		/* GXnand:         (NOT src) OR (NOT dst) */
	[0xF] = 0xFF,		/* GXset:          1 */
};


/**
 * Marker-sync callback: logs the call and returns marker 0.
 * Its registration in pxa3xxGFXInit() is currently commented out.
 *
 * @param pScreen  unused
 * @return always 0
 */
int markSync(ScreenPtr pScreen){
	const int marker = 0;

	(void)pScreen;
	dbg("+-markSync()()\n");
	return marker;
}

/**
 * KAA waitMarker callback: blocks via pxa3xxGFX_Wait_Complete().
 * Neither argument is consulted and the wait result is discarded.
 */
void waitMarker(ScreenPtr pScreen, int marker){
	(void)pScreen;
	(void)marker;
	(void)pxa3xxGFX_Wait_Complete();
}



/*
 * Identifiers for the operation currently "prepared". startOp() stores one of
 * these in gfxPriv->opPrepared and endOp() resets that field to 0, so 0 means
 * "nothing prepared" and is not available as an operation id.
 */
#define OP_SOLID	2
#define OP_COPY		3
#define OP_COMPOSITE	4
#define OP_BLEND	5
#define OP_VIDEO	6

/**
 * Marks operation `op` (one of OP_*) as prepared.
 *
 * Only one operation may be prepared at a time. Finding another one still
 * pending is a driver logic error: it is logged and the process terminates
 * with a non-zero status so the failure is visible to the parent.
 */
inline void startOp(int op){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	
	if(gfxPriv->opPrepared){
		dbg("GFX: *************** Starting op '%i' while a '%i' is already prepared  ************\n", op, gfxPriv->opPrepared);
		exit(1);
	}
	gfxPriv->opPrepared = op;
}

/**
 * Asserts that the currently prepared operation is `op` (0 = none).
 *
 * A mismatch is a driver logic error: it is logged and the process
 * terminates with a non-zero status.
 */
inline void checkOp(int op){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	
	if(gfxPriv->opPrepared != op){
		dbg("GFX: *************** chcek op with %i while prepared %i ************\n", op, gfxPriv->opPrepared);
		exit(1);
	}
}

/**
 * Finishes operation `op`, resetting the prepared-operation marker to 0.
 *
 * Ending an operation other than the one prepared is a driver logic error:
 * it is logged and the process terminates with a non-zero status.
 */
inline void endOp(int op){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	
	if(gfxPriv->opPrepared != op){
		dbg("GFX: *************** ending op %i while %i is prepared ************\n", op, gfxPriv->opPrepared);
		exit(1);
	}
	gfxPriv->opPrepared = 0;
}

/**
 * Maps a pixmap's bits-per-pixel to the controller's pixel-format code.
 *
 * @param pPixmap  pixmap whose drawable.bitsPerPixel is inspected
 * @return PF_16B_RGB565 for 16 bpp, PF_32B_RGBA8888 for 32 bpp.
 *
 * Any other value is treated as fatal: it is logged and the process exits
 * with a non-zero status. The trailing return only keeps compilers quiet.
 */
unsigned char getPixelFormat(PixmapPtr pPixmap){

	switch(pPixmap->drawable.bitsPerPixel){
		case 16: return PF_16B_RGB565;
		case 32: return PF_32B_RGBA8888;
		// Add more bit-depths here
		default:
			/* Report the value that was actually switched on. */
			dbg("GFX: ****** Unsupported bits per pixel %i *****\n", pPixmap->drawable.bitsPerPixel);
			exit(1);
			return PF_INVALID;
	}
}

/**
 * Bytes per pixel of a pixmap: drawable.bitsPerPixel divided by 8
 * (integer division).
 */
inline unsigned char getStep(PixmapPtr pPixmap){
	unsigned char bytesPerPixel = pPixmap->drawable.bitsPerPixel / 8;

	return bytesPerPixel;
}

/**
 * Row stride of a pixmap, taken from pPixmap->devKind.
 *
 * KAA stores its 'pitch' in devKind, which is what this driver calls the
 * stride. Because of pitchAlign=4 it is not always exactly width*bpp/8, so
 * it must not be recomputed from the drawable width.
 */
inline unsigned short getStride(PixmapPtr pPixmap){
	unsigned short pitch = pPixmap->devKind;

	return pitch;
}

/**
 * Tells whether a pixmap's storage lies inside the usable video memory.
 *
 * The pixmap's offset from the start of the mapping (vidMem.virt) must be
 * strictly below the limit: the scrap-area offset when one is configured
 * (non-zero), otherwise the total memory size. An offset equal to the limit
 * is one past the usable region and is rejected. A pointer below the mapping
 * wraps to a huge unsigned offset and is rejected as well.
 *
 * @return non-zero when the pixmap is inside video memory, 0 otherwise
 */
inline int isPixmapInVidmem(struct pxa3xxAccelPriv *gfxPriv, PixmapPtr pPixmap){
	unsigned long relative = ((unsigned long)pPixmap->devPrivate.ptr) - ((unsigned long)gfxPriv->vidMem.virt);
	unsigned long limit = gfxPriv->vidMem.scrapAreaRel ? gfxPriv->vidMem.scrapAreaRel : gfxPriv->vidMem.size;

	return relative < limit;
}

/**
 * Physical address of a pixmap's storage.
 *
 * Computes the pixmap's byte offset from the start of the video-memory
 * mapping and delegates validation and translation to
 * getPhysAddrFromOffset().
 */
inline unsigned long getPhysAddr(PixmapPtr pPixmap){
	unsigned long pixmapAddr = (unsigned long)pPixmap->devPrivate.ptr;
	unsigned long mappingBase = (unsigned long)globalPriv->vidMem.virt;  /* FIXME: Make dynamic */

	return getPhysAddrFromOffset(pixmapAddr - mappingBase);
}

/**
 * Translates an offset into video memory to a physical address.
 *
 * @param relative  byte offset from the start of video memory
 * @return vidMem.phys + relative
 *
 * The offset must lie strictly below the usable limit (the scrap-area offset
 * when non-zero, otherwise the memory size) and the resulting address must
 * be 8-byte aligned. Violations are driver logic errors: they are logged and
 * the process exits with a non-zero status.
 */
inline unsigned long getPhysAddrFromOffset(unsigned long relative){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	
	unsigned long limit = gfxPriv->vidMem.scrapAreaRel ? gfxPriv->vidMem.scrapAreaRel : gfxPriv->vidMem.size;

	/* An offset equal to the limit is already outside the region. */
	if( relative >= limit ){
		dbg("GFX: *************** getPhysAddr() with pixmap not in vidMem: %08lx ************\n", relative);
		exit(1);
	}
	if( (gfxPriv->vidMem.phys + relative) & 0x07 ){
		dbg("GFX: *************** getPhysAddr() with pixmap not 8 byte aligned: %08lx ************\n", relative);
		exit(1);
	}
	return gfxPriv->vidMem.phys + relative;
}

inline Bool isGFXEnabled(void){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	

	return gfxPriv->enabled;
}

/**
 * KAA PrepareSolid callback: sets up a solid fill of `pPixmap` with `fg`.
 *
 * The colour-fill command issued by Solid() writes the colour verbatim, so
 * only a plain copy raster op with every plane of the pixmap's depth enabled
 * can be accelerated; anything else is declined so that KAA falls back to
 * software.
 *
 * @param pPixmap    destination pixmap (must live in video memory)
 * @param alu        X11 raster op; only GXcopy is accepted
 * @param planemask  X11 plane mask; must cover all bits of the pixmap depth
 * @param fg         fill colour
 * @return TRUE when the fill was prepared (OP_SOLID is then pending),
 *         FALSE to request the software path
 */
Bool PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	
	const int aluCopy = 0x3;	/* numeric value of X11 GXcopy */
	unsigned int depth = pPixmap->drawable.depth;
	Pixel fullMask = (depth >= 32) ? 0xFFFFFFFFUL : ((1UL << depth) - 1);
	int ret;

	checkOp(0);
	if(!gfxPriv->enabled)
		return FALSE;

	/* The hardware fill cannot combine with the destination or skip planes. */
	if(alu != aluCopy || (planemask & fullMask) != fullMask)
		return FALSE;

	gfxPriv->thisSolid.pixelFormat = getPixelFormat(pPixmap);
	gfxPriv->thisSolid.fg = fg;
	gfxPriv->thisSolid.width = pPixmap->drawable.width;  //For checks
	gfxPriv->thisSolid.height = pPixmap->drawable.height; //For checks

	//set the destination buffers (errors in the get*() functions will drop through Cmd_set_buff)
	ret = pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			getPhysAddr(pPixmap),
			getStep(pPixmap),
			getStride(pPixmap),
			gfxPriv->thisSolid.pixelFormat);

	if(ret < 0){
		dbg("GFX: Error queueing buffer set, dropping back to software Solid\n");
		return FALSE;		
	}

	startOp(OP_SOLID);
	return TRUE;
}

/**
 * KAA Solid callback: fills the rectangle (x1,y1)-(x2,y2) (x2/y2 exclusive)
 * using the colour and pixel format stored by PrepareSolid().
 *
 * The rectangle must be non-empty and lie within the pixmap dimensions
 * recorded by PrepareSolid(). A violation is treated as a driver logic
 * error: it is logged and the process exits with a non-zero status.
 */
void Solid(int x1, int y1, int x2, int y2){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	checkOp(OP_SOLID);
	
	if( x1 < 0 || y1 < 0 || x2 <= x1 || y2 <= y1 ||
			x2 > gfxPriv->thisSolid.width || y2 > gfxPriv->thisSolid.height ){
		dbg("GFX: **************** Solid with invalid rect(%i,%i)-(%i,%i) ************\n", x1, y1, x2, y2);
		exit(1);
	}

	pxa3xxGFXCmd_color_fill(x1, y1, (x2-x1), (y2-y1),
		gfxPriv->thisSolid.pixelFormat,
		gfxPriv->thisSolid.fg);

}

/**
 * KAA DoneSolid callback: closes the solid-fill operation opened by
 * PrepareSolid() by clearing the pending OP_SOLID marker.
 */
void DoneSolid(void){
	endOp(OP_SOLID);
}

/**
 * Save callback handed to KdOffscreenAlloc() for the temporary copy area.
 * Intentionally does nothing; the original author was unsure what, if
 * anything, needs saving here.
 */
void scrapSave(ScreenPtr pScreen, KdOffscreenArea *area) {
	(void)pScreen;
	(void)area;
}


/**
 * KAA PrepareCopy callback: sets up a blit from pSrcPixmap to pDstPixmap.
 *
 * Copy() always issues a plain source-copy blit, so only a copy raster op
 * with every plane of the destination depth enabled is accepted; anything
 * else is declined so that KAA falls back to software.
 *
 * When source and destination are the same pixmap the buffer description is
 * stored for Copy() and a temporary off-screen area is allocated for use as
 * a bounce buffer (released in DoneCopy()). Otherwise both hardware buffers
 * are programmed immediately.
 *
 * @param upsidedown, reverse  not consulted
 * @return TRUE when the copy was prepared (OP_COPY is then pending),
 *         FALSE to request the software path
 */
Bool PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, Bool upsidedown,
			Bool reverse, int alu, Pixel planemask){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */
	const int aluCopy = 0x3;	/* numeric value of X11 GXcopy */
	unsigned int depth = pDstPixmap->drawable.depth;
	Pixel fullMask = (depth >= 32) ? 0xFFFFFFFFUL : ((1UL << depth) - 1);
	int ret1, ret2;
	unsigned long srcDstMemSize;

	checkOp(0);
	if(!gfxPriv->enabled)
		return FALSE;

	/* The blit issued by Copy() is a fixed source copy of all planes. */
	if(alu != aluCopy || (planemask & fullMask) != fullMask)
		return FALSE;

	gfxPriv->thisCopy.srcWidth = pSrcPixmap->drawable.width;
	gfxPriv->thisCopy.srcHeight = pSrcPixmap->drawable.height;
	gfxPriv->thisCopy.dstWidth = pDstPixmap->drawable.width;
	gfxPriv->thisCopy.dstHeight = pDstPixmap->drawable.height;

	if(pSrcPixmap == pDstPixmap){
		gfxPriv->thisCopy.srcIsDest = 1;

		//we need to store the buffer info as we'll need to set them later
		gfxPriv->thisCopy.srcDstPhys = getPhysAddr(pSrcPixmap);
		gfxPriv->thisCopy.srcDstStep = getStep(pSrcPixmap);
		gfxPriv->thisCopy.srcDstStride = getStride(pSrcPixmap);
		gfxPriv->thisCopy.srcDstPixelFormat = getPixelFormat(pSrcPixmap);

		/* If the source and dest are the same pixmap things get a bit complicated... */
		/* We need enough memory for a complete copy of the pixmap. Copy()
		 * addresses the temporary area with the pixmap's stride, which can
		 * exceed width*step because of pitch alignment, so size it by stride. */
		srcDstMemSize = (unsigned long)gfxPriv->thisCopy.srcDstStride * pSrcPixmap->drawable.height;

		gfxPriv->thisCopy.temp_off_screen = KdOffscreenAlloc( 
				pSrcPixmap->drawable.pScreen, srcDstMemSize, 
				64, TRUE, scrapSave, NULL);
		
		if (gfxPriv->thisCopy.temp_off_screen == NULL){
			dbg("GFX: Offscreen alloc failed for temp area for up-memory copies."
				"Requested %lu bytes\n", srcDstMemSize );
			return FALSE; /* can't be sure we can do it */
		}
		gfxPriv->thisCopy.temp_phys = getPhysAddrFromOffset(
			gfxPriv->thisCopy.temp_off_screen->offset);
		gfxPriv->thisCopy.pScreen = pSrcPixmap->drawable.pScreen;

	}else{
		gfxPriv->thisCopy.srcIsDest = 0;
		gfxPriv->thisCopy.temp_off_screen = NULL;

		//otherwise we can just set the source and destination buffers now
		ret1 = pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
				getPhysAddr(pSrcPixmap),
				getStep(pSrcPixmap),
				getStride(pSrcPixmap),
				getPixelFormat(pSrcPixmap));

		ret2 = pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
				getPhysAddr(pDstPixmap),
				getStep(pDstPixmap),
				getStride(pDstPixmap),
				getPixelFormat(pDstPixmap));

		/* Without both buffers programmed the blits would hit stale buffers. */
		if(ret1 < 0 || ret2 < 0){
			dbg("GFX: Error queueing buffer set, dropping back to software Copy\n");
			return FALSE;
		}
	}

	startOp(OP_COPY);
	return TRUE;
}

/**
 * KAA Copy callback: blits a width x height rectangle from (srcX,srcY) to
 * (dstX,dstY) using the state stored by PrepareCopy().
 *
 * Both rectangles must lie inside the pixmap dimensions recorded by
 * PrepareCopy(); a violation is a driver logic error that is logged before
 * the process exits with a non-zero status.
 *
 * For distinct source and destination pixmaps the hardware buffers are
 * already set and a single blit is issued. For a copy within one pixmap:
 *  - if the destination is below the source, or on the same row and to its
 *    right, the rectangle is first copied to the temporary area and then
 *    copied back to its new position;
 *  - otherwise it is copied in place.
 */
void Copy(int srcX, int	srcY, int dstX, int dstY,int width, int height){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	checkOp(OP_COPY);

	if( srcX < 0 || srcY < 0 ||	
			(srcX + width) > gfxPriv->thisCopy.srcWidth || 
			(srcY + height) > gfxPriv->thisCopy.srcHeight ){
		dbg("GFX: **************** Copy with invalid src rect (%i,%i) size (%ix%i) in (%ix%i) ************\n", 
				srcX, srcY, width, height, gfxPriv->thisCopy.srcWidth, gfxPriv->thisCopy.srcHeight);
		exit(1);
	}

	if(  dstX < 0 || dstY < 0 ||
			(dstX + width) > gfxPriv->thisCopy.dstWidth || 
			(dstY + height) > gfxPriv->thisCopy.dstHeight ){
		dbg("GFX: **************** Copy with invalid dst rect (%i,%i) size (%ix%i) in (%ix%i) ************\n", 
				dstX, dstY, width, height, gfxPriv->thisCopy.dstWidth, gfxPriv->thisCopy.dstHeight);
		exit(1);
	}

	if ( !gfxPriv->thisCopy.srcIsDest ){
		/* For a src!=dst copy, the buffers are already set */
		pxa3xxGFXCmd_copy_blt(srcX, srcY, dstX, dstY, width, height);
		return;
	}

	/* For src==dst the stored buffer description must have been filled in */
	if(gfxPriv->thisCopy.srcDstPixelFormat == 0){
		dbg("GFX: ******************** src=dst with srcDstPixelFormat==0 **************\n");
		exit(1);
	}
	
	/* TODO: Add a check for non-overlapping here, in which case 
		we don't actually need to do this */
 	if( ((dstY > srcY) || (dstY == srcY && dstX > srcX)) ){
		//first copy the area from the pixmap to the temporary area
		pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			gfxPriv->thisCopy.temp_phys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_copy_blt(srcX, srcY, 0, 0, width, height);

		//and then copy it back to the new position
		pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
			gfxPriv->thisCopy.temp_phys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);
	
		pxa3xxGFXCmd_copy_blt(0, 0, dstX, dstY, width, height);

	}else {
		//otherwise we can copy in place, but we still need to set the buffers
		pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_copy_blt(srcX, srcY, dstX, dstY, width, height);
	}

}

/**
 * KAA DoneCopy callback: releases the temporary off-screen area if
 * PrepareCopy() allocated one, then clears the pending OP_COPY marker.
 */
void DoneCopy(void){
	struct pxa3xxAccelPriv *priv = globalPriv;  /* FIXME: Make dynamic */

	if (priv->thisCopy.temp_off_screen == NULL) {
		/* Nothing was allocated for this copy. */
		endOp(OP_COPY);
		return;
	}

	KdOffscreenFree(priv->thisCopy.pScreen, priv->thisCopy.temp_off_screen);
	priv->thisCopy.temp_off_screen = NULL;
	endOp(OP_COPY);
}

/**
 * KAA PrepareBlend callback. Currently disabled: it returns FALSE before
 * doing anything, so blends are never accelerated.
 *
 * NOTE(review): everything after the first return is unreachable; it looks
 * like work in progress (enabled check plus video-memory checks on both
 * pixmaps) kept for when blending is implemented.
 */
Bool PrepareBlend(int op, PicturePtr pSrcPicture, PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pDst){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */
	return FALSE;	/* unconditional: acceleration of blends is switched off */

	if(!gfxPriv->enabled)
		return FALSE;

/*	dbg("+-PrepareBlend()\n");
	dbg("GFX: op = %i\n", op);
	dbg("GFX: pSrcPicture = %p\n", pSrcPicture);
	dbg("GFX: pDstPicture = %p\n", pDstPicture);
	dbg("GFX: pSrc = %p\n", pSrc);
	dbg("GFX: pDst = %p\n", pDst);
*/
	if( !isPixmapInVidmem(gfxPriv, pDst) ){
		dbg("GFX: PrepareBlend with dst not in vidmem\n");
		return FALSE;
	}
	if( !isPixmapInVidmem(gfxPriv, pSrc) ){
		dbg("GFX: PrepareBlend with src not in vidmem\n");
		return FALSE;
	}

	return TRUE;
}

/**
 * KAA Blend callback stub: only logs the call; no arguments are used and
 * nothing is drawn.
 */
void Blend(int srcX, int srcY, int dstX, int dstY, int width, int height){
	(void)srcX;
	(void)srcY;
	(void)dstX;
	(void)dstY;
	(void)width;
	(void)height;
	dbg("+-Blend()\n");
}

/**
 * KAA DoneBlend callback stub: only logs the call.
 */
void DoneBlend(void){
	dbg("+-DoneBlend()\n");
}

/**
 * KAA CheckComposite callback. Currently disabled: it returns FALSE before
 * doing anything, so composite operations are never accelerated.
 *
 * NOTE(review): everything after the first return is unreachable. If it is
 * ever re-enabled, `op` indexes the 16-entry pxa3xx_gc_raster_op table
 * without a bounds check, no table entry equals RAST_OP_INVALID, and the
 * "mask == dst" test can never fire because a non-NULL mask has already been
 * rejected above it — confirm the intended checks before enabling.
 */
Bool CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	return FALSE;	/* unconditional: composite acceleration is switched off */

	if(!gfxPriv->enabled)
		return FALSE;
/*	dbg("GFX: +-CheckComposite()\n");
	dbg("GFX: op = %i\n", op);
	dbg("GFX: pSrcPicture = %p\n", pSrcPicture);
	dbg("GFX: pMaskPicture = %p\n", pMaskPicture);
	dbg("GFX: pDstPicture = %p\n", pDstPicture);
*/
	if( pMaskPicture ){
		dbg("GFX: checkComposite mask != null\n");
		return FALSE;
	}

	if( pxa3xx_gc_raster_op[op] == RAST_OP_INVALID )
		return FALSE;

	if( pSrcPicture == pDstPicture ){
		dbg("GFX: checkComposite src == dst\n");
		return FALSE;
	}
	
	if( pMaskPicture == pDstPicture ){
		dbg("GFX: checkComposite mask == dst\n");
		return FALSE;
	}

	return TRUE;
}

/**
 * KAA PrepareComposite callback: programs source, optional mask and
 * destination buffers for a raster blit performed by Composite().
 *
 * `op` is translated through pxa3xx_gc_raster_op; values outside that table
 * are declined. All pixmaps involved must live in video memory, otherwise
 * FALSE is returned so the software path is used. Source or mask being the
 * destination pixmap is a driver logic error: it is logged and the process
 * exits with a non-zero status.
 *
 * NOTE(review): the table is documented as X11 GX raster ops while this
 * callback receives a composite operator — confirm the mapping is intended.
 *
 * @return TRUE when prepared (OP_COMPOSITE is then pending), FALSE otherwise
 */
Bool PrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
		PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */
	const int ropCount = (int)(sizeof(pxa3xx_gc_raster_op) / sizeof(pxa3xx_gc_raster_op[0]));
	int ret1, ret2, ret3;

	checkOp(0);

	if(!gfxPriv->enabled)
		return FALSE;

	/* Never index past the translation table. */
	if(op < 0 || op >= ropCount){
		dbg("GFX: Composite with unsupported op %i\n", op);
		return FALSE;
	}

	gfxPriv->thisCopy.op = (unsigned char)pxa3xx_gc_raster_op[op];
	gfxPriv->thisCopy.srcWidth = pSrc->drawable.width;
	gfxPriv->thisCopy.srcHeight = pSrc->drawable.height;
	gfxPriv->thisCopy.dstWidth = pDst->drawable.width;
	gfxPriv->thisCopy.dstHeight = pDst->drawable.height;

	if(pMask){
		gfxPriv->thisCopy.maskWidth = pMask->drawable.width;
		gfxPriv->thisCopy.maskHeight = pMask->drawable.height;
		gfxPriv->thisCopy.maskValid = 1;
	}else{
		gfxPriv->thisCopy.maskValid = 0;
	}

	if( !isPixmapInVidmem(gfxPriv, pDst) ){
		dbg("GFX: Composite with dst not in vidmem\n");
		return FALSE;
	}
	if( !isPixmapInVidmem(gfxPriv, pSrc) ){
		dbg("GFX: Composite with src not in vidmem\n");
		return FALSE;
	}
	if( gfxPriv->thisCopy.maskValid && !isPixmapInVidmem(gfxPriv, pMask) ){
		dbg("GFX: Composite with mask not in vidmem\n");
		return FALSE;
	}

	if(pSrc == pDst){
		dbg("GFX: **************** SRC==DST in preparecomposite ************8\n");
		exit(1);
	}

	if(pMask == pDst){
		dbg("GFX: **************** MASK==DST in preparecomposite ************8\n");
		exit(1);
	}

	gfxPriv->thisCopy.srcIsDest = 0;

	//source, mask and destination are distinct, so set the buffers now
	ret1 = pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
			getPhysAddr(pSrc),
			getStep(pSrc),
			getStride(pSrc),
			getPixelFormat(pSrc));

	ret2 = 0;
	if(pMask){
		ret2 = pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE1,
				getPhysAddr(pMask),
				getStep(pMask),
				getStride(pMask),
				getPixelFormat(pMask));
	}

	ret3 = pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			getPhysAddr(pDst),
			getStep(pDst),
			getStride(pDst),
			getPixelFormat(pDst));

	/* Without every buffer programmed the blit would hit stale buffers. */
	if(ret1 < 0 || ret2 < 0 || ret3 < 0){
		dbg("GFX: Error queueing buffer set, dropping back to software Composite\n");
		return FALSE;
	}

	startOp(OP_COMPOSITE);	
	return TRUE;
}

void  Composite(int srcX, int srcY, int maskX, int maskY,
			int dstX, int dstY, int width, int height){
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	dbg("GFX: +-Composite(): src=(%i,%i), dst=(%i,%i), mask=(%i,%i), size=(%ix%i), op=%i\n",
		srcX, srcY, dstX, dstY, maskX, maskY, width, height,  gfxPriv->thisCopy.op);


	if( srcX < 0 || srcY < 0 ||	
			(srcX + width) > gfxPriv->thisCopy.srcWidth || 
			(srcY + height) > gfxPriv->thisCopy.srcHeight ){
		dbg("GFX: Composite: **************** Copy with invalid src rect (%i,%i) size (%ix%i) in (%ix%i) ************\n", 
				srcX, srcY, width, height, gfxPriv->thisCopy.srcWidth, gfxPriv->thisCopy.srcHeight);
		exit(0);
	}

	if( gfxPriv->thisCopy.maskValid && (maskX < 0 || maskY < 0 ||	
			(maskX + width) > gfxPriv->thisCopy.maskWidth || 
			(maskY + height) > gfxPriv->thisCopy.maskHeight ) ){
		dbg("GFX: Composite: **************** Copy with invalid mask rect (%i,%i) size (%ix%i) in (%ix%i) ************\n", 
				maskX, maskY, width, height, gfxPriv->thisCopy.maskWidth, gfxPriv->thisCopy.maskHeight);
		exit(0);
	}

	if(  dstX < 0 || dstY < 0 ||
			(dstX + width) > gfxPriv->thisCopy.dstWidth || 
			(dstY + height) > gfxPriv->thisCopy.dstHeight ){
		dbg("GFX: Composite: **************** Copy with invalid src rect (%i,%i) size (%ix%i) in (%ix%i) ************\n", 
				dstX, dstY, width, height, gfxPriv->thisCopy.dstWidth, gfxPriv->thisCopy.dstHeight);
		exit(0);
	}

	if ( !gfxPriv->thisCopy.srcIsDest ){
		/* For a src!=dst copy, the buffers are already set */
		dbg("GFX: doing composite!\n");
		pxa3xxGFXCmd_raster_blt(srcX, srcY, maskX, maskY, dstX, dstY, width, height, gfxPriv->thisCopy.op);
		return;
	}

	/* For src==dst, if the dest is downward from the source
		we need to copy out to our scrap area first */
	if(gfxPriv->thisCopy.srcDstPixelFormat == 0){
		dbg("GFX: Composite: ******************** src=dst with srcDstPixelFormat==0 **************\n");
		exit(0);
	}
	
	dbg("GFX: Composite: ******************** We are made of the fail ******************** \n");
	exit(0);

	/* TODO: Add a check for non-overlapping here, in which case 
		we don't actually need to do this */
 	if( ((dstY > srcY) || (dstY == srcY && dstX > srcX)) ){
		//first copy the area from the screen to the scrap area	
	


	}else {
		//otherwise we can copy in place, but we still need to set the buffers
		pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		if(gfxPriv->thisCopy.maskValid)
			pxa3xxGFXCmd_set_buff(GC_BUFFI_SOURCE1,
				gfxPriv->thisCopy.srcDstPhys,
				gfxPriv->thisCopy.srcDstStep,
				gfxPriv->thisCopy.srcDstStride,
				gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_set_buff(GC_BUFFI_DEST0,
			gfxPriv->thisCopy.srcDstPhys,
			gfxPriv->thisCopy.srcDstStep,
			gfxPriv->thisCopy.srcDstStride,
			gfxPriv->thisCopy.srcDstPixelFormat);

		pxa3xxGFXCmd_raster_blt(srcX, srcY, maskX, maskY, dstX, dstY, width, height, gfxPriv->thisCopy.op);
	}
	checkOp(OP_COMPOSITE);	
}

/**
 * KAA DoneComposite callback: logs the call and clears the pending
 * OP_COMPOSITE marker.
 */
void DoneComposite(void){
	dbg("GFX: +-DoneComposite()\n");
	endOp(OP_COMPOSITE);
}


/**
 * KAA PrepareTrapezoids callback stub: trapezoids are not accelerated, so
 * this always declines. Neither argument is used.
 */
Bool PrepareTrapezoids(PicturePtr pDstPicture, PixmapPtr pDst){
	(void)pDstPicture;
	(void)pDst;
	return FALSE;
}

/**
 * KAA Trapezoids callback stub: does nothing; both arguments are ignored.
 */
void Trapezoids(KaaTrapezoid *traps, int ntraps){
	(void)traps;
	(void)ntraps;
}

/**
 * KAA DoneTrapezoids callback stub: does nothing.
 */
void DoneTrapezoids(void) {
	return;
}

/**
 * KAA UploadToScreen callback stub: uploads are not accelerated, so this
 * always returns FALSE. No argument is used.
 */
Bool UploadToScreen(PixmapPtr pDst, char *src, int src_pitch) {
	(void)pDst;
	(void)src;
	(void)src_pitch;
	return FALSE;
}

/**
 * KAA UploadToScratch callback stub: always returns FALSE. Neither pixmap
 * is used.
 */
Bool UploadToScratch(PixmapPtr pSrc, PixmapPtr pDst) {
	(void)pSrc;
	(void)pDst;
	return FALSE;
}



/**
 * Initialises hardware acceleration for `pScreen`.
 *
 * Allocates the driver state, records the video-memory layout, fills in the
 * KAA callback table, opens the pxa3xx-gc character device, resets the
 * controller and registers with KAA. Acceleration starts out disabled; see
 * pxa3xxGFXEnable().
 *
 * @return TRUE on success. FALSE when there is less than 1KB of off-screen
 *         memory, when a device is already initialised, or when allocation,
 *         device open, controller reset or kaaDrawInit() fails. Every
 *         failure after allocation closes the device (if open), frees the
 *         state and resets globalPriv, so nothing leaks and a later call can
 *         try again.
 */
Bool pxa3xxGFXInit(ScreenPtr pScreen) {
	KdScreenPriv(pScreen);
	KdScreenInfo *screen = pScreenPriv->screen;
	FbdevPriv *fbPriv = pScreenPriv->card->driver; /* grab a copy of the fixed info struct */
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */
	int ret;

	if( (screen->off_screen_base + 1024) > screen->memory_size ){
		dbg("GFX: Less than 1KB of extra video memory for pixmaps. Not accelerating!\n");
		return FALSE;
	}
	
	if ( gfxPriv ){
		dbg("GFX: Cannot have more than 1 pxa3xx-gc device.\n");
		return FALSE;
	}

	gfxPriv = (struct pxa3xxAccelPriv *) xalloc( sizeof(struct pxa3xxAccelPriv) );
	if ( !gfxPriv ){
		dbg("GFX: Cannot alloc globalPriv\n");
		return FALSE;
	}
	globalPriv = gfxPriv; /* FIXME: Make dynamic */
	memset(gfxPriv, 0, sizeof(struct pxa3xxAccelPriv) );
	gfxPriv->cdev_fd = -1;	/* not open yet; lets the failure path tell */

	/* Copy lots of annoyingly named and placed things into our structure */
	/* TODO: Many of these probably arn't used now */
	gfxPriv->enabled = FALSE;
	gfxPriv->fbPriv = fbPriv;
	gfxPriv->vidMem.phys = (unsigned long)fbPriv->fix.smem_start;
	gfxPriv->vidMem.virt = screen->memory_base;
	gfxPriv->vidMem.size = screen->memory_size;
	gfxPriv->vidMem.width = screen->width;
	gfxPriv->vidMem.height = screen->height;
	gfxPriv->vidMem.stride = screen->fb[0].byteStride;
	gfxPriv->vidMem.bpp = screen->fb[0].bitsPerPixel;
	gfxPriv->vidMem.step = (gfxPriv->vidMem.bpp / 8);
	gfxPriv->vidMem.kdrive_off_screen_base = screen->off_screen_base;	
	gfxPriv->vidMem.vidSize = gfxPriv->vidMem.width * gfxPriv->vidMem.height * gfxPriv->vidMem.step;	

	/* No dedicated scrap area is reserved: same-pixmap copies allocate a
	 * temporary area with KdOffscreenAlloc() in PrepareCopy() instead.
	 * Zero means "no scrap area" to the video-memory range checks. */
	gfxPriv->vidMem.scrapAreaRel = 0;
	gfxPriv->vidMem.scrapAreaPhys = 0;

	/** Setup the KAA callbacks */
	memset(&gfxPriv->kaa, 0, sizeof(KaaScreenInfoRec));

	gfxPriv->kaa.offsetAlign = 8;
	gfxPriv->kaa.pitchAlign = 4;	/* The H/W can cope with this = 1 but kaa can't for some reason. */
	gfxPriv->kaa.flags = KAA_OFFSCREEN_PIXMAPS;
	dbg("GFX: aligns: pitch=%i, offset=%i\n", gfxPriv->kaa.pitchAlign, gfxPriv->kaa.offsetAlign);

//	gfxPriv->kaa.markSync		= markSync;
	gfxPriv->kaa.waitMarker		= waitMarker;
	gfxPriv->kaa.PrepareSolid	= PrepareSolid;
	gfxPriv->kaa.Solid		= Solid;
	gfxPriv->kaa.DoneSolid		= DoneSolid;
	gfxPriv->kaa.PrepareCopy	= PrepareCopy;
	gfxPriv->kaa.Copy		= Copy;
	gfxPriv->kaa.DoneCopy		= DoneCopy;
	gfxPriv->kaa.PrepareBlend	= PrepareBlend;
	gfxPriv->kaa.Blend		= Blend;
	gfxPriv->kaa.DoneBlend		= DoneBlend;
	gfxPriv->kaa.CheckComposite	= CheckComposite;
	gfxPriv->kaa.PrepareComposite	= PrepareComposite;
	gfxPriv->kaa.Composite		= Composite;
	gfxPriv->kaa.DoneComposite	= DoneComposite;
	gfxPriv->kaa.PrepareTrapezoids	= PrepareTrapezoids;
	gfxPriv->kaa.Trapezoids		= Trapezoids;
	gfxPriv->kaa.DoneTrapezoids	= DoneTrapezoids;
	gfxPriv->kaa.UploadToScreen	= UploadToScreen;
	gfxPriv->kaa.UploadToScratch	= UploadToScratch;

	/* Open the character device to talk to the kernel pxa3xx-gc driver */
	gfxPriv->cdev_fd = open(GC_CDEV_PATH, O_RDWR);
	if ( gfxPriv->cdev_fd < 0 ){
		dbg("GFX: Could not open device '%s'.\n", GC_CDEV_PATH);
		goto fail;
	}

	/* reset the graphics controller and check its ready */
	ret = ioctl(gfxPriv->cdev_fd,  GCIO_RESET, NULL);
	if(ret){
		dbg("GFX: Controller reset failed or not ready. Not accelerating.\n");
		goto fail;
	}

	gfxPriv->haveSetBuffers = FALSE; /* for double checking */

	if ( !kaaDrawInit(pScreen, &gfxPriv->kaa) ){
		dbg("GFX: -pxa3xxGFXInit(): kaaDrawInit failed\n");
		goto fail;
	}
	
	return TRUE;

fail:
	/* Undo everything acquired above so a failed init leaves no trace. */
	if ( gfxPriv->cdev_fd >= 0 )
		close(gfxPriv->cdev_fd);
	xfree(gfxPriv);
	globalPriv = NULL;
	return FALSE;
}

/**
 * Switches hardware acceleration on and asks KAA to mark a sync point.
 *
 * Does nothing (beyond logging) when the driver state does not exist, i.e.
 * pxa3xxGFXInit() has not succeeded.
 */
void pxa3xxGFXEnable(ScreenPtr pScreen) {
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	

	dbg("+pxa3xxGFXEnable()\n");
	if(!gfxPriv){
		dbg("GFX: pxa3xxGFXEnable() without initialised driver state\n");
		return;
	}
	gfxPriv->enabled = TRUE;
	kaaMarkSync(pScreen);
	dbg("-pxa3xxGFXEnable()\n");
}

/**
 * Switches hardware acceleration off; the Prepare* callbacks then decline
 * every request. `pScreen` is not used.
 *
 * Does nothing (beyond logging) when the driver state does not exist.
 */
void pxa3xxGFXDisable(ScreenPtr pScreen) {
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */	

	dbg("+-pxa3xxGFXDisable()\n");
	if(!gfxPriv)
		return;
	gfxPriv->enabled = FALSE;
	/* err... */
}

/**
 * Shuts acceleration down: unregisters from KAA, waits for the controller
 * via pxa3xxGFX_Wait_Complete(), closes the device and frees the driver
 * state.
 *
 * Does nothing when the driver state does not exist (init never succeeded or
 * fini already ran), which also makes repeated calls harmless.
 */
void pxa3xxGFXFini(ScreenPtr pScreen) {
	if(!globalPriv)
		return;

	kaaDrawFini(pScreen);
	pxa3xxGFX_Wait_Complete();
	
	close(globalPriv->cdev_fd);
	xfree(globalPriv);

	globalPriv = NULL;
}



/************* Command handlers *************/

/**
 * Issues the GCIO_WAIT_ALL_COMPLETE ioctl on the controller device.
 *
 * @return the ioctl result; any non-zero value is also logged
 */
int pxa3xxGFX_Wait_Complete(void){
	int status = ioctl(globalPriv->cdev_fd, GCIO_WAIT_ALL_COMPLETE, NULL);  /* FIXME: Make dynamic */

	if( status == 0 ){
		return status;
	}

	dbg("GFX: GCIO_WAIT_ALL_COMPLETE returned %i\n", status);
	return status;
}

/**
 * Appends one controller command to the kernel driver's ring buffer.
 *
 * @param data  command words; the low 4 bits of data[0] hold the number of
 *              words that follow the header
 * @param len   total command length in bytes; must equal (count + 1) * 4
 * @return the GCIO_ADD_COMMAND ioctl result on submission; -1 when the
 *         length disagrees with the word count or the ring buffer lacks
 *         room; the (negative) ioctl result when querying free space fails.
 *
 * Submitting any command before a buffer has been set is a driver logic
 * error: it is logged and the process exits with a non-zero status.
 */
int pxa3xxGFXCmd_Ringbuff_Add(unsigned long *data, int len){
	int ret, wordCount;
	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	if(!gfxPriv->haveSetBuffers){
		dbg("GFX: ************ command %08lx before buffer set **************\n", data[0]);
		exit(1);
	}

	//check the word count reflects the data length
	wordCount = (data[0] & 0x0F);
	if( len != ((wordCount+1) * 4) ){
		dbg("GFX: pxa3xxGFXCmd_Ringbuff_Add(): data length (%i) doesn't agree with word count (%i)\n",
			 len, wordCount); 
		return -1;
	}

	ret = ioctl(gfxPriv->cdev_fd, GCIO_GET_RINGBUFF_FREE, NULL);
	if( ret < 0 ){
		/* A failed query is not the same thing as a full buffer. */
		dbg("GFX: GCIO_GET_RINGBUFF_FREE returned %i.\n", ret);
		return ret;
	}
	if( len > ret ){
		dbg("GFX: Not enough room (%i bytes) in ring buffer to add instruction (%i bytes).\n"
			, ret, len);
		return -1;
	}
	
	ret = ioctl(gfxPriv->cdev_fd, GCIO_ADD_COMMAND, data);
	if( ret < 0 ){
		dbg("GFX: GCIO_ADD_COMMAND returned %i.\n", ret);
	}
	
	return ret;
}

/**
 * Queues a NOP command.
 *
 * @param nop_id  identifier carried by the command; only its low 20 bits
 *                are encoded (placed above the low 4 bits of the header)
 * @return result of pxa3xxGFXCmd_Ringbuff_Add()
 */
int pxa3xxGFXCmd_nop(unsigned long nop_id){
	unsigned long idBits = nop_id & 0x000FFFFF;
	unsigned long command = GC_NOP | (idBits << 4);

	dbg("GFX: Adding NOP command with nop_id %08lx.\n", nop_id);	

	/* The command is a single word, hence 4 bytes. */
	return pxa3xxGFXCmd_Ringbuff_Add(&command, 4);
}

/**
 * Queues a "set buffer" command describing one of the controller's buffers.
 *
 * @param buff         buffer selector (e.g. GC_BUFFI_SOURCE0, GC_BUFFI_DEST0)
 * @param base         physical base address; 0 is rejected
 * @param step         bytes per pixel (low 4 bits are encoded)
 * @param stride       bytes per row (low 14 bits are encoded)
 * @param pixelFormat  pixel-format code (low 4 bits are encoded);
 *                     PF_INVALID is rejected
 * @return -1 for a rejected request, otherwise the result of
 *         pxa3xxGFXCmd_Ringbuff_Add()
 *
 * For GC_BUFFI_SOURCE0 the step, stride and format are also remembered in
 * the driver state. haveSetBuffers is raised before queueing because
 * pxa3xxGFXCmd_Ringbuff_Add() refuses commands while it is clear.
 */
int pxa3xxGFXCmd_set_buff(unsigned int buff, 
				unsigned long base, unsigned char step,
				unsigned int stride, unsigned char pixelFormat){
	struct pxa3xxAccelPriv *priv = globalPriv;  /* FIXME: Make dynamic */
	unsigned long command[3];

	/* ignore invalid requests */
	if(base == 0){
		return -1;
	}
	if(pixelFormat == PF_INVALID){
		return -1;
	}

	if(buff == GC_BUFFI_SOURCE0){
		priv->src0Step = step;
		priv->src0Stride = stride;
		priv->src0PixelFormat = pixelFormat;
	}

	command[0] = GC_BUFFI | (buff << GC_BUFFI_ADDR_BIT) | 0x02;
	command[1] = base;
	command[2] = ((pixelFormat & 0x0F) << 19) | ((stride & 0x3FFF) << 5) | (step & 0x0F);

	priv->haveSetBuffers = TRUE;
	return pxa3xxGFXCmd_Ringbuff_Add(command, 12);
}

/**
 * Queues a colour-fill command for a w x h rectangle at (x0,y0).
 *
 * Coordinates and sizes are masked to 11 bits and the pixel format to
 * 4 bits; the colour is passed as an immediate word.
 *
 * @return result of pxa3xxGFXCmd_Ringbuff_Add()
 */
int pxa3xxGFXCmd_color_fill(int x0, int y0, int w, int h, 
		unsigned char pixelFormat, unsigned long color){
	unsigned long command[5];

	command[0] = GC_CFILL | (0 << 20) | ((pixelFormat & 0x0F) << 8) | GC_CFILL_IMM | 4;
	command[1] = x0 & 0x07FF;
	command[2] = y0 & 0x07FF;
	command[3] = ((h & 0x07FF) << 16) | (w & 0x07FF);	/* height high, width low */
	command[4] = color;

	/* header + 4 words = 20 bytes */
	return pxa3xxGFXCmd_Ringbuff_Add(command, 20);
}

/** Executes a stretch BLT. 
 * TODO: Given the comment mid-way through deceminate_blt(), this
 * and deceminate_blt() are the same code.
 *
 * Scales the source rectangle (sx0,sy0,sw,sh) up to the destination
 * rectangle (dx0,dy0,dw,dh). Shrinking in either direction is rejected.
 *
 * @return result of pxa3xxGFXCmd_Ringbuff_Add() on submission; -EINVAL when
 *         any dimension is below 2, when the source-0 step has no entry in
 *         the width-limit table, or when the source is too wide for its
 *         step; -1 when the source is larger than the destination.
 *
 * NOTE(review): the step range used to be 1..8 while the limit table has
 * only five entries, so steps 6..8 read past its end. Steps without a table
 * entry are now rejected — confirm against the hardware manual whether the
 * last entry was meant for step 8.
 */
int pxa3xxGFXCmd_stretch_blt(
		int sx0, int sy0, int sw, int sh,
		int dx0, int dy0, int dw, int dh ) {

	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	unsigned long data[9];
	unsigned int x_str_int, x_str_frac;
	unsigned int y_str_int, y_str_frac;
	static const unsigned short maxWidth[] = { 497, 249, 167, 126, 63 };
	const unsigned int maxWidthEntries = sizeof(maxWidth) / sizeof(maxWidth[0]);

	if(sw < 2 || dw < 2 || sh < 2 || dh < 2){ /* TODO: Implement exceptions */
		dbg("GFX: Stretch BLT can't cope with src/dst width/height < 2 (not implemented)\n");
		return -EINVAL;
	}

	/* maxWidth[] is indexed by step-1, so the step must have an entry. */
	if(gfxPriv->src0Step < 1 || (unsigned int)gfxPriv->src0Step > maxWidthEntries){
		dbg("GFX: Stretch BLT with invalid step size of source0: %i\n",
			gfxPriv->src0Step);
		return -EINVAL;
	}
	if(sw > maxWidth[gfxPriv->src0Step-1]){
		dbg("GFX: Stretch BLT width %i too big (> %i) for step size: %i\n",
			sw, maxWidth[gfxPriv->src0Step-1], gfxPriv->src0Step);
		return -EINVAL;
	}

	/* Scale factors are source/destination ratios with a 10-bit fraction. */
	x_str_int = 0;
	if(sw > dw){
		dbg("GFX: Stretch BLT with SW > DW\n");
		return -1;
	}else if(sw == dw){
		x_str_frac = 0x3FF;
	}else{
		x_str_frac = (((sw - 1) * 0x400) / (dw - 1)) & 0x3FF;
	}

	y_str_int = 0;
	if(sh > dh){
		dbg("GFX: Stretch BLT with SH > DH\n");
		return -1;
	}else if(sh == dh){
		y_str_frac = 0x3FF;
	}else{
		y_str_frac = (((sh - 1) * 0x400) / (dh - 1)) & 0x3FF;
	}

	data[0] = GC_STRBLT | 8;
	data[1] = dx0 & 0x07FF;
	data[2] = dy0 & 0x07FF;
	data[3] = sx0 & 0x07FF;
	data[4] = sy0 & 0x07FF;
	data[5] = ((sh & 0x07FF) << 16) | (sw & 0x07FF);
	data[6] = ((dh & 0x07FF) << 16) | (dw & 0x07FF);
	data[7] = ((x_str_int & 0x03FF) << 16) | (x_str_frac & 0x03FF);
	data[8] = ((y_str_int & 0x03FF) << 16) | (y_str_frac & 0x03FF);

	return pxa3xxGFXCmd_Ringbuff_Add(data, 36);	
}


/** Executes a deceminate BLT.
 * TODO: Given the comment mid-way through deceminate_blt(), this
 * and stretch_blt() are the same code.
 *
 * Scales the source rectangle (sx0,sy0,sw,sh) to the destination rectangle
 * (dx0,dy0,dw,dh). A source wider than the limit for the current source-0
 * step is split in half horizontally and each half is submitted recursively.
 *
 * @return 0 after a successful split; otherwise the result of
 *         pxa3xxGFXCmd_Ringbuff_Add(); -EINVAL when any dimension is below 2
 *         or the source-0 step has no entry in the width-limit table; the
 *         first negative result if either half of a split fails.
 *
 * NOTE(review): the step range used to be 1..8 while the limit table has
 * only five entries, so steps 6..8 read past its end. Steps without a table
 * entry are now rejected — confirm against the hardware manual whether the
 * last entry was meant for step 8.
 */
int pxa3xxGFXCmd_deceminate_blt(
		int sx0, int sy0, int sw, int sh,
		int dx0, int dy0, int dw, int dh ) {

	struct pxa3xxAccelPriv *gfxPriv = globalPriv;  /* FIXME: Make dynamic */

	unsigned long data[9];
	unsigned int x_str_int, x_str_frac;
	unsigned int y_str_int, y_str_frac;
	static const unsigned short maxWidth[] = { 497, 249, 167, 126, 63 };
	const unsigned int maxWidthEntries = sizeof(maxWidth) / sizeof(maxWidth[0]);

	if(sw < 2 || dw < 2 || sh < 2 || dh < 2){ /* TODO: Implement exceptions */
		dbg("GFX: Deceminate BLT can't cope with src/dst width/height < 2 (not implemented)\n");
		return -EINVAL;
	}

	/* maxWidth[] is indexed by step-1, so the step must have an entry. */
	if(gfxPriv->src0Step < 1 || (unsigned int)gfxPriv->src0Step > maxWidthEntries){
		dbg("GFX: Deceminate BLT with invalid step size of source0: %i\n",
			gfxPriv->src0Step);
		return -EINVAL;
	}

	if(sw > maxWidth[gfxPriv->src0Step-1]){
		/* FIXME: temporary
		 * Split the op half way and go around again
		 * hoping the split lines up with a pixel 
		 * on the dest rect */
		int ret;
		int sw1 = (sw/2); // 1/2 rounded down
		int sw2 = sw - sw1; // 1/2 rounded up 
		int dw1 = (dw/2);
		int dw2 = dw - dw1;
		ret = pxa3xxGFXCmd_deceminate_blt(sx0, sy0, sw1, sh, dx0, dy0, dw1, dh);
		if(ret<0)return ret;
		ret = pxa3xxGFXCmd_deceminate_blt(sx0+sw1, sy0, sw2, sh, dx0+dw1, dy0, dw2, dh);
		if(ret<0)return ret;

		return 0;
	}

/*	The docs say you can only shrink with this function and should
	use the stretch BLT to shretch. However it seems it does work 
	here and it's useful if you need to shrink in one direction
	and grow in the other... 

	TODO: Considering which we should check the dest rects for overwidth aswell */

	/* Scale factors are source/destination ratios: integer part above a
	 * 10-bit fraction. */
	if(sw == dw){
		x_str_int = 0;
		x_str_frac = 0x3FF;
	}else{
		x_str_frac = ((sw - 1) * 0x400) / (dw - 1);
		x_str_int = (x_str_frac >> 10) & 0x3FF;
		x_str_frac &= 0x3FF;
	}
	
	if(sh == dh){
		y_str_int = 0;
		y_str_frac = 0x3FF;
	}else{
		y_str_frac = ((sh - 1) * 0x400) / (dh - 1);
		y_str_int = (y_str_frac >> 10) & 0x3FF;
		y_str_frac &= 0x3FF;
	}

	data[0] = GC_DECBLT | 8;
	data[1] = dx0 & 0x07FF;
	data[2] = dy0 & 0x07FF;
	data[3] = sx0 & 0x07FF;
	data[4] = sy0 & 0x07FF;
	data[5] = ((sh & 0x07FF) << 16) | (sw & 0x07FF);
	data[6] = ((dh & 0x07FF) << 16) | (dw & 0x07FF);
	data[7] = ((x_str_int & 0x03FF) << 16) | (x_str_frac & 0x03FF);
	data[8] = ((y_str_int & 0x03FF) << 16) | (y_str_frac & 0x03FF);

	return pxa3xxGFXCmd_Ringbuff_Add(data, 36);	
}

/**
 * Queues a plain copy blit: a raster blit with raster-op code 0xCC (the
 * GXcopy entry of pxa3xx_gc_raster_op) and the second-source coordinates
 * set to (0,0).
 *
 * @return result of pxa3xxGFXCmd_raster_blt()
 */
inline int pxa3xxGFXCmd_copy_blt(int s0x, int s0y, int dx, int dy, int w, int h) {
	const unsigned char ropSourceCopy = 0xCC;

	return pxa3xxGFXCmd_raster_blt(s0x, s0y, 0, 0, dx, dy, w, h, ropSourceCopy);
}

/**
 * Queues a raster blit of a w x h rectangle.
 *
 * @param s0x, s0y  origin in source 0
 * @param s1x, s1y  origin in source 1
 * @param dx, dy    origin in the destination
 * @param op        8-bit raster-op code placed in bits 16..23 of the header
 * @return result of pxa3xxGFXCmd_Ringbuff_Add()
 *
 * All coordinates and sizes are masked to 11 bits.
 */
int pxa3xxGFXCmd_raster_blt(int s0x, int s0y, int s1x, int s1y, int dx, int dy, int w, int h, unsigned char op) {
	unsigned long command[8];

	command[0] = GC_RAST | ((op & 0xFF) << 16) | 7;
	/* destination, then source 0, then source 1 origins */
	command[1] = dx & 0x07FF;
	command[2] = dy & 0x07FF;
	command[3] = s0x & 0x07FF;
	command[4] = s0y & 0x07FF;
	command[5] = s1x & 0x07FF;
	command[6] = s1y & 0x07FF;
	command[7] = ((h & 0x07FF) << 16) | (w & 0x07FF);	/* height high, width low */

	/* header + 7 words = 32 bytes */
	return pxa3xxGFXCmd_Ringbuff_Add(command, 32);
}


