// Template body (bitblt_body.c).
//
// Expands to ONE case of the dispatch switch in fun_body.c: the case label
// for (dstBits, srcBits, bitOpCode), the pixel loop, and `return 0`.
// The including files are expected to have defined:
//   dstBits, srcBits, callBitOp, pSrcToColor  - blt_body.c
//   bitOpCode, doBitOp                        - bitblt_op.c
//   condition, callerVars, MCASE              - bitblt.c
// The enclosing function supplies dst, src, dx, dy, cx, cy, sx and sy.
//////////////////////////////////////////////////////////////////////////
 
MCASE(dstBits, srcBits, bitOpCode) {
	callerVars()
	int dstRowBytes = ROW_BYTES_2(dstBits, dst);
	int srcRowBytes = ROW_BYTES_2(srcBits, src);
	int i;
	PBYTE dstRow = dst->data + dy * dstRowBytes + PIX_POS_2(dstBits, dx);
	PBYTE srcRow = src->data + sy * srcRowBytes + PIX_POS_2(srcBits, sx);
	// `> 0` (rather than `!= 0`) so that a negative cy or cx copies nothing
	// instead of counting down through the whole int range.
	for (i = cy; i > 0; --i) {
		PBYTE dstCol = dstRow;
		PBYTE srcCol = srcRow;
		int j;
		for (j = cx; j > 0; --j) {
			// callBitOp advances the pointers it is given as part of the
			// same expression that `condition` guards. Give it scratch
			// copies, so that a skipped pixel (mergeblt transparency) does
			// not leave the real column pointers stuck on that pixel.
			PBYTE dstPix = dstCol;
			PBYTE srcPix = srcCol;
			condition(dstPix, srcPix)
				callBitOp(dstPix, srcPix);
			// Always step to the next pixel; depths here are whole bytes.
			dstCol += (dstBits) >> 3;
			srcCol += (srcBits) >> 3;
		}
		dstRow += dstRowBytes;
		srcRow += srcRowBytes;
	}
}
return 0;
 
// Reset the per-operation parameters so bitblt_op.c can define the next pair.
#undef bitOpCode
#undef doBitOp
 
 
// bitblt_op.c: for one (dstBits, srcBits) pixel-depth pair, instantiate
// bitblt_body.c once per raster operation (14 operations).
//
// doBitOp(d, s) combines source value s into destination lvalue d.
// Arguments and the whole expansion are parenthesized so the macros stay
// correct whatever expression callBitOp passes in (casts, shifts, ORs).
// bitblt_body.c #undefs bitOpCode and doBitOp after every inclusion.
 
#define bitOpCode    opCopy
#define doBitOp(d, s)      ((d) = (s))
#include "bitblt_body.c"
 
#define bitOpCode    opAnd
#define doBitOp(d, s)      ((d) &= (s))
#include "bitblt_body.c"
 
#define bitOpCode    opOr
#define doBitOp(d, s)      ((d) |= (s))
#include "bitblt_body.c"
 
#define bitOpCode    opXor
#define doBitOp(d, s)      ((d) ^= (s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotSrc
#define doBitOp(d, s)      ((d) = ~(s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotAnd
#define doBitOp(d, s)      ((d) = ~((d) & (s)))
#include "bitblt_body.c"
 
#define bitOpCode    opNotOr
#define doBitOp(d, s)      ((d) = ~((d) | (s)))
#include "bitblt_body.c"
 
#define bitOpCode    opNotXor
#define doBitOp(d, s)      ((d) = ~((d) ^ (s)))
#include "bitblt_body.c"
 
#define bitOpCode    opNotDestAnd
#define doBitOp(d, s)      ((d) = ~(d) & (s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotDestOr
#define doBitOp(d, s)      ((d) = ~(d) | (s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotDestXor
#define doBitOp(d, s)      ((d) = ~(d) ^ (s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotSrcAnd
#define doBitOp(d, s)      ((d) &= ~(s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotSrcOr
#define doBitOp(d, s)      ((d) |= ~(s))
#include "bitblt_body.c"
 
#define bitOpCode    opNotSrcXor
#define doBitOp(d, s)      ((d) ^= ~(s))
#include "bitblt_body.c"
 
// Drop the per-depth parameters so blt_body.c can define the next pair.
#undef callBitOp
#undef dstBits
#undef srcBits
#undef pSrcToColor
 
 
// blt_body.c: instantiates bitblt_op.c for every (dst, src) pixel-depth pair.
// Only 16-, 24- and 32-bit pixels are covered here.
//
// Each group defines, and bitblt_op.c #undefs again:
//   dstBits / srcBits  - pixel depths, pasted into the case label by MCASE
//   pSrcToColor        - reads the source pixel at a byte pointer as a color;
//                        used by mergeblt's transparency test
//   callBitOp(pd, ps)  - applies doBitOp to the pixel at pd/ps, then advances
//                        both byte pointers by one pixel
//
// pd and ps are byte pointers (PBYTE), so every 16/32-bit access must cast
// before dereferencing; a bare `*ps` would read a single byte.
// NOTE(review): the 24<-16 path stores R in byte 0, whereas PCOLOR24_TO_16
// takes its top bits from byte 2 -- confirm the intended channel order.
 
// dst --- 16
#define dstBits  16
#define srcBits  16
#define pSrcToColor  PCOLOR16_TO_32
#define callBitOp(pd, ps) \
	doBitOp(*(UINT16*)(pd), *(UINT16*)(ps)), \
	(pd) += 2, (ps) += 2
#include "bitblt_op.c"
 
#define dstBits  16
#define srcBits  24
#define pSrcToColor  PCOLOR24_TO_32
#define callBitOp(pd, ps) \
	doBitOp(*(UINT16*)(pd), PCOLOR24_TO_16(ps)), \
	(pd) += 2, (ps) += 3
#include "bitblt_op.c"
 
#define dstBits  16
#define srcBits  32
#define pSrcToColor(x)  (*(UINT32*)(x))
#define callBitOp(pd, ps) \
	doBitOp(*(UINT16*)(pd), PCOLOR32_TO_16(ps)), \
	(pd) += 2, (ps) += 4
#include "bitblt_op.c"
 
// dst --- 24
#define dstBits  24
#define srcBits  16
#define pSrcToColor  PCOLOR16_TO_32
#define callBitOp(pd, ps) \
	doBitOp((pd)[0], C16_R(*(UINT16*)(ps))), \
	doBitOp((pd)[1], C16_G(*(UINT16*)(ps))), \
	doBitOp((pd)[2], C16_B(*(UINT16*)(ps))), \
	(pd) += 3, (ps) += 2
#include "bitblt_op.c"
 
#define dstBits  24
#define srcBits  24
#define pSrcToColor  PCOLOR24_TO_32
#define callBitOp(pd, ps) \
	doBitOp((pd)[0], (ps)[0]), \
	doBitOp((pd)[1], (ps)[1]), \
	doBitOp((pd)[2], (ps)[2]), \
	(pd) += 3, (ps) += 3
#include "bitblt_op.c"
 
#define dstBits  24
#define srcBits  32
#define pSrcToColor(x)  (*(UINT32*)(x))
// Read the whole 32-bit pixel before extracting channels.
#define callBitOp(pd, ps) \
	doBitOp((pd)[0], C32_R(*(UINT32*)(ps))), \
	doBitOp((pd)[1], C32_G(*(UINT32*)(ps))), \
	doBitOp((pd)[2], C32_B(*(UINT32*)(ps))), \
	(pd) += 3, (ps) += 4
#include "bitblt_op.c"
 
// dst --- 32
#define dstBits  32
#define srcBits  16
#define pSrcToColor  PCOLOR16_TO_32
#define callBitOp(pd, ps) \
	doBitOp(*(UINT32*)(pd), PCOLOR16_TO_32(ps)), \
	(pd) += 4, (ps) += 2
#include "bitblt_op.c"
 
#define dstBits  32
#define srcBits  24
#define pSrcToColor  PCOLOR24_TO_32
#define callBitOp(pd, ps) \
	doBitOp(*(UINT32*)(pd), PCOLOR24_TO_32(ps)), \
	(pd) += 4, (ps) += 3
#include "bitblt_op.c"
 
#define dstBits  32
#define srcBits  32
#define pSrcToColor(x)  (*(UINT32*)(x))
#define callBitOp(pd, ps) \
	doBitOp(*(UINT32*)(pd), *(UINT32*)(ps)), \
	(pd) += 4, (ps) += 4
#include "bitblt_op.c"
 
 
	// 函数体,适用于 bitblt 和 mergeblt,模板参数为 condition
 
	static const unsigned char jump_table[] = {
		0,    bit_1,       bit_2, 0, bit_4 , 0, 0, 0,
		bit_8 ,        0,           0, 0, bit_12, 0, 0, 0,
		bit_16,              0,           0, 0,        0, 0, 0, 0,
		bit_24,        0,     0, 0,        0, 0, 0, 0,
		bit_32
	};
	int jump_index;
 
	if (dst->colorBits > 32) return -1;
	if (src->colorBits > 32) return -2;
	if ((unsigned int)op >= (unsigned int)BinaryOpCode_Radix)
		return -4;
 
	jump_index =
		jump_table[dst->colorBits] +
		jump_table[src->colorBits] * bitRadix +  op * bitRadix*bitRadix;
 
	switch (jump_index)
	{
	default:
		if (0 == jump_table[dst->colorBits])
			return -1; // dst->colorBits error
		else if (0 == jump_table[src->colorBits])
			return -2; // src->colorBits error
		else
			return -3; // not support
#include "blt_body.c"
	}
 
	return -5; // it will not goes here
 
#undef  condition
 
 
 
// gdi.h: basic image types and constants, plus utility macros
 
#ifndef __GDI_H__
#define __GDI_H__
 
// Raster operations: how a source pixel value is combined into the
// destination. Values are consecutive from 0; BinaryOpCode_Radix is the
// count of operations and is used as the radix when building case labels.
typedef enum BinaryOpCode
{
	// plain combinations
	opCopy             = 0,   // dst = src
	opXor              = 1,   // dst = src ^ dst
	opAnd              = 2,   // dst = src & dst
	opOr               = 3,   // dst = src | dst

	// inverted result
	opNotSrc           = 4,   // dst = ~src
	opNotAnd           = 5,   // dst = ~(src & dst)
	opNotOr            = 6,   // dst = ~(src | dst)
	opNotXor           = 7,   // dst = ~(src ^ dst)

	// one operand inverted before combining
	opNotDestAnd       = 8,   // dst = ~dst &  src
	opNotDestOr        = 9,   // dst = ~dst |  src
	opNotDestXor       = 10,  // dst = ~dst ^  src
	opNotSrcAnd        = 11,  // dst =  dst & ~src
	opNotSrcOr         = 12,  // dst =  dst | ~src
	opNotSrcXor        = 13,  // dst =  dst ^ ~src

	// opSet (dst = 1) and opClear (dst = 0) are not defined.

	BinaryOpCode_Radix = 14   // number of operations above
} BinaryOpCode;
 
// Basic scalar types used by the blit routines.
// UINT16/UINT32 are dereferenced through casted byte pointers and paired with
// fixed 2/4-byte pointer steps, so they must be exactly that wide. `unsigned
// long` is 8 bytes on LP64 targets; `unsigned int` is 4 bytes on the common
// ILP32, LP64 and LLP64 data models.
typedef unsigned char  BYTE, *PBYTE;
typedef unsigned long  COLOR;
typedef unsigned short UINT16;
typedef unsigned int   UINT32;

// Compile-time size checks: a negative array size stops the build on a
// platform where the assumptions above do not hold.
typedef char gdi_check_uint16_is_2_bytes[sizeof(UINT16) == 2 ? 1 : -1];
typedef char gdi_check_uint32_is_4_bytes[sizeof(UINT32) == 4 ? 1 : -1];
 
// A pixel surface that the blit routines read from or write to.
typedef struct _GdiDevice
{
	unsigned int  colorBits;             // bits per pixel
	int           width;                 // pixels per row; ROW_BYTES derives the row size from it
	int           height;                // presumably the number of rows -- not used in this header
	COLOR         transparentColor;      // color treated as transparent (presumably by merge-style blits; confirm with callers)
	int           transparentTolerance;  // NOTE(review): no use visible from this header -- confirm
	COLOR        *pallete;               // palette entries (field name spelling kept for compatibility)
	PBYTE         data;                  // first byte of the pixel rows
} GdiDevice;
 
// Byte-size helpers. Every macro argument is parenthesized so that compound
// arguments such as `x0 + i` or `8 + 8` are multiplied as a whole.
//
// ROW_BYTES_2(colorBits, gdi): bytes per row of `gdi` for the given depth,
//                              rounded up to a whole byte.
// PIX_POS_2(colorBits, x):     byte offset of pixel column x within a row
//                              (rounded down for depths below 8 bits).
#define ROW_BYTES_2(colorBits, gdi) ((7 + (gdi)->width * (colorBits)) >> 3)
#define PIX_POS_2(colorBits, x) (((x) * (colorBits)) >> 3)
 
// Same helpers, taking the depth from the device itself.
#define ROW_BYTES(gdi) ((7 + (gdi)->width * (gdi)->colorBits) >> 3)
#define PIX_POS(gdi, x) (((x) * (gdi)->colorBits) >> 3)
 
#endif
 
 
// bitblt.c
// The bitblt and mergeblt functions; both are built from one shared, templated function body
 
#include "gdi.h"
 
// Compact index for each recognised pixel depth. The values are used as
// digits of a base-bitRadix number when building switch case labels, so they
// are consecutive and bitError (0) means "depth not recognised".
enum ColorBitKind
{
	bitError = 0,
	bit_1    = 1,
	bit_2    = 2,
	bit_4    = 3,
	bit_8    = 4,
	bit_12   = 5,
	bit_16   = 6,
	bit_24   = 7,
	bit_32   = 8,
	bitRadix = 9   // number of kinds, including bitError
};
 
// Token pasting in two steps, so that macro arguments are expanded before
// they are glued together (CAT_TOKEN(bit_, dstBits) -> bit_16, not
// bit_dstBits).
#define CAT_TOKEN1(t1, t2)  t1##t2
#define CAT_TOKEN(t1, t2)   CAT_TOKEN1(t1, t2)
 
// Case label for one (destination depth, source depth, operation) triple:
// dstKind + srcKind * bitRadix + op * bitRadix^2. The trailing ':' is part of
// the macro. Each line except the last must end in a backslash to keep the
// definition on one logical line.
#define MCASE(dstBits, srcBits, bitOpCode) \
   case CAT_TOKEN(bit_, dstBits) + \
        CAT_TOKEN(bit_, srcBits) * bitRadix + \
        (bitOpCode) * bitRadix * bitRadix:
 
 
// Channel extraction from packed pixel values. The argument is parenthesized
// because shifts bind tighter than `|`, so an argument such as `a | b` would
// otherwise be shifted only in part.
//
// 32-bit layout, 10 bits per channel:
// 00RR-RRRR-RRRR-GGGG-GGGG-GGBB-BBBB-BBBB
// 0x3FF00000  0x000FFC00  0x000003FF
#define C32_R(c)     (((c) >> 20) & 0x3FF)
#define C32_G(c)     (((c) >> 10) & 0x3FF)
#define C32_B(c)     ( (c)        & 0x3FF)
 
// 16-bit layout as decoded here: 5 bits per channel at bits 10, 5 and 0.
#define C16_R(c)     (((c) >> 10) & 0x1F)
#define C16_G(c)     (((c) >>  5) & 0x1F)
#define C16_B(c)     ( (c)        & 0x1F)
 
// Pixel-format conversions. Each takes a byte pointer to the source pixel.
//
// PCOLOR24_TO_16 keeps the top 5 bits of each of the three bytes:
// byte 2 -> bits 15..11, byte 1 -> bits 10..6, byte 0 -> bits 4..0.
// Each line except the last must end in a backslash to keep the definition
// on one logical line.
// NOTE(review): this packing differs from the bit positions C16_R/G/B decode
// (10/5/0) -- confirm which 16-bit layout is intended.
#define PCOLOR24_TO_16(p24)  ( \
		((UINT16)(p24)[2] & 0xF8) << 8 | \
		((UINT16)(p24)[1] & 0xF8) << 3 | \
		         (p24)[0]         >> 3)

// The conversions below are not implemented yet; they all yield 0.
#define PCOLOR32_TO_16(p32)  0
 
#define PCOLOR16_TO_24(p16)  0
#define PCOLOR32_TO_24(p32)  0
 
#define PCOLOR16_TO_32(p16)  0
#define PCOLOR24_TO_32(p24)  0
 
// Hook expanded at the top of every generated case body (bitblt_body.c).
// It expands to nothing here, so it adds no per-case declarations.
#define callerVars()
 
// Combines a cx-by-cy pixel rectangle of src into dst with raster operation
// op, for every pixel of the rectangle.
//
//   dst, src  destination and source devices
//   dx, dy    pixel column and row in dst where the rectangle starts
//   cx, cy    rectangle width and height in pixels
//   sx, sy    pixel column and row in src where the rectangle starts
//   op        raster operation (see BinaryOpCode)
//
// Returns 0 on success, -1 if dst->colorBits is not a supported depth,
// -2 if src->colorBits is not a supported depth, -3 if the depth pair has no
// implementation, and -4 if op is out of range.
// The shared body performs no clipping against the devices' width/height.
int bitblt(GdiDevice* dst, const GdiDevice* src,
		int dx, int dy, int cx, int cy, int sx, int sy, BinaryOpCode op)
{
// Empty condition: the operation is applied to every pixel.
#define condition(pd, ps)
#include "template/fun_body.c"
}
 
// Like bitblt, but the raster operation is applied only where the source
// pixel, converted with pSrcToColor, differs from src->transparentColor.
//
//   dst, src  destination and source devices
//   dx, dy    pixel column and row in dst where the rectangle starts
//   cx, cy    rectangle width and height in pixels
//   sx, sy    pixel column and row in src where the rectangle starts
//   op        raster operation (see BinaryOpCode)
//
// Returns 0 on success, -1 if dst->colorBits is not a supported depth,
// -2 if src->colorBits is not a supported depth, -3 if the depth pair has no
// implementation, and -4 if op is out of range.
int mergeblt(GdiDevice* dst, const GdiDevice* src,
		int dx, int dy, int cx, int cy, int sx, int sy, BinaryOpCode op)
{
	// Exact color match only: src->transparentTolerance is not consulted.
#define condition(pd, ps)  if (pSrcToColor(ps) != src->transparentColor)
#include "template/fun_body.c"
}