/*
 * WinUAE/pcem/vid_voodoo_render.cpp
 * 2022-12-25 19:59:01 +02:00
 *
 * 1660 lines
 * 72 KiB
 * C++
 */

#include <intrin.h>
#include <math.h>
#include <stddef.h>
#include "ibm.h"
#include "device.h"
#include "mem.h"
#include "thread.h"
#include "video.h"
#include "vid_svga.h"
#include "vid_voodoo.h"
#include "vid_voodoo_common.h"
#include "vid_voodoo_dither.h"
#include "vid_voodoo_regs.h"
#include "vid_voodoo_render.h"
#include "vid_voodoo_texture.h"
/*Working state used while rasterising one triangle.  It is filled in by
  voodoo_triangle(), advanced by voodoo_half_triangle() and handed to the
  recompiled draw routine.
  NOTE(review): the member order is deliberately left untouched in case the
  code generator headers depend on the layout - confirm before reordering.*/
typedef struct voodoo_state_t
{
        /*Span extents; stepped by dx1/dx2 once per scanline*/
        int xstart;
        int xend;
        int xdir;                       /*Added to x after every pixel*/

        /*Values of the iterated colour/alpha/depth at the start of the
          current scanline; stepped by the d?dY gradients*/
        uint32_t base_r;
        uint32_t base_g;
        uint32_t base_b;
        uint32_t base_a;
        uint32_t base_z;

        /*Per-TMU scanline start values of S/T/W and the base LOD*/
        struct
        {
                int64_t base_s;
                int64_t base_t;
                int64_t base_w;
                int lod;
        } tmu[2];

        int64_t base_w;                 /*Scanline start value of W*/
        int lod;                        /*LOD of the TMU currently being sampled*/

        /*LOD clamp range for each TMU (same 8.8 units as lod before the >> 8)*/
        int lod_min[2];
        int lod_max[2];

        /*Per-scanline increments of xstart / xend*/
        int dx1;
        int dx2;

        int y;                          /*Scanline currently being drawn*/
        int yend;
        int ydir;

        /*Edge slopes used to derive the span ends for each scanline*/
        int32_t dxAB;
        int32_t dxAC;
        int32_t dxBC;

        /*Texel colour produced for each TMU (index = TMU number)*/
        int tex_b[2];
        int tex_g[2];
        int tex_r[2];
        int tex_a[2];

        /*Texture coordinates of the pixel being sampled*/
        int tex_s;
        int tex_t;

        /*Non-zero when S / T are clamped instead of wrapped, per TMU*/
        int clamp_s[2];
        int clamp_t[2];

        /*Triangle vertices*/
        int32_t vertexAx;
        int32_t vertexAy;
        int32_t vertexBx;
        int32_t vertexBy;
        int32_t vertexCx;
        int32_t vertexCy;

        /*Texel data for every TMU and LOD level*/
        uint32_t *tex[2][LOD_MAX+1];
        int tformat;

        /*Per-LOD lookup tables for each TMU, copied from the parameters*/
        int *tex_w_mask[2];
        int *tex_h_mask[2];
        int *tex_shift[2];
        int *tex_lod[2];

        /*Start of the current scanline in the colour and auxiliary buffers*/
        uint16_t *fb_mem;
        uint16_t *aux_mem;

        /*Per-pixel iterators for colour, alpha and depth*/
        int32_t ib;
        int32_t ig;
        int32_t ir;
        int32_t ia;
        int32_t z;
        int32_t new_depth;

        /*Per-pixel S/T/W iterators for TMU0 and TMU1*/
        int64_t tmu0_s;
        int64_t tmu0_t;
        int64_t tmu0_w;
        int64_t tmu1_s;
        int64_t tmu1_t;
        int64_t tmu1_w;
        int64_t w;                      /*Per-pixel W iterator*/

        /*Counters cleared before each span and added to the global
          statistics afterwards*/
        int pixel_count;
        int texel_count;

        /*First and last pixel of the current span*/
        int x;
        int x2;
        int x_tiled;

        /*The members below are not referenced in the visible part of this
          file; presumably scratch storage for the generated code - confirm*/
        uint32_t w_depth;
        float log_temp;
        uint32_t ebp_store;
        uint32_t texBaseAddr;

        int lod_frac[2];                /*Low 8 bits of the clamped LOD, per TMU*/
} voodoo_state_t;
/*Debug trace switch: when non-zero the renderer emits pclog() traces for
  every scanline and pixel.  Bit 1 is cleared again after each drawn pixel.*/
static int voodoo_output = 0;
/*Fractional-part lookup table for fastlog().  It is indexed with the eight
  bits that follow the most significant set bit of the input; the entries look
  like 256 * log2(1 + i/256) rounded down - TODO confirm how the table was
  generated.*/
static uint8_t logtable[256] =
{
0x00,0x01,0x02,0x04,0x05,0x07,0x08,0x09,0x0b,0x0c,0x0e,0x0f,0x10,0x12,0x13,0x15,
0x16,0x17,0x19,0x1a,0x1b,0x1d,0x1e,0x1f,0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x2a,
0x2b,0x2c,0x2e,0x2f,0x30,0x31,0x33,0x34,0x35,0x36,0x38,0x39,0x3a,0x3b,0x3d,0x3e,
0x3f,0x40,0x41,0x43,0x44,0x45,0x46,0x47,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x50,0x51,
0x52,0x53,0x54,0x55,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x60,0x61,0x62,0x63,
0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x83,0x84,0x85,
0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,
0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa2,0xa3,
0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
0xb3,0xb4,0xb5,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbc,0xbd,0xbe,0xbf,0xc0,
0xc1,0xc2,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xcd,
0xce,0xcf,0xd0,0xd1,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd6,0xd7,0xd8,0xd9,0xda,0xda,
0xdb,0xdc,0xdd,0xde,0xde,0xdf,0xe0,0xe1,0xe1,0xe2,0xe3,0xe4,0xe5,0xe5,0xe6,0xe7,
0xe8,0xe8,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,
0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff
};
/*Table-driven base-2 logarithm with 8 fractional bits.

  The result is (index of the most significant set bit << 8) combined with a
  logtable[] entry selected by the eight bits that follow that leading bit.
  Zero, and any value with bit 63 set, yield the sentinel 0x80000000.*/
static inline int fastlog(uint64_t val)
{
        uint64_t norm = val;
        int msb = 63;
        int step;
        int mantissa;

        if (val == 0 || (val >> 63))
                return 0x80000000;

        /*Binary search for the leading 1: whenever the top `step` bits are
          all clear, shift them out and lower the bit index accordingly*/
        for (step = 32; step > 0; step >>= 1)
        {
                if (!(norm >> (64 - step)))
                {
                        msb -= step;
                        norm <<= step;
                }
        }

        /*Align the original value so that the eight bits below the leading 1
          end up in bits 0-7*/
        if (msb < 8)
                mantissa = (val << (8 - msb)) & 0xff;
        else
                mantissa = (val >> (msb - 8)) & 0xff;

        return (msb << 8) | logtable[mantissa];
}
/*Count the zero bits above the most significant set bit of a 16-bit value.
  Returns 0 when bit 15 is set and 15 when only bit 0 is set; an input of 0
  also returns 15.*/
static inline int voodoo_fls(uint16_t val)
{
        int leading = 0;
        int width;

        /*Halving search: test the top 8, 4, 2 and finally 1 bit(s)*/
        for (width = 8; width > 0; width >>= 1)
        {
                uint16_t top_bits = (uint16_t)(0xffff << (16 - width));

                if (!(val & top_bits))
                {
                        leading += width;
                        val = (uint16_t)(val << width);
                }
        }

        return leading;
}
/*Parameters for sampling one LOD level of a texture*/
typedef struct voodoo_texture_state_t
{
        int s;                  /*Integer texel column (point sampling)*/
        int t;                  /*Integer texel row (point sampling)*/
        int w_mask;             /*Highest valid column; also the wrap mask*/
        int h_mask;             /*Highest valid row; also the wrap mask*/
        int tex_shift;          /*Row index is shifted left by this to address a texel*/
} voodoo_texture_state_t;
/*Point-sample one texel for the given TMU.

  texture_state->s / ->t hold the integer texel coordinates.  A coordinate
  with bits set outside its mask is either clamped to [0, mask] (when the
  matching state->clamp_s / clamp_t flag of this TMU is set) or wrapped by
  ANDing it with the mask; the adjusted value is written back to
  texture_state.  The texel is read from the LOD selected by state->lod and
  its four 8-bit channels are stored in state->tex_b/g/r/a[tmu].*/
static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu)
{
        const int s_limit = texture_state->w_mask;
        const int t_limit = texture_state->h_mask;
        int col = texture_state->s;
        int row = texture_state->t;
        uint32_t texel;

        /*Column outside the texture?*/
        if (col & ~s_limit)
        {
                if (!state->clamp_s[tmu])
                        col &= s_limit;
                else
                {
                        if (col < 0)
                                col = 0;
                        if (col > s_limit)
                                col = s_limit;
                }
                texture_state->s = col;
        }

        /*Row outside the texture?*/
        if (row & ~t_limit)
        {
                if (!state->clamp_t[tmu])
                        row &= t_limit;
                else
                {
                        if (row < 0)
                                row = 0;
                        if (row > t_limit)
                                row = t_limit;
                }
                texture_state->t = row;
        }

        texel = state->tex[tmu][state->lod][col + (row << texture_state->tex_shift)];

        /*Unpack the 32-bit texel: blue in the low byte, alpha in the high byte*/
        state->tex_a[tmu] = (texel >> 24) & 0xff;
        state->tex_r[tmu] = (texel >> 16) & 0xff;
        state->tex_g[tmu] = (texel >> 8) & 0xff;
        state->tex_b[tmu] = texel & 0xff;
}
/*Expand a 4-bit value to 8 bits by replicating the nibble: LOW4 uses bits
  0-3 of x, HIGH4 uses bits 4-7.  The argument is parenthesised so that
  compound expressions such as LOW4(a | b) expand with the intended
  precedence.  x is still evaluated more than once, so do not pass an
  expression with side effects.*/
#define LOW4(x) (((x) & 0x0f) | (((x) & 0x0f) << 4))
#define HIGH4(x) (((x) & 0xf0) | (((x) & 0xf0) >> 4))
/*Fetch the 2x2 texel block whose top-left corner is (s, t) and blend it
  with the weights in d[0..3] (top-left, top-right, bottom-left,
  bottom-right).  Each blended channel is the weighted sum shifted right by
  8 and is stored in state->tex_r/g/b/a[tmu].

  When any of the four texels lies outside the texture, every coordinate is
  individually clamped or wrapped according to state->clamp_s / clamp_t.
  The x parameter is not used.*/
static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x)
{
        rgba_u sample[4];
        uint32_t *texels = state->tex[tmu][state->lod];
        const int s_limit = texture_state->w_mask;
        const int t_limit = texture_state->h_mask;
        const int row_shift = texture_state->tex_shift;
        const int crosses_edge = (((s | (s + 1)) & ~s_limit) || ((t | (t + 1)) & ~t_limit));

        if (!crosses_edge)
        {
                /*Fast path: the whole block is inside the texture*/
                const int row0 = t << row_shift;
                const int row1 = (t + 1) << row_shift;

                sample[0].u = texels[s + row0];
                sample[1].u = texels[s + 1 + row0];
                sample[2].u = texels[s + row1];
                sample[3].u = texels[s + 1 + row1];
        }
        else
        {
                int i;

                for (i = 0; i < 4; i++)
                {
                        /*Bit 0 of i selects the column, bit 1 the row*/
                        int col = s + (i & 1);
                        int row = t + ((i & 2) >> 1);

                        if (col & ~s_limit)
                        {
                                if (!state->clamp_s[tmu])
                                        col &= s_limit;
                                else
                                {
                                        if (col < 0)
                                                col = 0;
                                        if (col > s_limit)
                                                col = s_limit;
                                }
                        }
                        if (row & ~t_limit)
                        {
                                if (!state->clamp_t[tmu])
                                        row &= t_limit;
                                else
                                {
                                        if (row < 0)
                                                row = 0;
                                        if (row > t_limit)
                                                row = t_limit;
                                }
                        }

                        sample[i].u = texels[col + (row << row_shift)];
                }
        }

        state->tex_r[tmu] = (sample[0].rgba.r * d[0] + sample[1].rgba.r * d[1] + sample[2].rgba.r * d[2] + sample[3].rgba.r * d[3]) >> 8;
        state->tex_g[tmu] = (sample[0].rgba.g * d[0] + sample[1].rgba.g * d[1] + sample[2].rgba.g * d[2] + sample[3].rgba.g * d[3]) >> 8;
        state->tex_b[tmu] = (sample[0].rgba.b * d[0] + sample[1].rgba.b * d[1] + sample[2].rgba.b * d[2] + sample[3].rgba.b * d[3]) >> 8;
        state->tex_a[tmu] = (sample[0].rgba.a * d[0] + sample[1].rgba.a * d[1] + sample[2].rgba.a * d[2] + sample[3].rgba.a * d[3]) >> 8;
}
/*Sample the texture of one TMU at state->tex_s / tex_t using the LOD level
  already chosen in state->lod; the result is left in state->tex_r/g/b/a[tmu].

  - With LOD_TMIRROR_S / LOD_TMIRROR_T set in params->tLOD[tmu], a coordinate
    whose bit 12 is set is bitwise inverted first.
  - When voodoo->bilinear_enabled is set and bits 1-2 of
    params->textureMode[tmu] are not both clear, a weighted 2x2 fetch is done
    via tex_read_4(); the coordinates are first moved back by half a texel
    and their low four bits after LOD scaling give the blend weights.
  - Otherwise a single texel is fetched via tex_read().

  state->tex_s / tex_t are modified in place by the mirroring and by the
  half-texel adjustment.*/
static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
{
        const int lod_shift = state->tex_lod[tmu][state->lod];
        voodoo_texture_state_t sampler;

        sampler.w_mask = state->tex_w_mask[tmu][state->lod];
        sampler.h_mask = state->tex_h_mask[tmu][state->lod];
        sampler.tex_shift = 8 - lod_shift;

        if ((params->tLOD[tmu] & LOD_TMIRROR_S) && (state->tex_s & 0x1000))
                state->tex_s = ~state->tex_s;
        if ((params->tLOD[tmu] & LOD_TMIRROR_T) && (state->tex_t & 0x1000))
                state->tex_t = ~state->tex_t;

        if (!voodoo->bilinear_enabled || !(params->textureMode[tmu] & 6))
        {
                /*Point sampling: drop the 4 fractional bits and the LOD scale*/
                sampler.s = state->tex_s >> (4 + lod_shift);
                sampler.t = state->tex_t >> (4 + lod_shift);
                tex_read(state, &sampler, tmu);
        }
        else
        {
                int weight[4];
                int fine_s, fine_t;
                int frac_s, frac_t;

                /*Move back by half a texel at this LOD*/
                state->tex_s -= 1 << (3 + lod_shift);
                state->tex_t -= 1 << (3 + lod_shift);

                fine_s = state->tex_s >> lod_shift;
                fine_t = state->tex_t >> lod_shift;

                /*Low four bits are the sub-texel position (0..15)*/
                frac_s = fine_s & 0xf;
                frac_t = fine_t & 0xf;

                weight[0] = (16 - frac_s) * (16 - frac_t);
                weight[1] = frac_s * (16 - frac_t);
                weight[2] = (16 - frac_s) * frac_t;
                weight[3] = frac_s * frac_t;

                tex_read_4(state, &sampler, fine_s >> 4, fine_t >> 4, weight, tmu, x);
        }
}
/*Compute the texture coordinates and level of detail for one TMU at the
  current pixel, then sample the texture through voodoo_get_texture().

  voodoo, params - device and triangle parameters
  state          - rasteriser state; reads the tmu0_* or tmu1_* iterators and
                   writes tex_s, tex_t, lod and lod_frac[tmu]
  tmu            - 0 or 1, selects which TMU's iterators and settings are used
  x              - current pixel column, passed through to the sampler

  When bit 0 of params->textureMode[tmu] is set, S and T are multiplied by
  2^48 / W and the logarithm of that reciprocal is added to the base LOD.
  Otherwise S and T are simply shifted down and the base LOD is used as is.
  The LOD is then clamped to [lod_min, lod_max]; its low 8 bits go to
  lod_frac[tmu] and the remaining integer part is left in state->lod.*/
static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
{
        /*Pick the S/T/W iterators that belong to the requested TMU*/
        const int64_t iter_s = tmu ? state->tmu1_s : state->tmu0_s;
        const int64_t iter_t = tmu ? state->tmu1_t : state->tmu0_t;
        const int64_t iter_w = tmu ? state->tmu1_w : state->tmu0_w;

        if (params->textureMode[tmu] & 1)
        {
                /*Reciprocal of W; stays 0 when W is 0.  The division is
                  unsigned, so a negative W also produces 0.*/
                int64_t _w = 0;

                if (iter_w)
                        _w = (int64_t)((1ULL << 48) / iter_w);

                state->tex_s = (int32_t)(((((iter_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30);
                state->tex_t = (int32_t)(((((iter_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30);

                /*fastlog() returns 0x80000000 for _w == 0; subtracting from
                  that in signed arithmetic would overflow, which is undefined
                  behaviour.  Do the sum in unsigned arithmetic instead: it
                  wraps in a well-defined way, gives the same result whenever
                  no overflow occurs, and the clamp below brings the value
                  back into range.*/
                state->lod = (int)((uint32_t)state->tmu[tmu].lod + ((uint32_t)fastlog(_w) - (uint32_t)(19 << 8)));
        }
        else
        {
                state->tex_s = (int32_t)(iter_s >> (14+14));
                state->tex_t = (int32_t)(iter_t >> (14+14));
                state->lod = state->tmu[tmu].lod;
        }

        if (state->lod < state->lod_min[tmu])
                state->lod = state->lod_min[tmu];
        else if (state->lod > state->lod_max[tmu])
                state->lod = state->lod_max[tmu];

        /*Split the 8.8 LOD into fraction and integer level*/
        state->lod_frac[tmu] = state->lod & 0xff;
        state->lod >>= 8;

        voodoo_get_texture(voodoo, params, state, tmu, x);
}
/*Perform texture fetch and blending for both TMUs.

  TMU1 is sampled first and optionally run through its own combine step; the
  outcome is stored back in state->tex_r/g/b/a[1].  TMU0 is then sampled and
  combined with TMU1's output acting as the "other" input.  The final colour
  and alpha are left in state->tex_r/g/b/a[0].

  The tc_* / tca_* identifiers and CLAMP() are not declared in this function;
  presumably they are macros that decode params->textureMode[] and clamp to
  0..255 respectively - confirm against the headers.*/
static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x)
{
int r,g,b,a;
int c_reverse, a_reverse;
// int c_reverse1, a_reverse1;
int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0;
/*---- TMU1 ----*/
voodoo_tmu_fetch(voodoo, params, state, 1, x);
/*Choose the blend polarity: with trilinear filtering enabled on an odd LOD
  level the reverse-blend setting is used as is, otherwise it is inverted.
  NOTE(review): this uses tc_reverse_blend / tca_reverse_blend and not a
  "_1" variant like the other TMU1 selectors below - confirm intended.*/
if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1))
{
c_reverse = tc_reverse_blend;
a_reverse = tca_reverse_blend;
}
else
{
c_reverse = !tc_reverse_blend;
a_reverse = !tca_reverse_blend;
}
/* c_reverse1 = c_reverse;
a_reverse1 = a_reverse;*/
/*TMU1 colour combine, only performed when its sub_clocal bit is set:
  result = -texel * factor (+ optional add term)*/
if (tc_sub_clocal_1)
{
/*Select the blend factor; "other" alpha is treated as 0 for TMU1*/
switch (tc_mselect_1)
{
case TC_MSELECT_ZERO:
factor_r = factor_g = factor_b = 0;
break;
case TC_MSELECT_CLOCAL:
factor_r = state->tex_r[1];
factor_g = state->tex_g[1];
factor_b = state->tex_b[1];
break;
case TC_MSELECT_AOTHER:
factor_r = factor_g = factor_b = 0;
break;
case TC_MSELECT_ALOCAL:
factor_r = factor_g = factor_b = state->tex_a[1];
break;
case TC_MSELECT_DETAIL:
/*Detail factor: (bias - LOD) << scale, limited to detail_max*/
factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1];
if (factor_r > params->detail_max[1])
factor_r = params->detail_max[1];
factor_g = factor_b = factor_r;
break;
case TC_MSELECT_LOD_FRAC:
factor_r = factor_g = factor_b = state->lod_frac[1];
break;
}
/*Multiply by factor+1 (or by the inverted factor+1) and scale back by 256*/
if (!c_reverse)
{
r = (-state->tex_r[1] * (factor_r + 1)) >> 8;
g = (-state->tex_g[1] * (factor_g + 1)) >> 8;
b = (-state->tex_b[1] * (factor_b + 1)) >> 8;
}
else
{
r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8;
g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8;
b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8;
}
/*Optional add term: local colour or local alpha*/
if (tc_add_clocal_1)
{
r += state->tex_r[1];
g += state->tex_g[1];
b += state->tex_b[1];
}
else if (tc_add_alocal_1)
{
r += state->tex_a[1];
g += state->tex_a[1];
b += state->tex_a[1];
}
state->tex_r[1] = CLAMP(r);
state->tex_g[1] = CLAMP(g);
state->tex_b[1] = CLAMP(b);
}
/*TMU1 alpha combine, same structure as the colour combine above*/
if (tca_sub_clocal_1)
{
switch (tca_mselect_1)
{
case TCA_MSELECT_ZERO:
factor_a = 0;
break;
case TCA_MSELECT_CLOCAL:
factor_a = state->tex_a[1];
break;
case TCA_MSELECT_AOTHER:
factor_a = 0;
break;
case TCA_MSELECT_ALOCAL:
factor_a = state->tex_a[1];
break;
case TCA_MSELECT_DETAIL:
factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1];
if (factor_a > params->detail_max[1])
factor_a = params->detail_max[1];
break;
case TCA_MSELECT_LOD_FRAC:
factor_a = state->lod_frac[1];
break;
}
/*NOTE(review): the polarity here is the opposite of the TMU1 colour path
  and of both TMU0 paths (those use the inverted factor when the reverse
  flag is set) - confirm whether this is intentional.*/
if (!a_reverse)
a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8;
else
a = (-state->tex_a[1] * (factor_a + 1)) >> 8;
if (tca_add_clocal_1 || tca_add_alocal_1)
a += state->tex_a[1];
state->tex_a[1] = CLAMP(a);
}
/*---- TMU0 ----*/
voodoo_tmu_fetch(voodoo, params, state, 0, x);
/*Blend polarity for TMU0, chosen the same way as for TMU1 but from TMU0's
  texture mode and the LOD just computed for TMU0*/
if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1))
{
c_reverse = tc_reverse_blend;
a_reverse = tca_reverse_blend;
}
else
{
c_reverse = !tc_reverse_blend;
a_reverse = !tca_reverse_blend;
}
/*Colour: start from TMU1's output (the "other" input) or from zero*/
if (!tc_zero_other)
{
r = state->tex_r[1];
g = state->tex_g[1];
b = state->tex_b[1];
}
else
r = g = b = 0;
/*Optionally subtract TMU0's own texel*/
if (tc_sub_clocal)
{
r -= state->tex_r[0];
g -= state->tex_g[0];
b -= state->tex_b[0];
}
/*Select the colour blend factor*/
switch (tc_mselect)
{
case TC_MSELECT_ZERO:
factor_r = factor_g = factor_b = 0;
break;
case TC_MSELECT_CLOCAL:
factor_r = state->tex_r[0];
factor_g = state->tex_g[0];
factor_b = state->tex_b[0];
break;
case TC_MSELECT_AOTHER:
factor_r = factor_g = factor_b = state->tex_a[1];
break;
case TC_MSELECT_ALOCAL:
factor_r = factor_g = factor_b = state->tex_a[0];
break;
case TC_MSELECT_DETAIL:
factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0];
if (factor_r > params->detail_max[0])
factor_r = params->detail_max[0];
factor_g = factor_b = factor_r;
break;
case TC_MSELECT_LOD_FRAC:
factor_r = factor_g = factor_b = state->lod_frac[0];
break;
}
if (!c_reverse)
{
r = (r * (factor_r + 1)) >> 8;
g = (g * (factor_g + 1)) >> 8;
b = (b * (factor_b + 1)) >> 8;
}
else
{
r = (r * ((factor_r^0xff) + 1)) >> 8;
g = (g * ((factor_g^0xff) + 1)) >> 8;
b = (b * ((factor_b^0xff) + 1)) >> 8;
}
/*Optional add term: TMU0's texel colour or alpha*/
if (tc_add_clocal)
{
r += state->tex_r[0];
g += state->tex_g[0];
b += state->tex_b[0];
}
else if (tc_add_alocal)
{
r += state->tex_a[0];
g += state->tex_a[0];
b += state->tex_a[0];
}
/*Alpha: same sequence as the colour path*/
if (!tca_zero_other)
a = state->tex_a[1];
else
a = 0;
if (tca_sub_clocal)
a -= state->tex_a[0];
switch (tca_mselect)
{
case TCA_MSELECT_ZERO:
factor_a = 0;
break;
case TCA_MSELECT_CLOCAL:
factor_a = state->tex_a[0];
break;
case TCA_MSELECT_AOTHER:
factor_a = state->tex_a[1];
break;
case TCA_MSELECT_ALOCAL:
factor_a = state->tex_a[0];
break;
case TCA_MSELECT_DETAIL:
factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0];
if (factor_a > params->detail_max[0])
factor_a = params->detail_max[0];
break;
case TCA_MSELECT_LOD_FRAC:
factor_a = state->lod_frac[0];
break;
}
if (a_reverse)
a = (a * ((factor_a ^ 0xff) + 1)) >> 8;
else
a = (a * (factor_a + 1)) >> 8;
if (tca_add_clocal || tca_add_alocal)
a += state->tex_a[0];
/*Store the clamped result as TMU0's output, then apply output inversion*/
state->tex_r[0] = CLAMP(r);
state->tex_g[0] = CLAMP(g);
state->tex_b[0] = CLAMP(b);
state->tex_a[0] = CLAMP(a);
if (tc_invert_output)
{
state->tex_r[0] ^= 0xff;
state->tex_g[0] ^= 0xff;
state->tex_b[0] ^= 0xff;
}
if (tca_invert_output)
state->tex_a[0] ^= 0xff;
}
/*Pull in the code generator header matching the build target: the x86
  variant for 32-bit x86 / Windows targets that are not __amd64__, the x86-64
  variant when __amd64__ is defined, and otherwise only a voodoo_recomp
  variable.
  NOTE(review): "defined _WIN32" is tested twice.  A 64-bit Windows compiler
  that defines _WIN32 but not __amd64__ would select the 32-bit header -
  confirm how the project's 64-bit build defines these macros.
  NOTE(review): the fallback branch does not define NO_CODEGEN, which the
  "#ifndef NO_CODEGEN" sections further down test; presumably the build
  supplies it where needed - confirm.*/
#if (defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__)
#include "vid_voodoo_codegen_x86.h"
#elif (defined __amd64__)
#include "vid_voodoo_codegen_x86-64.h"
#else
int voodoo_recomp = 0;
#endif
/*Rasterise the scanlines ystart..yend-1 of one triangle.

  voodoo   - device state (frame buffer, texture cache, statistics)
  params   - parameters of the triangle being drawn
  state    - working state prepared by the caller; its base_* values are
             advanced scanline by scanline here
  ystart   - first scanline
  yend     - scanline after the last one (exclusive)
  odd_even - only scanlines whose (masked) line number equals this value are
             drawn by this call; the others are skipped

  For each scanline the span ends are derived from the triangle edges, the
  span is clipped, and the pixels are drawn either by the recompiled routine
  (voodoo->use_recompiler) or by the interpreter loop below.

  Many identifiers used in the pixel loop (cc_*, cca_*, a_sel, _rgb_sel,
  dither*, SLI_ENABLED, CLAMP, CLAMP16, DEPTH_TEST, ALPHA_TEST, APPLY_FOG,
  ALPHA_BLEND) are not declared in this function; presumably they are macros
  from the Voodoo headers that decode params and operate on the local
  variables and the skip_pixel label by name - confirm before renaming
  anything here.*/
static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int ystart, int yend, int odd_even)
{
/* int rgb_sel = params->fbzColorPath & 3;
int a_sel = (params->fbzColorPath >> 2) & 3;
int cc_localselect = params->fbzColorPath & (1 << 4);
int cca_localselect = (params->fbzColorPath >> 5) & 3;
int cc_localselect_override = params->fbzColorPath & (1 << 7);
int cc_zero_other = params->fbzColorPath & (1 << 8);
int cc_sub_clocal = params->fbzColorPath & (1 << 9);
int cc_mselect = (params->fbzColorPath >> 10) & 7;
int cc_reverse_blend = params->fbzColorPath & (1 << 13);
int cc_add = (params->fbzColorPath >> 14) & 3;
int cc_add_alocal = params->fbzColorPath & (1 << 15);
int cc_invert_output = params->fbzColorPath & (1 << 16);
int cca_zero_other = params->fbzColorPath & (1 << 17);
int cca_sub_clocal = params->fbzColorPath & (1 << 18);
int cca_mselect = (params->fbzColorPath >> 19) & 7;
int cca_reverse_blend = params->fbzColorPath & (1 << 22);
int cca_add = (params->fbzColorPath >> 23) & 3;
int cca_invert_output = params->fbzColorPath & (1 << 25);
int src_afunc = (params->alphaMode >> 8) & 0xf;
int dest_afunc = (params->alphaMode >> 12) & 0xf;
int alpha_func = (params->alphaMode >> 1) & 7;
int a_ref = params->alphaMode >> 24;
int depth_op = (params->fbzMode >> 5) & 7;
int dither = params->fbzMode & FBZ_DITHER;*/
int texels;
int c;
#ifndef NO_CODEGEN
uint8_t (__cdecl *voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y);
#endif
/*With SLI each call steps two scanlines at a time*/
int y_diff = SLI_ENABLED ? 2 : 1;
/*Line used as the origin when the Y axis is flipped (fbzMode bit 17)*/
int y_origin = (voodoo->type >= VOODOO_BANSHEE) ? voodoo->y_origin_swap : (voodoo->v_disp-1);
/*Number of texels accounted per pixel in the statistics*/
if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH ||
(params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL)
texels = 1;
else
texels = 2;
/*Cache the clamp flags of both TMUs*/
state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS;
state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT;
state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS;
state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT;
// int last_x;
// pclog("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir);
/*Point at the cached texel data of every LOD level for both TMUs*/
for (c = 0; c <= LOD_MAX; c++)
{
state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]];
state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]];
}
state->tformat = params->tformat[0];
state->tex_w_mask[0] = params->tex_w_mask[0];
state->tex_h_mask[0] = params->tex_h_mask[0];
state->tex_shift[0] = params->tex_shift[0];
state->tex_lod[0] = params->tex_lod[0];
state->tex_w_mask[1] = params->tex_w_mask[1];
state->tex_h_mask[1] = params->tex_h_mask[1];
state->tex_shift[1] = params->tex_shift[1];
state->tex_lod[1] = params->tex_lod[1];
/*Vertical clipping (fbzMode bit 0): skip the lines above clipLowY by
  advancing all scanline start values by the number of skipped lines*/
if ((params->fbzMode & 1) && (ystart < params->clipLowY))
{
int dy = params->clipLowY - ystart;
state->base_r += params->dRdY*dy;
state->base_g += params->dGdY*dy;
state->base_b += params->dBdY*dy;
state->base_a += params->dAdY*dy;
state->base_z += params->dZdY*dy;
state->tmu[0].base_s += params->tmu[0].dSdY*dy;
state->tmu[0].base_t += params->tmu[0].dTdY*dy;
state->tmu[0].base_w += params->tmu[0].dWdY*dy;
state->tmu[1].base_s += params->tmu[1].dSdY*dy;
state->tmu[1].base_t += params->tmu[1].dTdY*dy;
state->tmu[1].base_w += params->tmu[1].dWdY*dy;
state->base_w += params->dWdY*dy;
state->xstart += state->dx1*dy;
state->xend += state->dx2*dy;
ystart = params->clipLowY;
}
if ((params->fbzMode & 1) && (yend >= params->clipHighY))
yend = params->clipHighY;
state->y = ystart;
// yend--;
/*SLI: if the first line does not belong to this card (judged by the parity
  of the possibly flipped line number and the master/slave bit), step one
  line forward so that the loop below only visits this card's lines*/
if (SLI_ENABLED)
{
int test_y;
if (params->fbzMode & (1 << 17))
test_y = y_origin - state->y;
else
test_y = state->y;
if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (test_y & 1)) ||
((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(test_y & 1)))
{
state->y++;
state->base_r += params->dRdY;
state->base_g += params->dGdY;
state->base_b += params->dBdY;
state->base_a += params->dAdY;
state->base_z += params->dZdY;
state->tmu[0].base_s += params->tmu[0].dSdY;
state->tmu[0].base_t += params->tmu[0].dTdY;
state->tmu[0].base_w += params->tmu[0].dWdY;
state->tmu[1].base_s += params->tmu[1].dSdY;
state->tmu[1].base_t += params->tmu[1].dTdY;
state->tmu[1].base_w += params->tmu[1].dWdY;
state->base_w += params->dWdY;
state->xstart += state->dx1;
state->xend += state->dx2;
}
}
#ifndef NO_CODEGEN
/*Obtain the recompiled span routine for the current parameters, if enabled*/
typedef uint8_t(__cdecl *VOODOO_DRAW)(voodoo_state_t*,voodoo_params_t*, int,int);
if (voodoo->use_recompiler)
voodoo_draw = (VOODOO_DRAW)voodoo_get_block(voodoo, params, state, odd_even);
else
voodoo_draw = NULL;
#endif
if (voodoo_output)
pclog("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC);
// pclog("Start %i %i\n", ystart, voodoo->fbzMode & (1 << 17));
/*---- Scanline loop ----*/
for (; state->y < yend; state->y += y_diff)
{
int x, x2;
/*Scanline centre with 4 fractional bits, for use with the vertex values*/
int real_y = (state->y << 4) + 8;
int start_x, start_x2;
int dx;
uint16_t *fb_mem, *aux_mem;
/*Reset the per-pixel iterators to this scanline's start values*/
state->ir = state->base_r;
state->ig = state->base_g;
state->ib = state->base_b;
state->ia = state->base_a;
state->z = state->base_z;
state->tmu0_s = state->tmu[0].base_s;
state->tmu0_t = state->tmu[0].base_t;
state->tmu0_w = state->tmu[0].base_w;
state->tmu1_s = state->tmu[1].base_s;
state->tmu1_t = state->tmu[1].base_t;
state->tmu1_w = state->tmu[1].base_w;
state->w = state->base_w;
/*Span ends: x follows edge A-C, x2 follows A-B above vertex B and B-C below*/
x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4);
if (real_y < state->vertexBy)
x2 = (state->vertexAx << 12) + ((state->dxAB * (real_y - state->vertexAy)) >> 4);
else
x2 = (state->vertexBx << 12) + ((state->dxBC * (real_y - state->vertexBy)) >> 4);
/*Convert to an integer frame buffer line, flipped when fbzMode bit 17 is set*/
if (params->fbzMode & (1 << 17))
real_y = y_origin - (real_y >> 4);
else
real_y >>= 4;
/*Skip lines that are not assigned to this call*/
if (SLI_ENABLED)
{
if (((real_y >> 1) & voodoo->odd_even_mask) != odd_even)
goto next_line;
}
else
{
if ((real_y & voodoo->odd_even_mask) != odd_even)
goto next_line;
}
start_x = x;
/*Pull the end that lies in the drawing direction in by one pixel*/
if (state->xdir > 0)
x2 -= (1 << 16);
else
x -= (1 << 16);
/*Whole-pixel distance between the span start and vertex A; the iterators
  are advanced by this much below*/
dx = ((x + 0x7000) >> 16) - (((state->vertexAx << 12) + 0x7000) >> 16);
start_x2 = x + 0x7000;
x = (x + 0x7000) >> 16;
x2 = (x2 + 0x7000) >> 16;
/*NOTE(review): state->tmu0_t is 64 bits wide but is formatted with %08x
  here (and again in the per-pixel trace below); if pclog() is printf-style
  the specifier should match the argument type - confirm and fix.*/
if (voodoo_output)
pclog("%03i:%03i : Ax=%08x start_x=%08x dSdX=%016llx dx=%08x s=%08x -> ", x, state->y, state->vertexAx << 8, start_x, params->tmu[0].dTdX, dx, state->tmu0_t);
state->ir += (params->dRdX * dx);
state->ig += (params->dGdX * dx);
state->ib += (params->dBdX * dx);
state->ia += (params->dAdX * dx);
state->z += (params->dZdX * dx);
state->tmu0_s += (params->tmu[0].dSdX * dx);
state->tmu0_t += (params->tmu[0].dTdX * dx);
state->tmu0_w += (params->tmu[0].dWdX * dx);
state->tmu1_s += (params->tmu[1].dSdX * dx);
state->tmu1_t += (params->tmu[1].dTdX * dx);
state->tmu1_w += (params->tmu[1].dWdX * dx);
state->w += (params->dWdX * dx);
if (voodoo_output)
pclog("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod));
/*Horizontal clipping (fbzMode bit 0): move the start inside the clip
  rectangle, advancing the iterators accordingly, and limit the end*/
if (params->fbzMode & 1)
{
if (state->xdir > 0)
{
if (x < params->clipLeft)
{
int dx = params->clipLeft - x;
state->ir += params->dRdX*dx;
state->ig += params->dGdX*dx;
state->ib += params->dBdX*dx;
state->ia += params->dAdX*dx;
state->z += params->dZdX*dx;
state->tmu0_s += params->tmu[0].dSdX*dx;
state->tmu0_t += params->tmu[0].dTdX*dx;
state->tmu0_w += params->tmu[0].dWdX*dx;
state->tmu1_s += params->tmu[1].dSdX*dx;
state->tmu1_t += params->tmu[1].dTdX*dx;
state->tmu1_w += params->tmu[1].dWdX*dx;
state->w += params->dWdX*dx;
x = params->clipLeft;
}
if (x2 >= params->clipRight)
x2 = params->clipRight-1;
}
else
{
if (x >= params->clipRight)
{
int dx = (params->clipRight-1) - x;
state->ir += params->dRdX*dx;
state->ig += params->dGdX*dx;
state->ib += params->dBdX*dx;
state->ia += params->dAdX*dx;
state->z += params->dZdX*dx;
state->tmu0_s += params->tmu[0].dSdX*dx;
state->tmu0_t += params->tmu[0].dTdX*dx;
state->tmu0_w += params->tmu[0].dWdX*dx;
state->tmu1_s += params->tmu[1].dSdX*dx;
state->tmu1_t += params->tmu[1].dTdX*dx;
state->tmu1_w += params->tmu[1].dWdX*dx;
state->w += params->dWdX*dx;
x = params->clipRight-1;
}
if (x2 < params->clipLeft)
x2 = params->clipLeft;
}
}
/*Nothing to draw if the span is empty for its direction*/
if (x2 < x && state->xdir > 0)
goto next_line;
if (x2 > x && state->xdir < 0)
goto next_line;
/*Locate this line in the colour and auxiliary buffers.  With SLI each card
  stores every other line, hence real_y >> 1; tiled buffers use a 32-line,
  128-byte-wide row layout.*/
if (SLI_ENABLED)
{
state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + ((real_y >> 1) * params->row_width)];
state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + ((real_y >> 1) * params->row_width)) & voodoo->fb_mask];
}
else
{
if (params->col_tiled)
state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y >> 5) * params->row_width + (real_y & 31) * 128];
else
state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * params->row_width)];
if (params->aux_tiled)
state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y >> 5) * params->aux_row_width + (real_y & 31) * 128) & voodoo->fb_mask];
else
state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * params->row_width)) & voodoo->fb_mask];
}
if (voodoo_output)
pclog("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2);
state->pixel_count = 0;
state->texel_count = 0;
state->x = x;
state->x2 = x2;
#ifndef NO_CODEGEN
/*Recompiled path: the generated routine draws the whole span*/
if (voodoo->use_recompiler)
{
voodoo_draw(state, params, x, real_y);
}
else
#endif
/*Interpreter path: one iteration per pixel, from x to x2 inclusive*/
do
{
/*Index of this pixel inside a tiled buffer*/
int x_tiled = (x & 63) | ((x >> 6) * 128*32/2);
start_x = x;
state->x = x;
voodoo->pixel_count[odd_even]++;
voodoo->texel_count[odd_even] += texels;
voodoo->fbiPixelsIn++;
if (voodoo_output)
pclog(" X=%03i T=%08x\n", x, state->tmu0_t);
// if (voodoo->fbzMode & FBZ_RGB_WMASK)
{
int update = 1;
uint8_t cother_r, cother_g, cother_b, aother;
uint8_t clocal_r, clocal_g, clocal_b, alocal;
int src_r = 0, src_g = 0, src_b = 0, src_a = 0;
int msel_r, msel_g, msel_b, msel_a;
uint8_t dest_r, dest_g, dest_b, dest_a;
uint16_t dat;
int sel;
int32_t new_depth, w_depth;
/*Convert W into a 16-bit depth value made of a 4-bit exponent (leading
  zero count of bits 16-31) and a 12-bit inverted mantissa*/
if (state->w & 0xffff00000000)
w_depth = 0;
else if (!(state->w & 0xffff0000))
w_depth = 0xf001;
else
{
int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16));
int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff;
w_depth = (exp << 12) + mant + 1;
if (w_depth > 0xffff)
w_depth = 0xffff;
}
// w_depth = CLAMP16(w_depth);
/*Depth value for this pixel: from W or from the iterated Z, plus optional bias*/
if (params->fbzMode & FBZ_W_BUFFER)
new_depth = w_depth;
else
new_depth = CLAMP16(state->z >> 12);
if (params->fbzMode & FBZ_DEPTH_BIAS)
new_depth = CLAMP16(new_depth + (int16_t)params->zaColor);
/*Depth test against the value stored in the auxiliary buffer.
  NOTE(review): the tiling flags are read from voodoo->params here and in
  the writes below, whereas the line addresses above use params - confirm
  that the two always agree.*/
if (params->fbzMode & FBZ_DEPTH_ENABLE)
{
uint16_t old_depth = voodoo->params.aux_tiled ? aux_mem[x_tiled] : aux_mem[x];
DEPTH_TEST((params->fbzMode & FBZ_DEPTH_SOURCE) ? (params->zaColor & 0xffff) : new_depth);
}
/*Read the destination pixel (5:6:5) and expand it to 8 bits per channel*/
dat = voodoo->params.col_tiled ? fb_mem[x_tiled] : fb_mem[x];
dest_r = (dat >> 8) & 0xf8;
dest_g = (dat >> 3) & 0xfc;
dest_b = (dat << 3) & 0xf8;
dest_r |= (dest_r >> 5);
dest_g |= (dest_g >> 6);
dest_b |= (dest_b >> 5);
dest_a = 0xff;
/*Texturing*/
if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
{
if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus)
{
/*TMU0 only sampling local colour or only one TMU, only sample TMU0*/
voodoo_tmu_fetch(voodoo, params, state, 0, x);
}
else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH)
{
/*TMU0 in pass-through mode, only sample TMU1*/
voodoo_tmu_fetch(voodoo, params, state, 1, x);
state->tex_r[0] = state->tex_r[1];
state->tex_g[0] = state->tex_g[1];
state->tex_b[0] = state->tex_b[1];
state->tex_a[0] = state->tex_a[1];
}
else
{
voodoo_tmu_fetch_and_blend(voodoo, params, state, x);
}
/*Chroma key: drop the pixel when the texel colour equals the key colour*/
if ((params->fbzMode & FBZ_CHROMAKEY) &&
state->tex_r[0] == params->chromaKey_r &&
state->tex_g[0] == params->chromaKey_g &&
state->tex_b[0] == params->chromaKey_b)
{
voodoo->fbiChromaFail++;
goto skip_pixel;
}
}
/*trexInit1 bit 18 replaces the texel colour with the TMU configuration value*/
if (voodoo->trexInit1[0] & (1 << 18))
{
state->tex_r[0] = state->tex_g[0] = 0;
state->tex_b[0] = voodoo->tmuConfig;
}
/*---- Colour combine ----*/
/*Local colour: color0 or the iterated RGB; the choice can be overridden by
  bit 7 of the texel alpha*/
if (cc_localselect_override)
sel = (state->tex_a[0] & 0x80) ? 1 : 0;
else
sel = cc_localselect;
if (sel)
{
clocal_r = (params->color0 >> 16) & 0xff;
clocal_g = (params->color0 >> 8) & 0xff;
clocal_b = params->color0 & 0xff;
}
else
{
clocal_r = CLAMP(state->ir >> 12);
clocal_g = CLAMP(state->ig >> 12);
clocal_b = CLAMP(state->ib >> 12);
}
/*"Other" colour source*/
switch (_rgb_sel)
{
case CC_LOCALSELECT_ITER_RGB: /*Iterated RGB*/
cother_r = CLAMP(state->ir >> 12);
cother_g = CLAMP(state->ig >> 12);
cother_b = CLAMP(state->ib >> 12);
break;
case CC_LOCALSELECT_TEX: /*TREX Color Output*/
cother_r = state->tex_r[0];
cother_g = state->tex_g[0];
cother_b = state->tex_b[0];
break;
case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/
cother_r = (params->color1 >> 16) & 0xff;
cother_g = (params->color1 >> 8) & 0xff;
cother_b = params->color1 & 0xff;
break;
case CC_LOCALSELECT_LFB: /*Linear Frame Buffer*/
cother_r = src_r;
cother_g = src_g;
cother_b = src_b;
break;
}
/*Local alpha source*/
switch (cca_localselect)
{
case CCA_LOCALSELECT_ITER_A:
alocal = CLAMP(state->ia >> 12);
break;
case CCA_LOCALSELECT_COLOR0:
alocal = (params->color0 >> 24) & 0xff;
break;
case CCA_LOCALSELECT_ITER_Z:
alocal = CLAMP(state->z >> 20);
break;
default:
fatal("Bad cca_localselect %i\n", cca_localselect);
alocal = 0xff;
break;
}
/*"Other" alpha source*/
switch (a_sel)
{
case A_SEL_ITER_A:
aother = CLAMP(state->ia >> 12);
break;
case A_SEL_TEX:
aother = state->tex_a[0];
break;
case A_SEL_COLOR1:
aother = (params->color1 >> 24) & 0xff;
break;
default:
fatal("Bad a_sel %i\n", a_sel);
aother = 0;
break;
}
/*src = (other or 0) - (local, optional)*/
if (cc_zero_other)
{
src_r = 0;
src_g = 0;
src_b = 0;
}
else
{
src_r = cother_r;
src_g = cother_g;
src_b = cother_b;
}
if (cca_zero_other)
src_a = 0;
else
src_a = aother;
if (cc_sub_clocal)
{
src_r -= clocal_r;
src_g -= clocal_g;
src_b -= clocal_b;
}
if (cca_sub_clocal)
src_a -= alocal;
/*Blend factor selection for colour and alpha*/
switch (cc_mselect)
{
case CC_MSELECT_ZERO:
msel_r = 0;
msel_g = 0;
msel_b = 0;
break;
case CC_MSELECT_CLOCAL:
msel_r = clocal_r;
msel_g = clocal_g;
msel_b = clocal_b;
break;
case CC_MSELECT_AOTHER:
msel_r = aother;
msel_g = aother;
msel_b = aother;
break;
case CC_MSELECT_ALOCAL:
msel_r = alocal;
msel_g = alocal;
msel_b = alocal;
break;
case CC_MSELECT_TEX:
msel_r = state->tex_a[0];
msel_g = state->tex_a[0];
msel_b = state->tex_a[0];
break;
case CC_MSELECT_TEXRGB:
msel_r = state->tex_r[0];
msel_g = state->tex_g[0];
msel_b = state->tex_b[0];
break;
default:
fatal("Bad cc_mselect %i\n", cc_mselect);
msel_r = 0;
msel_g = 0;
msel_b = 0;
break;
}
switch (cca_mselect)
{
case CCA_MSELECT_ZERO:
msel_a = 0;
break;
case CCA_MSELECT_ALOCAL:
msel_a = alocal;
break;
case CCA_MSELECT_AOTHER:
msel_a = aother;
break;
case CCA_MSELECT_ALOCAL2:
msel_a = alocal;
break;
case CCA_MSELECT_TEX:
msel_a = state->tex_a[0];
break;
default:
fatal("Bad cca_mselect %i\n", cca_mselect);
msel_a = 0;
break;
}
/*The factor is inverted unless reverse blend is set, then incremented so
  that 0xff maps to a multiplier of 256*/
if (!cc_reverse_blend)
{
msel_r ^= 0xff;
msel_g ^= 0xff;
msel_b ^= 0xff;
}
msel_r++;
msel_g++;
msel_b++;
if (!cca_reverse_blend)
msel_a ^= 0xff;
msel_a++;
src_r = (src_r * msel_r) >> 8;
src_g = (src_g * msel_g) >> 8;
src_b = (src_b * msel_b) >> 8;
src_a = (src_a * msel_a) >> 8;
/*Optional add term*/
switch (cc_add)
{
case CC_ADD_CLOCAL:
src_r += clocal_r;
src_g += clocal_g;
src_b += clocal_b;
break;
case CC_ADD_ALOCAL:
src_r += alocal;
src_g += alocal;
src_b += alocal;
break;
case 0:
break;
default:
fatal("Bad cc_add %i\n", cc_add);
}
if (cca_add)
src_a += alocal;
src_r = CLAMP(src_r);
src_g = CLAMP(src_g);
src_b = CLAMP(src_b);
src_a = CLAMP(src_a);
if (cc_invert_output)
{
src_r ^= 0xff;
src_g ^= 0xff;
src_b ^= 0xff;
}
if (cca_invert_output)
src_a ^= 0xff;
/*---- Fog, alpha test, alpha blend ----*/
if (params->fogMode & FOG_ENABLE)
APPLY_FOG(src_r, src_g, src_b, state->z, state->ia, state->w);
if (params->alphaMode & 1)
ALPHA_TEST(src_a);
if (params->alphaMode & (1 << 4))
{
/*Optionally remove the dither pattern from the destination colour before
  blending (4x4 or 2x2 pattern)*/
if (dithersub && !dither2x2 && voodoo->dithersub_enabled)
{
dest_r = dithersub_rb[dest_r][real_y & 3][x & 3];
dest_g = dithersub_g [dest_g][real_y & 3][x & 3];
dest_b = dithersub_rb[dest_b][real_y & 3][x & 3];
}
if (dithersub && dither2x2 && voodoo->dithersub_enabled)
{
dest_r = dithersub_rb2x2[dest_r][real_y & 1][x & 1];
dest_g = dithersub_g2x2 [dest_g][real_y & 1][x & 1];
dest_b = dithersub_rb2x2[dest_b][real_y & 1][x & 1];
}
ALPHA_BLEND(src_r, src_g, src_b, src_a);
}
/*---- Write back ----*/
if (update)
{
/*Reduce to 5:6:5, either through the dither tables or by truncation*/
if (dither)
{
if (dither2x2)
{
src_r = dither_rb2x2[src_r][real_y & 1][x & 1];
src_g = dither_g2x2[src_g][real_y & 1][x & 1];
src_b = dither_rb2x2[src_b][real_y & 1][x & 1];
}
else
{
src_r = dither_rb[src_r][real_y & 3][x & 3];
src_g = dither_g[src_g][real_y & 3][x & 3];
src_b = dither_rb[src_b][real_y & 3][x & 3];
}
}
else
{
src_r >>= 3;
src_g >>= 2;
src_b >>= 3;
}
/*Colour write, if enabled*/
if (params->fbzMode & FBZ_RGB_WMASK)
{
if (voodoo->params.col_tiled)
fb_mem[x_tiled] = src_b | (src_g << 5) | (src_r << 11);
else
fb_mem[x] = src_b | (src_g << 5) | (src_r << 11);
}
/*Depth write, only when both depth testing and depth writes are enabled*/
if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE))
{
if (voodoo->params.aux_tiled)
aux_mem[x_tiled] = new_depth;
else
aux_mem[x] = new_depth;
}
}
}
voodoo_output &= ~2;
voodoo->fbiPixelsOut++;
/*Reached directly (skipping the write and the PixelsOut count) when the
  pixel is rejected*/
skip_pixel:
/*Step every iterator one pixel in the drawing direction*/
if (state->xdir > 0)
{
state->ir += params->dRdX;
state->ig += params->dGdX;
state->ib += params->dBdX;
state->ia += params->dAdX;
state->z += params->dZdX;
state->tmu0_s += params->tmu[0].dSdX;
state->tmu0_t += params->tmu[0].dTdX;
state->tmu0_w += params->tmu[0].dWdX;
state->tmu1_s += params->tmu[1].dSdX;
state->tmu1_t += params->tmu[1].dTdX;
state->tmu1_w += params->tmu[1].dWdX;
state->w += params->dWdX;
}
else
{
state->ir -= params->dRdX;
state->ig -= params->dGdX;
state->ib -= params->dBdX;
state->ia -= params->dAdX;
state->z -= params->dZdX;
state->tmu0_s -= params->tmu[0].dSdX;
state->tmu0_t -= params->tmu[0].dTdX;
state->tmu0_w -= params->tmu[0].dWdX;
state->tmu1_s -= params->tmu[1].dSdX;
state->tmu1_t -= params->tmu[1].dTdX;
state->tmu1_w -= params->tmu[1].dWdX;
state->w -= params->dWdX;
}
x += state->xdir;
/*start_x holds the pixel just processed, so the loop ends after x2 is drawn*/
} while (start_x != x2);
/*Add the counters accumulated in state for this span to the statistics
  (they were cleared before the span; the interpreter loop above counts
  directly into voodoo instead)*/
voodoo->pixel_count[odd_even] += state->pixel_count;
voodoo->texel_count[odd_even] += state->texel_count;
voodoo->fbiPixelsIn += state->pixel_count;
/*Flag the line as changed when drawing into the front buffer*/
if (voodoo->params.draw_offset == voodoo->params.front_offset && (real_y >> 1) < 2048)
voodoo->dirty_line[real_y >> 1] = 1;
next_line:
/*Advance the scanline start values; with SLI the loop steps two lines at a
  time, so the gradients are applied twice*/
if (SLI_ENABLED)
{
state->base_r += params->dRdY;
state->base_g += params->dGdY;
state->base_b += params->dBdY;
state->base_a += params->dAdY;
state->base_z += params->dZdY;
state->tmu[0].base_s += params->tmu[0].dSdY;
state->tmu[0].base_t += params->tmu[0].dTdY;
state->tmu[0].base_w += params->tmu[0].dWdY;
state->tmu[1].base_s += params->tmu[1].dSdY;
state->tmu[1].base_t += params->tmu[1].dTdY;
state->tmu[1].base_w += params->tmu[1].dWdY;
state->base_w += params->dWdY;
state->xstart += state->dx1;
state->xend += state->dx2;
}
state->base_r += params->dRdY;
state->base_g += params->dGdY;
state->base_b += params->dBdY;
state->base_a += params->dAdY;
state->base_z += params->dZdY;
state->tmu[0].base_s += params->tmu[0].dSdY;
state->tmu[0].base_t += params->tmu[0].dTdY;
state->tmu[0].base_w += params->tmu[0].dWdY;
state->tmu[1].base_s += params->tmu[1].dSdY;
state->tmu[1].base_t += params->tmu[1].dTdY;
state->tmu[1].base_w += params->tmu[1].dWdY;
state->base_w += params->dWdY;
state->xstart += state->dx1;
state->xend += state->dx2;
}
/*Bump the per-thread read counters of the texture cache entries that were used*/
voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++;
voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++;
}
/* Sign-extend the low 16 bits of a vertex coordinate to a 32-bit value. */
static int32_t voodoo_tri_sext16(int32_t coord)
{
    int32_t v = coord & ~0xffff0000;

    if (v & 0x8000)
        v |= 0xffff0000;
    return v;
}

/*
 * X step per unit of Y along the edge (x0,y0)-(x1,y1): the X difference
 * scaled up by 16 bits and divided by the Y difference.  An edge whose two
 * Y values are equal yields 0 rather than dividing by zero.
 */
static int32_t voodoo_tri_edge_slope(int32_t x0, int32_t y0, int32_t x1, int32_t y1)
{
    if (!(y1 - y0))
        return 0;
    return (int)((((int64_t)x1 << 12) - ((int64_t)x0 << 12)) << 4) / (int)(y1 - y0);
}

/*
 * Turn the squared texture gradients in X and Y into the triangle's base
 * LOD: log2(max(grad_x, grad_y) / 2^36) * 256, shifted right by 2.
 *
 * A zero gradient makes log2() return -infinity, and converting that to int
 * is undefined behaviour.  That case is handled explicitly and returns the
 * value the out-of-range conversion produces on x86 (INT_MIN, then >> 2),
 * so the result stays a very large negative LOD.
 */
static int voodoo_tri_base_lod(uint64_t grad_x, uint64_t grad_y)
{
    uint64_t grad = (grad_x > grad_y) ? grad_x : grad_y;
    int lod;

    if (!grad)
        return -(1 << 29);

    lod = (int)(log2((double)grad / (double)(1ULL << 36)) * 256);
    return lod >> 2;
}

/*
 * Set up the per-triangle rasteriser state from a parameter block and hand
 * it to voodoo_half_triangle().
 *
 * voodoo   - device instance; its tri_count is incremented.
 * params   - triangle parameters (vertices, start values, gradients, tLOD).
 * odd_even - passed through unchanged to voodoo_half_triangle().
 *
 * The state is built in these steps:
 *  - start values for colour, depth, W and both TMUs' S/T/W are copied from
 *    params, optionally adjusted when FBZ_PARAM_ADJUST is set in fbzColorPath;
 *  - the six vertex coordinates are sign-extended from 16 bits;
 *  - the slopes of edges AB, AC and BC are computed;
 *  - per-TMU LOD limits and the base LOD (gradient term plus LOD bias) are
 *    derived from the texture gradients and tLOD.
 */
void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even)
{
    voodoo_state_t state;
    int vertexAy_adjusted;
    int vertexCy_adjusted;
    int dx, dy;
    int tmu;

    voodoo->tri_count++;

    /* 8 minus the 4-bit fraction of vertex A, plus 16 when that fraction
       exceeds 8.  Only used for the FBZ_PARAM_ADJUST correction below. */
    dx = 8 - (params->vertexAx & 0xf);
    if ((params->vertexAx & 0xf) > 8)
        dx += 16;
    dy = 8 - (params->vertexAy & 0xf);
    if ((params->vertexAy & 0xf) > 8)
        dy += 16;

/* pclog("voodoo_triangle %i %i %i : vA %f, %f vB %f, %f vC %f, %f f %i,%i %08x %08x %08x,%08x tex=%i,%i fogMode=%08x\n", odd_even, voodoo->params_read_idx[odd_even], voodoo->params_read_idx[odd_even] & PARAM_MASK, (float)params->vertexAx / 16.0, (float)params->vertexAy / 16.0,
(float)params->vertexBx / 16.0, (float)params->vertexBy / 16.0,
(float)params->vertexCx / 16.0, (float)params->vertexCy / 16.0,
(params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[0] : 0,
(params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[1] : 0, params->fbzColorPath, params->alphaMode, params->textureMode[0],params->textureMode[1], params->tex_entry[0],params->tex_entry[1], params->fogMode);*/

    state.base_r = params->startR;
    state.base_g = params->startG;
    state.base_b = params->startB;
    state.base_a = params->startA;
    state.base_z = params->startZ;
    state.tmu[0].base_s = params->tmu[0].startS;
    state.tmu[0].base_t = params->tmu[0].startT;
    state.tmu[0].base_w = params->tmu[0].startW;
    state.tmu[1].base_s = params->tmu[1].startS;
    state.tmu[1].base_t = params->tmu[1].startT;
    state.tmu[1].base_w = params->tmu[1].startW;
    state.base_w = params->startW;

    if (params->fbzColorPath & FBZ_PARAM_ADJUST)
    {
        /* Move every start value by (dx, dy) sixteenths along its gradients. */
        state.base_r += (dx*params->dRdX + dy*params->dRdY) >> 4;
        state.base_g += (dx*params->dGdX + dy*params->dGdY) >> 4;
        state.base_b += (dx*params->dBdX + dy*params->dBdY) >> 4;
        state.base_a += (dx*params->dAdX + dy*params->dAdY) >> 4;
        state.base_z += (dx*params->dZdX + dy*params->dZdY) >> 4;
        state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4;
        state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4;
        state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4;
        state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4;
        state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4;
        state.tmu[1].base_w += (dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4;
        state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4;
    }

    tris++;

    /* Only the low 16 bits of each vertex coordinate are used, as a signed value. */
    state.vertexAy = voodoo_tri_sext16(params->vertexAy);
    state.vertexBy = voodoo_tri_sext16(params->vertexBy);
    state.vertexCy = voodoo_tri_sext16(params->vertexCy);
    state.vertexAx = voodoo_tri_sext16(params->vertexAx);
    state.vertexBx = voodoo_tri_sext16(params->vertexBx);
    state.vertexCx = voodoo_tri_sext16(params->vertexCx);

    vertexAy_adjusted = (state.vertexAy+7) >> 4;
    vertexCy_adjusted = (state.vertexCy+7) >> 4;

    state.dxAB = voodoo_tri_edge_slope(state.vertexAx, state.vertexAy, state.vertexBx, state.vertexBy);
    state.dxAC = voodoo_tri_edge_slope(state.vertexAx, state.vertexAy, state.vertexCx, state.vertexCy);
    state.dxBC = voodoo_tri_edge_slope(state.vertexBx, state.vertexBy, state.vertexCx, state.vertexCy);

    state.xstart = state.xend = state.vertexAx << 8;
    state.xdir = params->sign ? -1 : 1;

    state.y = (state.vertexAy + 8) >> 4;
    state.ydir = 1;

    for (tmu = 0; tmu < 2; tmu++)
    {
        /* Squared length of the (S,T) gradient along X and along Y. */
        uint64_t tempdx = (params->tmu[tmu].dSdX >> 14) * (params->tmu[tmu].dSdX >> 14) + (params->tmu[tmu].dTdX >> 14) * (params->tmu[tmu].dTdX >> 14);
        uint64_t tempdy = (params->tmu[tmu].dSdY >> 14) * (params->tmu[tmu].dSdY >> 14) + (params->tmu[tmu].dTdY >> 14) * (params->tmu[tmu].dTdY >> 14);
        /* tLOD bits 12-17: LOD bias, a signed 6-bit field. */
        int lodbias = (params->tLOD[tmu] >> 12) & 0x3f;

        /* tLOD bits 0-5 and 6-11: LOD min / max, scaled by 64; max is capped at 0x800. */
        state.lod_min[tmu] = (params->tLOD[tmu] & 0x3f) << 6;
        state.lod_max[tmu] = ((params->tLOD[tmu] >> 6) & 0x3f) << 6;
        if (state.lod_max[tmu] > 0x800)
            state.lod_max[tmu] = 0x800;

        if (lodbias & 0x20)
            lodbias |= ~0x3f;

        /* Multiply instead of << 6 so a negative bias is never left-shifted. */
        state.tmu[tmu].lod = voodoo_tri_base_lod(tempdx, tempdy) + lodbias * 64;
    }

    voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even);
}
/*
 * Render worker loop for queue slot odd_even.  Never returns.
 *
 * param    - the voodoo_t instance, passed as void *.
 * odd_even - index into the per-thread arrays (events, read index, timers).
 *
 * Each pass signals that the ring buffer has room, sleeps until woken, then
 * renders every queued parameter block for this slot while accumulating the
 * time spent in render_time[odd_even].
 */
static void render_thread(void *param, int odd_even)
{
    /* Must stay named "voodoo": the PARAM_* macros are used without passing it. */
    voodoo_t *voodoo = (voodoo_t *)param;

    for (;;)
    {
        thread_set_event(voodoo->render_not_full_event[odd_even]);
        thread_wait_event(voodoo->wake_render_thread[odd_even], -1);
        thread_reset_event(voodoo->wake_render_thread[odd_even]);

        voodoo->render_voodoo_busy[odd_even] = 1;

        while (!PARAM_EMPTY(odd_even))
        {
            const uint64_t began = timer_read();
            voodoo_params_t *entry = &voodoo->params_buffer[voodoo->params_read_idx[odd_even] & PARAM_MASK];
            uint64_t elapsed;

            voodoo_triangle(voodoo, entry, odd_even);
            voodoo->params_read_idx[odd_even]++;

            /* Still close to full after consuming one entry: signal the
               not-full event that voodoo_queue_triangle() waits on. */
            if (PARAM_ENTRIES(odd_even) > (PARAM_SIZE - 10))
                thread_set_event(voodoo->render_not_full_event[odd_even]);

            elapsed = (uint64_t)timer_read() - began;
            voodoo->render_time[odd_even] += elapsed;
        }

        voodoo->render_voodoo_busy[odd_even] = 0;
    }
}
/*
 * Runs the render worker loop for slot 0.
 * param is the voodoo_t instance (render_thread() casts it back).
 * Does not return: render_thread() loops forever.
 */
void voodoo_render_thread_1(void *param)
{
render_thread(param, 0);
}
/*
 * Runs the render worker loop for slot 1.
 * param is the voodoo_t instance (render_thread() casts it back).
 * Does not return: render_thread() loops forever.
 */
void voodoo_render_thread_2(void *param)
{
render_thread(param, 1);
}
/*
 * Runs the render worker loop for slot 2.
 * param is the voodoo_t instance (render_thread() casts it back).
 * Does not return: render_thread() loops forever.
 */
void voodoo_render_thread_3(void *param)
{
render_thread(param, 2);
}
/*
 * Runs the render worker loop for slot 3.
 * param is the voodoo_t instance (render_thread() casts it back).
 * Does not return: render_thread() loops forever.
 */
void voodoo_render_thread_4(void *param)
{
render_thread(param, 3);
}
/*
 * Append one triangle's parameters to the shared ring buffer.
 *
 * voodoo - device instance owning the ring buffer and render threads.
 * params - parameters to queue; voodoo_use_texture() is called on them
 *          before they are copied into the buffer.
 *
 * Blocks while the ring buffer is full for any active render thread
 * (thread 0 always, thread 1 when render_threads >= 2, threads 2 and 3
 * when render_threads == 4).  After queuing, the render threads are woken
 * if any active one has fewer than 4 entries pending.
 */
void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params)
{
    voodoo_params_t *slot = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK];
    const int dual = (voodoo->render_threads >= 2);
    const int quad = (voodoo->render_threads == 4);

    for (;;)
    {
        /* Leave as soon as no active thread's view of the ring buffer is full. */
        if (!PARAM_FULL(0) && !(dual && PARAM_FULL(1)) &&
            !(quad && (PARAM_FULL(2) || PARAM_FULL(3))))
            break;

        /* Clear the not-full events first, then re-check before waiting. */
        thread_reset_event(voodoo->render_not_full_event[0]);
        if (dual)
            thread_reset_event(voodoo->render_not_full_event[1]);
        if (quad)
        {
            thread_reset_event(voodoo->render_not_full_event[2]);
            thread_reset_event(voodoo->render_not_full_event[3]);
        }

        /* Wait for room in the ring buffer, one thread at a time. */
        if (PARAM_FULL(0))
            thread_wait_event(voodoo->render_not_full_event[0], -1);
        if (dual && PARAM_FULL(1))
            thread_wait_event(voodoo->render_not_full_event[1], -1);
        if (quad && PARAM_FULL(2))
            thread_wait_event(voodoo->render_not_full_event[2], -1);
        if (quad && PARAM_FULL(3))
            thread_wait_event(voodoo->render_not_full_event[3], -1);
    }

    /* Texture setup happens before the copy so the queued entry includes
       whatever voodoo_use_texture() stores in params. */
    voodoo_use_texture(voodoo, params, 0);
    if (voodoo->dual_tmus)
        voodoo_use_texture(voodoo, params, 1);

    memcpy(slot, params, sizeof(voodoo_params_t));
    voodoo->params_write_idx++;

    if (PARAM_ENTRIES(0) < 4 || (dual && PARAM_ENTRIES(1) < 4) ||
        (quad && (PARAM_ENTRIES(2) < 4 || PARAM_ENTRIES(3) < 4)))
        voodoo_wake_render_thread(voodoo);
}