git-subtree-dir: external/parallel-rdp/parallel-rdp-standalone git-subtree-split: 3f59f61f2c1c56424356003041df5e4a10612049
284 lines
9.1 KiB
C
284 lines
9.1 KiB
C
/* Copyright (c) 2020 Themaister
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef COMBINER_H_
|
|
#define COMBINER_H_
|
|
|
|
#include "clamping.h"
|
|
|
|
// Wraps every component into the 9-bit window [-0x80, 0x17f].
// The value is shifted down by 0x80, its low 9 bits are sign-extended by
// bitfieldExtract, and the shift is undone. Inputs already inside the window
// come back unchanged; nothing is clamped, values outside simply wrap around.
ivec4 special_expand(ivec4 value)
{
	const int BIAS = 0x80;
	ivec4 wrapped = bitfieldExtract(value - BIAS, 0, 9);
	return wrapped + BIAS;
}
|
|
|
|
// Evaluates the combiner formula (a - b) * c + d per component.
//   a, b, d : passed through special_expand() first.
//   c       : multiplier, sign-extended from its low 9 bits.
// The product gets 0x80 added before the arithmetic shift right by 8, i.e. it
// is rounded rather than truncated. The final addition of d is carried out on
// i16x4 values, exactly as the result type suggests.
i16x4 combiner_equation(ivec4 a, ivec4 b, ivec4 c, ivec4 d)
{
	// Need the special expansion to deal with very specific 9-bit sign bits.
	ivec4 minuend = special_expand(a);
	ivec4 subtrahend = special_expand(b);
	ivec4 addend = special_expand(d);

	// Sign-extend multiplier to 9 bits.
	ivec4 factor = bitfieldExtract(c, 0, 9);

	ivec4 scaled = (minuend - subtrahend) * factor;
	scaled += 0x80;

	i16x4 product = i16x4(scaled >> 8);
	return product + i16x4(addend);
}
|
|
|
|
// Everything the select_*() helpers can route into one of the four combiner
// slots. Field order is part of the interface (struct constructors are
// positional), so it must not be rearranged.
struct CombinerInputs
{
	// Per-slot fallbacks: picked by the matching select_*() helper whenever
	// the selector names none of the dedicated sources below.
	u8x4 constant_muladd;
	// Its .g/.b bytes additionally form the 16-bit value used for RGB_MULSUB_K4.
	u8x4 constant_mulsub;
	// Its .g/.b bytes additionally form the 16-bit value used for RGB_MUL_K5.
	u8x4 constant_mul;
	u8x4 constant_add;

	// Dedicated sources addressed by the *_SHADE, *_COMBINED, *_TEXEL0 and
	// *_TEXEL1 selectors (their .a is used for the *_ALPHA variants).
	u8x4 shade;
	i16x4 combined;
	i16x4 texel0;
	i16x4 texel1;

	// Scalar sources: RGB_MUL_LOD_FRAC / ALPHA_MUL_LOD_FRAC and RGB_MULADD_NOISE.
	i16 lod_frac;
	i16 noise;
};
|
|
|
|
// Selector encodings understood by the select_*() helpers. Any selector value
// that is not listed for a slot makes the helper fall back to that slot's
// constant_* input.

// RGB sources for the "muladd" slot (a in (a - b) * c + d).
const int RGB_MULADD_COMBINED   = 0;
const int RGB_MULADD_TEXEL0     = 1;
const int RGB_MULADD_TEXEL1     = 2;
const int RGB_MULADD_SHADE      = 4;
const int RGB_MULADD_ONE        = 6;
const int RGB_MULADD_NOISE      = 7;

// RGB sources for the "mulsub" slot (b).
const int RGB_MULSUB_COMBINED   = 0;
const int RGB_MULSUB_TEXEL0     = 1;
const int RGB_MULSUB_TEXEL1     = 2;
const int RGB_MULSUB_SHADE      = 4;
const int RGB_MULSUB_K4         = 7;

// RGB sources for the "mul" slot (c).
const int RGB_MUL_COMBINED       = 0;
const int RGB_MUL_TEXEL0         = 1;
const int RGB_MUL_TEXEL1         = 2;
const int RGB_MUL_SHADE          = 4;
const int RGB_MUL_COMBINED_ALPHA = 7;
const int RGB_MUL_TEXEL0_ALPHA   = 8;
const int RGB_MUL_TEXEL1_ALPHA   = 9;
const int RGB_MUL_SHADE_ALPHA    = 11;
const int RGB_MUL_LOD_FRAC       = 13;
const int RGB_MUL_K5             = 15;

// RGB sources for the "add" slot (d).
const int RGB_ADD_COMBINED      = 0;
const int RGB_ADD_TEXEL0        = 1;
const int RGB_ADD_TEXEL1        = 2;
const int RGB_ADD_SHADE         = 4;
const int RGB_ADD_ONE           = 6;

// Alpha sources shared by the muladd, mulsub and add slots.
const int ALPHA_ADDSUB_COMBINED     = 0;
const int ALPHA_ADDSUB_TEXEL0_ALPHA = 1;
const int ALPHA_ADDSUB_TEXEL1_ALPHA = 2;
const int ALPHA_ADDSUB_SHADE_ALPHA  = 4;
const int ALPHA_ADDSUB_ONE          = 6;

// Alpha sources for the mul slot.
const int ALPHA_MUL_LOD_FRAC     = 0;
const int ALPHA_MUL_TEXEL0_ALPHA = 1;
const int ALPHA_MUL_TEXEL1_ALPHA = 2;
const int ALPHA_MUL_SHADE_ALPHA  = 4;
|
|
|
|
// Resolves the "muladd" operand of the combiner.
//   selector_rgb   : one of RGB_MULADD_*; any other value yields constant_muladd.rgb.
//   selector_alpha : one of ALPHA_ADDSUB_*; any other value yields constant_muladd.a.
// Returns the chosen RGB in .xyz and the chosen alpha in .w. The *_ONE
// selectors produce the literal 0x100.
ivec4 select_muladd(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
	int alpha;
	if (selector_alpha == ALPHA_ADDSUB_COMBINED)
		alpha = inputs.combined.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
		alpha = inputs.texel0.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
		alpha = inputs.texel1.a;
	else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
		alpha = inputs.shade.a;
	else if (selector_alpha == ALPHA_ADDSUB_ONE)
		alpha = 0x100;
	else
		alpha = inputs.constant_muladd.a;

	ivec3 rgb;
	if (selector_rgb == RGB_MULADD_COMBINED)
		rgb = inputs.combined.rgb;
	else if (selector_rgb == RGB_MULADD_TEXEL0)
		rgb = inputs.texel0.rgb;
	else if (selector_rgb == RGB_MULADD_TEXEL1)
		rgb = inputs.texel1.rgb;
	else if (selector_rgb == RGB_MULADD_SHADE)
		rgb = inputs.shade.rgb;
	else if (selector_rgb == RGB_MULADD_ONE)
		rgb = ivec3(0x100);
	else if (selector_rgb == RGB_MULADD_NOISE)
		rgb = ivec3(inputs.noise); // scalar noise replicated to all channels
	else
		rgb = inputs.constant_muladd.rgb;

	return ivec4(rgb, alpha);
}
|
|
|
|
// Resolves the "mulsub" operand of the combiner.
//   selector_rgb   : one of RGB_MULSUB_*; any other value yields constant_mulsub.rgb.
//   selector_alpha : one of ALPHA_ADDSUB_*; any other value yields constant_mulsub.a.
// Returns the chosen RGB in .xyz and the chosen alpha in .w.
ivec4 select_mulsub(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
	int alpha;
	if (selector_alpha == ALPHA_ADDSUB_COMBINED)
		alpha = inputs.combined.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
		alpha = inputs.texel0.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
		alpha = inputs.texel1.a;
	else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
		alpha = inputs.shade.a;
	else if (selector_alpha == ALPHA_ADDSUB_ONE)
		alpha = 0x100;
	else
		alpha = inputs.constant_mulsub.a;

	ivec3 rgb;
	if (selector_rgb == RGB_MULSUB_COMBINED)
		rgb = inputs.combined.rgb;
	else if (selector_rgb == RGB_MULSUB_TEXEL0)
		rgb = inputs.texel0.rgb;
	else if (selector_rgb == RGB_MULSUB_TEXEL1)
		rgb = inputs.texel1.rgb;
	else if (selector_rgb == RGB_MULSUB_SHADE)
		rgb = inputs.shade.rgb;
	else if (selector_rgb == RGB_MULSUB_K4)
	{
		// K4 is assembled from two bytes of the constant: .g is the high byte,
		// .b the low byte. The same value is replicated to all three channels.
		rgb = ivec3((int(inputs.constant_mulsub.g) << 8) | inputs.constant_mulsub.b);
	}
	else
		rgb = inputs.constant_mulsub.rgb;

	return ivec4(rgb, alpha);
}
|
|
|
|
// Resolves the "mul" operand (the multiplier) of the combiner.
//   selector_rgb   : one of RGB_MUL_*; any other value yields constant_mul.rgb.
//   selector_alpha : one of ALPHA_MUL_*; any other value yields constant_mul.a.
// Returns the chosen RGB in .xyz and the chosen alpha in .w. The *_ALPHA RGB
// selectors broadcast the source's alpha to all three colour channels.
ivec4 select_mul(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
	int alpha;
	if (selector_alpha == ALPHA_MUL_LOD_FRAC)
		alpha = inputs.lod_frac;
	else if (selector_alpha == ALPHA_MUL_TEXEL0_ALPHA)
		alpha = inputs.texel0.a;
	else if (selector_alpha == ALPHA_MUL_TEXEL1_ALPHA)
		alpha = inputs.texel1.a;
	else if (selector_alpha == ALPHA_MUL_SHADE_ALPHA)
		alpha = inputs.shade.a;
	else
		alpha = inputs.constant_mul.a;

	ivec3 rgb;
	if (selector_rgb == RGB_MUL_COMBINED)
		rgb = inputs.combined.rgb;
	else if (selector_rgb == RGB_MUL_TEXEL0)
		rgb = inputs.texel0.rgb;
	else if (selector_rgb == RGB_MUL_TEXEL1)
		rgb = inputs.texel1.rgb;
	else if (selector_rgb == RGB_MUL_SHADE)
		rgb = inputs.shade.rgb;
	else if (selector_rgb == RGB_MUL_COMBINED_ALPHA)
		rgb = inputs.combined.aaa;
	else if (selector_rgb == RGB_MUL_TEXEL0_ALPHA)
		rgb = inputs.texel0.aaa;
	else if (selector_rgb == RGB_MUL_TEXEL1_ALPHA)
		rgb = inputs.texel1.aaa;
	else if (selector_rgb == RGB_MUL_SHADE_ALPHA)
		rgb = inputs.shade.aaa;
	else if (selector_rgb == RGB_MUL_LOD_FRAC)
		rgb = ivec3(inputs.lod_frac);
	else if (selector_rgb == RGB_MUL_K5)
	{
		// K5 is assembled from two bytes of the constant: .g is the high byte,
		// .b the low byte. The same value is replicated to all three channels.
		rgb = ivec3((int(inputs.constant_mul.g) << 8) | inputs.constant_mul.b);
	}
	else
		rgb = inputs.constant_mul.rgb;

	return ivec4(rgb, alpha);
}
|
|
|
|
// Resolves the "add" operand of the combiner.
//   selector_rgb   : one of RGB_ADD_*; any other value yields constant_add.rgb.
//   selector_alpha : one of ALPHA_ADDSUB_*; any other value yields constant_add.a.
// Returns the chosen RGB in .xyz and the chosen alpha in .w. The *_ONE
// selectors produce the literal 0x100.
ivec4 select_add(CombinerInputs inputs, int selector_rgb, int selector_alpha)
{
	int alpha;
	if (selector_alpha == ALPHA_ADDSUB_COMBINED)
		alpha = inputs.combined.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL0_ALPHA)
		alpha = inputs.texel0.a;
	else if (selector_alpha == ALPHA_ADDSUB_TEXEL1_ALPHA)
		alpha = inputs.texel1.a;
	else if (selector_alpha == ALPHA_ADDSUB_SHADE_ALPHA)
		alpha = inputs.shade.a;
	else if (selector_alpha == ALPHA_ADDSUB_ONE)
		alpha = 0x100;
	else
		alpha = inputs.constant_add.a;

	ivec3 rgb;
	if (selector_rgb == RGB_ADD_COMBINED)
		rgb = inputs.combined.rgb;
	else if (selector_rgb == RGB_ADD_TEXEL0)
		rgb = inputs.texel0.rgb;
	else if (selector_rgb == RGB_ADD_TEXEL1)
		rgb = inputs.texel1.rgb;
	else if (selector_rgb == RGB_ADD_SHADE)
		rgb = inputs.shade.rgb;
	else if (selector_rgb == RGB_ADD_ONE)
		rgb = ivec3(0x100);
	else
		rgb = inputs.constant_add.rgb;

	return ivec4(rgb, alpha);
}
|
|
|
|
// Runs the first combiner cycle.
//   inputs                 : candidate sources for the four operand slots.
//   combiner_inputs_rgb    : RGB selectors, .x/.y/.z/.w = muladd/mulsub/mul/add.
//   combiner_inputs_alpha  : alpha selectors, same component layout.
//   alpha_dith             : offset added to alpha when alpha_cvg_select is off.
//   coverage               : coverage value used when alpha_cvg_select is on.
//   cvg_times_alpha        : with alpha_cvg_select, scale alpha by coverage
//                            instead of replacing it with coverage << 5.
//   alpha_test             : when false, alpha_test_reference is set to 0.
//   alpha_test_reference   : out; alpha clamped to [0, 0xff] for the alpha test.
// Returns the raw combiner_equation() result; it is NOT clamped here and is
// not affected by the alpha-test computation.
i16x4 combiner_cycle0(CombinerInputs inputs, u8x4 combiner_inputs_rgb, u8x4 combiner_inputs_alpha, int alpha_dith,
                      int coverage, bool cvg_times_alpha, bool alpha_cvg_select, bool alpha_test, out u8 alpha_test_reference)
{
	i16x4 combined = combiner_equation(
			select_muladd(inputs, combiner_inputs_rgb.x, combiner_inputs_alpha.x),
			select_mulsub(inputs, combiner_inputs_rgb.y, combiner_inputs_alpha.y),
			select_mul(inputs, combiner_inputs_rgb.z, combiner_inputs_alpha.z),
			select_add(inputs, combiner_inputs_rgb.w, combiner_inputs_alpha.w));

	// Without alpha testing there is nothing more to derive.
	if (!alpha_test)
	{
		alpha_test_reference = U8_C(0);
		return combined;
	}

	int alpha = clamp_9bit(combined.a);
	// Expands 0xff to 0x100 to avoid having to divide by 2**n - 1.
	alpha += (alpha + 1) >> 8;

	if (!alpha_cvg_select)
		alpha += alpha_dith;
	else if (cvg_times_alpha)
		alpha = (alpha * coverage + 4) >> 3; // +4 rounds the division by 8
	else
		alpha = coverage << 5;

	alpha_test_reference = u8(clamp(alpha, 0, 0xff));
	return combined;
}
|
|
|
|
// Runs the second combiner cycle and finalises alpha.
//   inputs                 : candidate sources for the four operand slots.
//   combiner_inputs_rgb    : RGB selectors, .x/.y/.z/.w = muladd/mulsub/mul/add.
//   combiner_inputs_alpha  : alpha selectors, same component layout.
//   alpha_dith             : offset added to alpha when alpha_cvg_select is off.
//   coverage               : inout; rewritten to (alpha * coverage) >> 5 (after
//                            rounding) whenever cvg_times_alpha is set, even if
//                            alpha_cvg_select is off.
//   cvg_times_alpha        : scale alpha by coverage instead of using coverage << 5.
//   alpha_cvg_select       : output the coverage-derived alpha instead of the
//                            dithered combiner alpha.
// Returns the combined colour after clamp_9bit_notrunc(), with .a replaced by
// the final alpha clamped to [0, 0xff].
i16x4 combiner_cycle1(CombinerInputs inputs, u8x4 combiner_inputs_rgb, u8x4 combiner_inputs_alpha, int alpha_dith,
                      inout int coverage, bool cvg_times_alpha, bool alpha_cvg_select)
{
	i16x4 combined = clamp_9bit_notrunc(combiner_equation(
			select_muladd(inputs, combiner_inputs_rgb.x, combiner_inputs_alpha.x),
			select_mulsub(inputs, combiner_inputs_rgb.y, combiner_inputs_alpha.y),
			select_mul(inputs, combiner_inputs_rgb.z, combiner_inputs_alpha.z),
			select_add(inputs, combiner_inputs_rgb.w, combiner_inputs_alpha.w)));

	// Expands 0xff to 0x100 to avoid having to divide by 2**n - 1.
	int alpha = combined.a;
	alpha += (alpha + 1) >> 8;

	int coverage_alpha;
	if (cvg_times_alpha)
	{
		coverage_alpha = (alpha * coverage + 4) >> 3; // +4 rounds the division by 8
		coverage = coverage_alpha >> 5;
	}
	else
		coverage_alpha = coverage << 5;

	int final_alpha = alpha_cvg_select ? coverage_alpha : (alpha + alpha_dith);
	combined.a = i16(clamp(final_alpha, 0, 0xff));

	return combined;
}
|
|
|
|
#endif |