/* Copyright (c) 2020 Themaister * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef BINNING_H_ #define BINNING_H_ // There are 4 critical Y coordinates to test when binning. Top, bottom, mid, and mid - 1. 
const int SUBPIXELS_Y = 4;

// Arithmetic right shift of every component by 15 bits, discarding the low 15 bits
// of an interpolated X value.
ivec4 quantize_x(ivec4 x)
{
	return x >> 15;
}

// Returns the smallest of the four components of v.
int minimum4(ivec4 v)
{
	ivec2 minimum2 = min(v.xy, v.zw);
	return min(minimum2.x, minimum2.y);
}

// Returns the largest of the four components of v.
int maximum4(ivec4 v)
{
	ivec2 maximum2 = max(v.xy, v.zw);
	return max(maximum2.x, maximum2.y);
}

// Computes a * b + c per component as a signed 64-bit quantity using only 32-bit operations.
// The low 32 bits are returned, the high 32 bits are written to hi_bits.
ivec4 madd_32_64(ivec4 a, int b, int c, out ivec4 hi_bits)
{
	ivec4 lo, hi;
	imulExtended(a, ivec4(b), hi, lo);

	// Add the low words as unsigned values and carry into the high word.
	uvec4 carry;
	lo = ivec4(uaddCarry(uvec4(lo), uvec4(c), carry));

	// c is a signed addend: its 64-bit sign extension has a high word of -1 when c < 0
	// and 0 otherwise. (c >> 31) is exactly that value and must be added alongside the carry,
	// otherwise the high word ends up one too large for every negative c.
	hi += ivec4(carry) + ivec4(c >> 31);

	hi_bits = hi;
	return lo;
}

// Saturates the signed 64-bit value (hi:lo) to the signed 32-bit range without 64-bit math.
ivec4 saturate_64_to_32(ivec4 lo, ivec4 hi)
{
	// The value fits in 32 bits exactly when the high word equals the sign extension of the low word.
	// This also catches hi == 0 with lo's sign bit set, and hi == -1 with lo's sign bit clear.
	bvec4 overflow = notEqual(hi, lo >> 31);
	ivec4 limit = mix(ivec4(0x7fffffff), ivec4(-0x80000000), lessThan(hi, ivec4(0)));
	return mix(lo, limit, overflow);
}

// Evaluates the primitive's edges at the four Y sample positions in ys and returns
// ivec2(min left X, max right X) after quantize_x().
// If any sampled X is out of range (overflow), returns the conservative range ivec2(0, 0x7fffffff).
// flip selects which edge is treated as the left one; scaling multiplies the setup coordinates.
ivec2 interpolate_xs(TriangleSetup setup, ivec4 ys, bool flip, int scaling)
{
	int yh_interpolation_base = setup.yh & ~(SUBPIXELS_Y - 1);
	int ym_interpolation_base = setup.ym;
	yh_interpolation_base *= scaling;
	ym_interpolation_base *= scaling;

	// Interpolate in 64-bit so we can detect quirky overflow scenarios.
	ivec4 xh_hi, xm_hi, xl_hi;
	ivec4 xh = madd_32_64(ys - yh_interpolation_base, setup.dxhdy, scaling * setup.xh, xh_hi);
	ivec4 xm = madd_32_64(ys - yh_interpolation_base, setup.dxmdy, scaling * setup.xm, xm_hi);
	ivec4 xl = madd_32_64(ys - ym_interpolation_base, setup.dxldy, scaling * setup.xl, xl_hi);

	// Samples above the mid Y use the M edge instead of the L edge.
	bvec4 above_mid = lessThan(ys, ivec4(scaling * setup.ym));
	xl = mix(xl, xm, above_mid);
	xl_hi = mix(xl_hi, xm_hi, above_mid);

	// Handle overflow scenarios. Saturate 64-bit signed to 32-bit signed without 64-bit math.
	// A saturated value quantizes far outside the valid range and triggers the conservative path below.
	xh = saturate_64_to_32(xh, xh_hi);
	xl = saturate_64_to_32(xl, xl_hi);

	ivec4 xh_shifted = quantize_x(xh);
	ivec4 xl_shifted = quantize_x(xl);

	ivec4 xleft, xright;
	if (flip)
	{
		xleft = xh_shifted;
		xright = xl_shifted;
	}
	else
	{
		xleft = xl_shifted;
		xright = xh_shifted;
	}

	// If one of the results are out of range, we have overflow, and we need to be conservative when binning.
	int max_range = maximum4(max(abs(xleft), abs(xright)));

	ivec2 range;
	if (max_range <= 2047 * scaling)
		range = ivec2(minimum4(xleft), maximum4(xright));
	else
		range = ivec2(0, 0x7fffffff);

	return range;
}

// Conservative test of whether the primitive described by setup can produce coverage inside
// the inclusive rectangle [lo, hi] after applying the scissor.
// lo.y / hi.y are multiplied by SUBPIXELS_Y before being compared against the setup's Y values;
// lo.x / hi.x are compared against the quantized X range.
// Returns false only when the primitive is clipped out in Y or its X range misses the rectangle.
bool bin_primitive(TriangleSetup setup, ivec2 lo, ivec2 hi, int scaling, ScissorState scissor)
{
	// First clip Y range based on scissor.
	// NOTE(review): the >> 2 suggests scissor values carry two fractional bits - confirm against ScissorState.
	lo.y = max(lo.y, scaling * (scissor.ylo >> 2));
	hi.y = min(hi.y, scaling * ((scissor.yhi + 3) >> 2) - 1);

	int start_y = lo.y * SUBPIXELS_Y;
	int end_y = (hi.y * SUBPIXELS_Y) + (SUBPIXELS_Y - 1);

	// First, we clip start/end against y_lo, y_hi.
	start_y = max(start_y, scaling * int(setup.yh));
	end_y = min(end_y, scaling * int(setup.yl) - 1);

	// Y is clipped out, exit early.
	if (end_y < start_y)
		return false;

	bool flip = (setup.flags & TRIANGLE_SETUP_FLIP_BIT) != 0;

	// Sample the X ranges for min and max Y, and potentially the mid-point as well.
	// The mid samples (mid - 1 and mid) are clamped into [start_y, end_y].
	ivec4 ys = ivec4(start_y, end_y, clamp(setup.ym * scaling + ivec2(-1, 0), ivec2(start_y), ivec2(end_y)));
	ivec2 x_range = interpolate_xs(setup, ys, flip, scaling);

	// For FILL_COPY_RASTER_BIT we're inclusive, if not, exclusive.
	int x_bias = (setup.flags & TRIANGLE_SETUP_FILL_COPY_RASTER_BIT) != 0 ? 4 : 3;
	ivec2 scissor_x = ivec2(scaling * (scissor.xlo >> 2), scaling * ((scissor.xhi + x_bias) >> 2) - 1);

	// Scissor is applied through a clamp with a mask being generated for overshoot which affects if the line is valid.
	// Since this is a conservative test we don't compute valid line here, so we have to assume it is valid.
	// We can end up creating fake coverage in FILL/COPY modes in some cases
	// if we clamp scissor to outside the primitive's range as long as at least one sub-line passes the scissor test.
	// The x_range ends up being degenerate, but these fill modes are conservative and generate one pixel of coverage
	// anyways.
	x_range = clamp(x_range, scissor_x.xx, scissor_x.yy);

	// Finally intersect with the horizontal extent of the rectangle being tested.
	x_range.x = max(x_range.x, lo.x);
	x_range.y = min(x_range.y, hi.x);
	return x_range.x <= x_range.y;
}

#endif