mirror of
https://gitlab.com/suyu-emu/suyu.git
synced 2024-03-15 23:15:44 +00:00
Merge pull request #358 from neobrain/pica_progress2
pica_progress followups
This commit is contained in:
commit
7c8f6ca051
|
@ -10,6 +10,7 @@
|
|||
#include <QPushButton>
|
||||
#include <QSpinBox>
|
||||
|
||||
#include "video_core/color.h"
|
||||
#include "video_core/pica.h"
|
||||
|
||||
#include "graphics_framebuffer.hxx"
|
||||
|
@ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate()
|
|||
framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
|
||||
framebuffer_width = framebuffer.GetWidth();
|
||||
framebuffer_height = framebuffer.GetHeight();
|
||||
framebuffer_format = static_cast<Format>(framebuffer.color_format);
|
||||
// TODO: It's unknown how this format is actually specified
|
||||
framebuffer_format = Format::RGBA8;
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -258,10 +260,10 @@ void GraphicsFramebufferWidget::OnUpdate()
|
|||
for (unsigned y = 0; y < framebuffer_height; ++y) {
|
||||
for (unsigned x = 0; x < framebuffer_width; ++x) {
|
||||
u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2);
|
||||
u8 r = (value >> 11) & 0x1F;
|
||||
u8 g = (value >> 6) & 0x1F;
|
||||
u8 b = (value >> 1) & 0x1F;
|
||||
u8 a = value & 1;
|
||||
u8 r = Color::Convert5To8((value >> 11) & 0x1F);
|
||||
u8 g = Color::Convert5To8((value >> 6) & 0x1F);
|
||||
u8 b = Color::Convert5To8((value >> 1) & 0x1F);
|
||||
u8 a = Color::Convert1To8(value & 1);
|
||||
|
||||
decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/));
|
||||
}
|
||||
|
|
|
@ -94,11 +94,15 @@ inline void Write(u32 addr, const T data) {
|
|||
int r, g, b, a;
|
||||
} source_color = { 0, 0, 0, 0 };
|
||||
|
||||
// Cheap emulation of horizontal scaling: Just skip every second pixel of the
|
||||
// input framebuffer. We keep track of this in the pixel_skip variable.
|
||||
unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1;
|
||||
|
||||
switch (config.input_format) {
|
||||
case Regs::PixelFormat::RGBA8:
|
||||
{
|
||||
// TODO: Most likely got the component order messed up.
|
||||
u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4;
|
||||
u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip;
|
||||
source_color.r = srcptr[0]; // blue
|
||||
source_color.g = srcptr[1]; // green
|
||||
source_color.b = srcptr[2]; // red
|
||||
|
|
|
@ -157,6 +157,9 @@ struct Regs {
|
|||
BitField< 8, 3, PixelFormat> input_format;
|
||||
BitField<12, 3, PixelFormat> output_format;
|
||||
BitField<16, 1, u32> output_tiled; // stores output in a tiled format
|
||||
|
||||
// TODO: Not really sure if this actually scales, or even resizes at all.
|
||||
BitField<24, 1, u32> scale_horizontally;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
|
32
src/video_core/color.h
Normal file
32
src/video_core/color.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Color {
|
||||
|
||||
/// Convert a 1-bit color component to 8 bit
|
||||
static inline u8 Convert1To8(u8 value) {
|
||||
return value * 255;
|
||||
}
|
||||
|
||||
/// Convert a 4-bit color component to 8 bit
|
||||
static inline u8 Convert4To8(u8 value) {
|
||||
return (value << 4) | value;
|
||||
}
|
||||
|
||||
/// Convert a 5-bit color component to 8 bit
|
||||
static inline u8 Convert5To8(u8 value) {
|
||||
return (value << 3) | (value >> 2);
|
||||
}
|
||||
|
||||
/// Convert a 6-bit color component to 8 bit
|
||||
static inline u8 Convert6To8(u8 value) {
|
||||
return (value << 2) | (value >> 4);
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
|
@ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
// Initialize data for the current vertex
|
||||
VertexShader::InputVertex input;
|
||||
|
||||
// Load a debugging token to check whether this gets loaded by the running
|
||||
// application or not.
|
||||
static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
|
||||
input.attr[0].w = debug_token;
|
||||
|
||||
for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
|
||||
for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
|
||||
const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
|
||||
|
@ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
}
|
||||
}
|
||||
|
||||
// HACK: Some games do not initialize the vertex position's w component. This leads
|
||||
// to critical issues since it messes up perspective division. As a
|
||||
// workaround, we force the fourth component to 1.0 if we find this to be the
|
||||
// case.
|
||||
// To do this, we additionally have to assume that the first input attribute
|
||||
// is the vertex position, since there's no information about this other than
|
||||
// the empirical observation that this is usually the case.
|
||||
if (input.attr[0].w == debug_token)
|
||||
input.attr[0].w = float24::FromFloat32(1.0);
|
||||
|
||||
if (g_debug_context)
|
||||
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
|
||||
|
||||
|
@ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
|
|||
|
||||
break;
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4):
|
||||
{
|
||||
int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
|
||||
auto values = registers.vs_int_uniforms[index];
|
||||
VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
|
||||
LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
|
||||
index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
|
||||
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "common/log.h"
|
||||
#include "common/file_util.h"
|
||||
|
||||
#include "video_core/color.h"
|
||||
#include "video_core/math.h"
|
||||
#include "video_core/pica.h"
|
||||
|
||||
|
@ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
|
|||
u8 g = ((source_ptr) >> 6) & 0x1F;
|
||||
u8 b = (source_ptr >> 1) & 0x1F;
|
||||
u8 a = source_ptr & 1;
|
||||
return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255));
|
||||
return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g),
|
||||
Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a));
|
||||
}
|
||||
|
||||
case Regs::TextureFormat::RGB565:
|
||||
{
|
||||
const u16 source_ptr = *(const u16*)(source + offset * 2);
|
||||
u8 r = (source_ptr >> 11) & 0x1F;
|
||||
u8 g = ((source_ptr) >> 5) & 0x3F;
|
||||
u8 b = (source_ptr) & 0x1F;
|
||||
return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255);
|
||||
u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
|
||||
u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
|
||||
u8 b = Color::Convert5To8((source_ptr) & 0x1F);
|
||||
return Math::MakeVec<u8>(r, g, b, 255);
|
||||
}
|
||||
|
||||
case Regs::TextureFormat::RGBA4:
|
||||
{
|
||||
const u8* source_ptr = source + offset * 2;
|
||||
u8 r = source_ptr[1] >> 4;
|
||||
u8 g = source_ptr[1] & 0xFF;
|
||||
u8 b = source_ptr[0] >> 4;
|
||||
u8 a = source_ptr[0] & 0xFF;
|
||||
r = (r << 4) | r;
|
||||
g = (g << 4) | g;
|
||||
b = (b << 4) | b;
|
||||
a = (a << 4) | a;
|
||||
u8 r = Color::Convert4To8(source_ptr[1] >> 4);
|
||||
u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
|
||||
u8 b = Color::Convert4To8(source_ptr[0] >> 4);
|
||||
u8 a = Color::Convert4To8(source_ptr[0] & 0xF);
|
||||
return { r, g, b, disable_alpha ? (u8)255 : a };
|
||||
}
|
||||
|
||||
|
@ -389,13 +387,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
|
|||
{
|
||||
const u8* source_ptr = source + offset * 2;
|
||||
|
||||
// TODO: component order not verified
|
||||
|
||||
if (disable_alpha) {
|
||||
// Show intensity as red, alpha as green
|
||||
return { source_ptr[0], source_ptr[1], 0, 255 };
|
||||
return { source_ptr[1], source_ptr[0], 0, 255 };
|
||||
} else {
|
||||
return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]};
|
||||
return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -418,14 +414,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
|
|||
|
||||
case Regs::TextureFormat::IA4:
|
||||
{
|
||||
const u8* source_ptr = source + offset / 2;
|
||||
const u8* source_ptr = source + offset;
|
||||
|
||||
// TODO: component order not verified
|
||||
|
||||
u8 i = (*source_ptr) & 0xF;
|
||||
u8 a = ((*source_ptr) & 0xF0) >> 4;
|
||||
a |= a << 4;
|
||||
i |= i << 4;
|
||||
u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
|
||||
u8 a = Color::Convert4To8((*source_ptr) & 0xF);
|
||||
|
||||
if (disable_alpha) {
|
||||
// Show intensity as red, alpha as green
|
||||
|
@ -439,15 +431,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
|
|||
{
|
||||
const u8* source_ptr = source + offset / 2;
|
||||
|
||||
// TODO: component order not verified
|
||||
|
||||
u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
|
||||
a |= a << 4;
|
||||
a = Color::Convert4To8(a);
|
||||
|
||||
if (disable_alpha) {
|
||||
return { *source_ptr, *source_ptr, *source_ptr, 255 };
|
||||
return { a, a, a, 255 };
|
||||
} else {
|
||||
return { 0, 0, 0, *source_ptr };
|
||||
return { 0, 0, 0, a };
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -50,7 +50,19 @@ struct Regs {
|
|||
|
||||
u32 trigger_irq;
|
||||
|
||||
INSERT_PADDING_WORDS(0x30);
|
||||
INSERT_PADDING_WORDS(0x2f);
|
||||
|
||||
enum class CullMode : u32 {
|
||||
// Select which polygons are considered to be "frontfacing".
|
||||
KeepAll = 0,
|
||||
KeepClockWise = 1,
|
||||
KeepCounterClockWise = 2,
|
||||
// TODO: What does the remaining encoding (3) of this 2-bit field imply?
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 2, CullMode> cull_mode;
|
||||
};
|
||||
|
||||
BitField<0, 24, u32> viewport_size_x;
|
||||
|
||||
|
@ -289,7 +301,7 @@ struct Regs {
|
|||
TevStageConfig tev_stage4;
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
TevStageConfig tev_stage5;
|
||||
INSERT_PADDING_WORDS(0x13);
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
|
||||
return { tev_stage0, tev_stage1,
|
||||
|
@ -297,6 +309,60 @@ struct Regs {
|
|||
tev_stage4, tev_stage5 };
|
||||
};
|
||||
|
||||
struct {
|
||||
enum DepthFunc : u32 {
|
||||
Always = 1,
|
||||
LessThan = 4,
|
||||
GreaterThan = 6,
|
||||
};
|
||||
|
||||
union {
|
||||
// If false, logic blending is used
|
||||
BitField<8, 1, u32> alphablend_enable;
|
||||
};
|
||||
|
||||
union {
|
||||
enum BlendEquation : u32 {
|
||||
Add = 0,
|
||||
};
|
||||
|
||||
enum BlendFactor : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
|
||||
SourceAlpha = 6,
|
||||
OneMinusSourceAlpha = 7,
|
||||
};
|
||||
|
||||
BitField< 0, 8, BlendEquation> blend_equation_rgb;
|
||||
BitField< 8, 8, BlendEquation> blend_equation_a;
|
||||
|
||||
BitField<16, 4, BlendFactor> factor_source_rgb;
|
||||
BitField<20, 4, BlendFactor> factor_dest_rgb;
|
||||
|
||||
BitField<24, 4, BlendFactor> factor_source_a;
|
||||
BitField<28, 4, BlendFactor> factor_dest_a;
|
||||
} alpha_blending;
|
||||
|
||||
union {
|
||||
enum Op {
|
||||
Set = 4,
|
||||
};
|
||||
|
||||
BitField<0, 4, Op> op;
|
||||
} logic_op;
|
||||
|
||||
INSERT_PADDING_WORDS(0x4);
|
||||
|
||||
union {
|
||||
BitField< 0, 1, u32> depth_test_enable;
|
||||
BitField< 4, 3, DepthFunc> depth_test_func;
|
||||
BitField<12, 1, u32> depth_write_enable;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
} output_merger;
|
||||
|
||||
struct {
|
||||
enum ColorFormat : u32 {
|
||||
RGBA8 = 0,
|
||||
|
@ -495,8 +561,14 @@ struct Regs {
|
|||
INSERT_PADDING_WORDS(0x51);
|
||||
|
||||
BitField<0, 16, u32> vs_bool_uniforms;
|
||||
union {
|
||||
BitField< 0, 8, u32> x;
|
||||
BitField< 8, 8, u32> y;
|
||||
BitField<16, 8, u32> z;
|
||||
BitField<24, 8, u32> w;
|
||||
} vs_int_uniforms[4];
|
||||
|
||||
INSERT_PADDING_WORDS(0x9);
|
||||
INSERT_PADDING_WORDS(0x5);
|
||||
|
||||
// Offset to shader program entry point (in words)
|
||||
BitField<0, 16, u32> vs_main_offset;
|
||||
|
@ -599,6 +671,7 @@ struct Regs {
|
|||
} while(false)
|
||||
|
||||
ADD_FIELD(trigger_irq);
|
||||
ADD_FIELD(cull_mode);
|
||||
ADD_FIELD(viewport_size_x);
|
||||
ADD_FIELD(viewport_size_y);
|
||||
ADD_FIELD(viewport_depth_range);
|
||||
|
@ -617,6 +690,7 @@ struct Regs {
|
|||
ADD_FIELD(tev_stage3);
|
||||
ADD_FIELD(tev_stage4);
|
||||
ADD_FIELD(tev_stage5);
|
||||
ADD_FIELD(output_merger);
|
||||
ADD_FIELD(framebuffer);
|
||||
ADD_FIELD(vertex_attributes);
|
||||
ADD_FIELD(index_array);
|
||||
|
@ -625,6 +699,7 @@ struct Regs {
|
|||
ADD_FIELD(trigger_draw_indexed);
|
||||
ADD_FIELD(triangle_topology);
|
||||
ADD_FIELD(vs_bool_uniforms);
|
||||
ADD_FIELD(vs_int_uniforms);
|
||||
ADD_FIELD(vs_main_offset);
|
||||
ADD_FIELD(vs_input_register_map);
|
||||
ADD_FIELD(vs_uniform_setup);
|
||||
|
@ -668,6 +743,7 @@ private:
|
|||
#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(trigger_irq, 0x10);
|
||||
ASSERT_REG_POSITION(cull_mode, 0x40);
|
||||
ASSERT_REG_POSITION(viewport_size_x, 0x41);
|
||||
ASSERT_REG_POSITION(viewport_size_y, 0x43);
|
||||
ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
|
||||
|
@ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0);
|
|||
ASSERT_REG_POSITION(tev_stage3, 0xd8);
|
||||
ASSERT_REG_POSITION(tev_stage4, 0xf0);
|
||||
ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
||||
ASSERT_REG_POSITION(output_merger, 0x100);
|
||||
ASSERT_REG_POSITION(framebuffer, 0x110);
|
||||
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
||||
ASSERT_REG_POSITION(index_array, 0x227);
|
||||
|
@ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
|
|||
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
|
||||
ASSERT_REG_POSITION(triangle_topology, 0x25e);
|
||||
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
|
||||
ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
|
||||
ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
|
||||
ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
|
||||
ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
|
||||
|
|
|
@ -18,51 +18,82 @@ namespace Pica {
|
|||
namespace Rasterizer {
|
||||
|
||||
/// Writes `color` to pixel (x, y) of the color buffer that the framebuffer
/// registers currently point at.
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));

    // Widen each channel to u32 before shifting. A u8 operand is promoted to a
    // signed int, so shifting an alpha value >= 0x80 left by 24 would shift
    // into the sign bit.
    const u32 value = (static_cast<u32>(color.a()) << 24) |
                      (static_cast<u32>(color.r()) << 16) |
                      (static_cast<u32>(color.g()) << 8) |
                      static_cast<u32>(color.b());

    // Assuming RGBA8 format until actual framebuffer format handling is implemented
    *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
}
|
||||
|
||||
/// Reads pixel (x, y) back from the color buffer configured in the framebuffer
/// registers and unpacks it into a color vector. Uses the same 32-bit layout
/// that DrawPixel writes: alpha in bits 24-31, red in 16-23, green in 8-15,
/// blue in 0-7.
static const Math::Vec4<u8> GetPixel(int x, int y) {
    const PAddr buffer_addr = registers.framebuffer.GetColorBufferPhysicalAddress();
    u32* pixels = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(buffer_addr)));

    const u32 packed = *(pixels + x + y * registers.framebuffer.GetWidth());

    Math::Vec4<u8> color;
    color.b() = static_cast<u8>(packed & 0xFF);
    color.g() = static_cast<u8>((packed >> 8) & 0xFF);
    color.r() = static_cast<u8>((packed >> 16) & 0xFF);
    color.a() = static_cast<u8>(packed >> 24);
    return color;
}
|
||||
|
||||
/// Returns the depth value stored for pixel (x, y) in the depth buffer
/// configured in the framebuffer registers.
static u32 GetDepth(int x, int y) {
    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));

    // Assuming 16-bit depth buffer format until actual format handling is implemented
    return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
}
|
||||
|
||||
static void SetDepth(int x, int y, u16 value) {
|
||||
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
|
||||
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
|
||||
u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
|
||||
|
||||
// Assuming 16-bit depth buffer format until actual format handling is implemented
|
||||
*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
|
||||
}
|
||||
|
||||
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
||||
struct Fix12P4 {
|
||||
Fix12P4() {}
|
||||
Fix12P4(u16 val) : val(val) {}
|
||||
|
||||
static u16 FracMask() { return 0xF; }
|
||||
static u16 IntMask() { return (u16)~0xF; }
|
||||
|
||||
operator u16() const {
|
||||
return val;
|
||||
}
|
||||
|
||||
bool operator < (const Fix12P4& oth) const {
|
||||
return (u16)*this < (u16)oth;
|
||||
}
|
||||
|
||||
private:
|
||||
u16 val;
|
||||
};
|
||||
|
||||
/**
|
||||
* Calculate signed area of the triangle spanned by the three argument vertices.
|
||||
* The sign denotes an orientation.
|
||||
*
|
||||
* @todo define orientation concretely.
|
||||
*/
|
||||
static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
|
||||
const Math::Vec2<Fix12P4>& vtx2,
|
||||
const Math::Vec2<Fix12P4>& vtx3) {
|
||||
const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
|
||||
const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
|
||||
// TODO: There is a very small chance this will overflow for sizeof(int) == 4
|
||||
return Math::Cross(vec1, vec2).z;
|
||||
};
|
||||
|
||||
void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||
const VertexShader::OutputVertex& v1,
|
||||
const VertexShader::OutputVertex& v2)
|
||||
{
|
||||
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
||||
struct Fix12P4 {
|
||||
Fix12P4() {}
|
||||
Fix12P4(u16 val) : val(val) {}
|
||||
|
||||
static u16 FracMask() { return 0xF; }
|
||||
static u16 IntMask() { return (u16)~0xF; }
|
||||
|
||||
operator u16() const {
|
||||
return val;
|
||||
}
|
||||
|
||||
bool operator < (const Fix12P4& oth) const {
|
||||
return (u16)*this < (u16)oth;
|
||||
}
|
||||
|
||||
private:
|
||||
u16 val;
|
||||
};
|
||||
|
||||
// vertex positions in rasterizer coordinates
|
||||
auto FloatToFix = [](float24 flt) {
|
||||
return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
|
||||
|
@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
|
||||
return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
|
||||
};
|
||||
|
||||
Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
|
||||
ScreenToRasterizerCoordinates(v1.screenpos),
|
||||
ScreenToRasterizerCoordinates(v2.screenpos) };
|
||||
|
||||
if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
|
||||
// Reverse vertex order and use the CCW code path.
|
||||
std::swap(vtxpos[1], vtxpos[2]);
|
||||
}
|
||||
|
||||
if (registers.cull_mode != Regs::CullMode::KeepAll) {
|
||||
// Cull away triangles which are wound clockwise.
|
||||
// TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
|
||||
if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Proper scissor rect test!
|
||||
u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
|
||||
u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
|
||||
|
@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
for (u16 x = min_x; x < max_x; x += 0x10) {
|
||||
|
||||
// Calculate the barycentric coordinates w0, w1 and w2
|
||||
auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
|
||||
const Math::Vec2<Fix12P4>& vtx2,
|
||||
const Math::Vec2<Fix12P4>& vtx3) {
|
||||
const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
|
||||
const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
|
||||
// TODO: There is a very small chance this will overflow for sizeof(int) == 4
|
||||
return Math::Cross(vec1, vec2).z;
|
||||
};
|
||||
|
||||
int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
||||
int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
|
||||
int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
|
||||
int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
|
||||
int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
|
||||
int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
|
||||
int wsum = w0 + w1 + w2;
|
||||
|
||||
// If current pixel is not covered by the current primitive
|
||||
|
@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
return 0;
|
||||
}
|
||||
};
|
||||
s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
|
||||
t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
|
||||
s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
|
||||
t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
|
||||
|
||||
u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
|
||||
auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
|
||||
|
@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
}
|
||||
};
|
||||
|
||||
auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
|
||||
static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
|
||||
switch (factor)
|
||||
{
|
||||
case ColorModifier::SourceColor:
|
||||
return values.rgb();
|
||||
|
||||
case ColorModifier::OneMinusSourceColor:
|
||||
return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
|
||||
|
||||
case ColorModifier::SourceAlpha:
|
||||
return { values.a(), values.a(), values.a() };
|
||||
|
||||
|
@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
}
|
||||
};
|
||||
|
||||
auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
|
||||
static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
|
||||
switch (factor) {
|
||||
case AlphaModifier::SourceAlpha:
|
||||
return value;
|
||||
|
@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
}
|
||||
};
|
||||
|
||||
auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
|
||||
static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
|
||||
switch (op) {
|
||||
case Operation::Replace:
|
||||
return input[0];
|
||||
|
@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
case Operation::Lerp:
|
||||
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
|
||||
|
||||
case Operation::Subtract:
|
||||
{
|
||||
auto result = input[0].Cast<int>() - input[1].Cast<int>();
|
||||
result.r() = std::max(0, result.r());
|
||||
result.g() = std::max(0, result.g());
|
||||
result.b() = std::max(0, result.b());
|
||||
return result.Cast<u8>();
|
||||
}
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
|
||||
_dbg_assert_(HW_GPU, 0);
|
||||
|
@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
}
|
||||
};
|
||||
|
||||
auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
|
||||
static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
|
||||
switch (op) {
|
||||
case Operation::Replace:
|
||||
return input[0];
|
||||
|
@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
case Operation::Lerp:
|
||||
return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
|
||||
|
||||
case Operation::Subtract:
|
||||
return std::max(0, (int)input[0] - (int)input[1]);
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
|
||||
_dbg_assert_(HW_GPU, 0);
|
||||
|
@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
|||
combiner_output = Math::MakeVec(color_output, alpha_output);
|
||||
}
|
||||
|
||||
// TODO: Not sure if the multiplication by 65535 has already been taken care
|
||||
// of when transforming to screen coordinates or not.
|
||||
u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
|
||||
(float)v1.screenpos[2].ToFloat32() * w1 +
|
||||
(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||
SetDepth(x >> 4, y >> 4, z);
|
||||
// TODO: Does depth indeed only get written if depth testing is enabled?
|
||||
if (registers.output_merger.depth_test_enable) {
|
||||
u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
|
||||
v1.screenpos[2].ToFloat32() * w1 +
|
||||
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
||||
|
||||
bool pass = false;
|
||||
|
||||
switch (registers.output_merger.depth_test_func) {
|
||||
case registers.output_merger.Always:
|
||||
pass = true;
|
||||
break;
|
||||
|
||||
case registers.output_merger.LessThan:
|
||||
pass = z < ref_z;
|
||||
break;
|
||||
|
||||
case registers.output_merger.GreaterThan:
|
||||
pass = z > ref_z;
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pass)
|
||||
continue;
|
||||
|
||||
if (registers.output_merger.depth_write_enable)
|
||||
SetDepth(x >> 4, y >> 4, z);
|
||||
}
|
||||
|
||||
auto dest = GetPixel(x >> 4, y >> 4);
|
||||
|
||||
if (registers.output_merger.alphablend_enable) {
|
||||
auto params = registers.output_merger.alpha_blending;
|
||||
|
||||
auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
|
||||
switch(factor) {
|
||||
case params.Zero:
|
||||
return Math::Vec3<u8>(0, 0, 0);
|
||||
|
||||
case params.One:
|
||||
return Math::Vec3<u8>(255, 255, 255);
|
||||
|
||||
case params.SourceAlpha:
|
||||
return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
|
||||
|
||||
case params.OneMinusSourceAlpha:
|
||||
return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
|
||||
switch(factor) {
|
||||
case params.Zero:
|
||||
return 0;
|
||||
|
||||
case params.One:
|
||||
return 255;
|
||||
|
||||
case params.SourceAlpha:
|
||||
return combiner_output.a();
|
||||
|
||||
case params.OneMinusSourceAlpha:
|
||||
return 255 - combiner_output.a();
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
|
||||
LookupFactorA(params.factor_source_a));
|
||||
auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
|
||||
LookupFactorA(params.factor_dest_a));
|
||||
|
||||
switch (params.blend_equation_rgb) {
|
||||
case params.Add:
|
||||
{
|
||||
auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
|
||||
result.r() = std::min(255, result.r());
|
||||
result.g() = std::min(255, result.g());
|
||||
result.b() = std::min(255, result.b());
|
||||
combiner_output = result.Cast<u8>();
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
|
||||
exit(0);
|
||||
}
|
||||
} else {
|
||||
LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
DrawPixel(x >> 4, y >> 4, combiner_output);
|
||||
}
|
||||
|
|
|
@ -8,32 +8,6 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace FormatPrecision {
|
||||
|
||||
/// Adjust RGBA8 color with RGBA6 precision
|
||||
static inline u32 rgba8_with_rgba6(u32 src) {
|
||||
u32 color = src;
|
||||
color &= 0xFCFCFCFC;
|
||||
color |= (color >> 6) & 0x03030303;
|
||||
return color;
|
||||
}
|
||||
|
||||
/// Adjust RGBA8 color with RGB565 precision
|
||||
static inline u32 rgba8_with_rgb565(u32 src) {
|
||||
u32 color = (src & 0xF8FCF8);
|
||||
color |= (color >> 5) & 0x070007;
|
||||
color |= (color >> 6) & 0x000300;
|
||||
color |= 0xFF000000;
|
||||
return color;
|
||||
}
|
||||
|
||||
/// Adjust Z24 depth value with Z16 precision
|
||||
static inline u32 z24_with_z16(u32 src) {
|
||||
return (src & 0xFFFF00) | (src >> 16);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Structure for the TGA texture format (for dumping)
|
||||
|
|
|
@ -30,6 +30,8 @@ static struct {
|
|||
Math::Vec4<float24> f[96];
|
||||
|
||||
std::array<bool,16> b;
|
||||
|
||||
std::array<Math::Vec4<u8>,4> i;
|
||||
} shader_uniforms;
|
||||
|
||||
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
|
||||
|
@ -37,33 +39,31 @@ static struct {
|
|||
static std::array<u32, 1024> shader_memory;
|
||||
static std::array<u32, 1024> swizzle_data;
|
||||
|
||||
/// Stores one word of the shader binary at word index `addr`.
void SubmitShaderMemoryChange(u32 addr, u32 value) {
    // `addr` indexes the fixed-size shader_memory array directly, and
    // std::array::operator[] performs no bounds checking. Drop out-of-range
    // writes instead of writing past the end of the array.
    // NOTE(review): consider logging rejected writes once a suitable log
    // category is confirmed to be available in this file.
    if (addr >= shader_memory.size())
        return;

    shader_memory[addr] = value;
}
|
||||
|
||||
/// Stores one swizzle pattern word at word index `addr`.
void SubmitSwizzleDataChange(u32 addr, u32 value) {
    // `addr` indexes the fixed-size swizzle_data array directly, and
    // std::array::operator[] performs no bounds checking. Drop out-of-range
    // writes instead of writing past the end of the array.
    // NOTE(review): consider logging rejected writes once a suitable log
    // category is confirmed to be available in this file.
    if (addr >= swizzle_data.size())
        return;

    swizzle_data[addr] = value;
}
|
||||
|
||||
/// Returns a mutable reference to the float uniform vector at `index`.
/// No bounds check is performed; the caller must pass a valid index.
Math::Vec4<float24>& GetFloatUniform(u32 index) {
    return shader_uniforms.f[index];
}
|
||||
|
||||
/// Returns a mutable reference to the boolean uniform at `index`.
/// No bounds check is performed; the caller must pass a valid index.
bool& GetBoolUniform(u32 index) {
    return shader_uniforms.b[index];
}
|
||||
|
||||
const std::array<u32, 1024>& GetShaderBinary()
|
||||
{
|
||||
Math::Vec4<u8>& GetIntUniform(u32 index) {
|
||||
return shader_uniforms.i[index];
|
||||
}
|
||||
|
||||
const std::array<u32, 1024>& GetShaderBinary() {
|
||||
return shader_memory;
|
||||
}
|
||||
|
||||
/// Returns read-only access to the swizzle pattern words.
const std::array<u32, 1024>& GetSwizzlePatterns() {
    return swizzle_data;
}
|
||||
|
||||
|
@ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
|||
}
|
||||
}
|
||||
|
||||
OutputVertex RunShader(const InputVertex& input, int num_attributes)
|
||||
{
|
||||
OutputVertex RunShader(const InputVertex& input, int num_attributes) {
|
||||
VertexShaderState state;
|
||||
|
||||
const u32* main = &shader_memory[registers.vs_main_offset];
|
||||
|
|
|
@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes);
|
|||
|
||||
Math::Vec4<float24>& GetFloatUniform(u32 index);
|
||||
bool& GetBoolUniform(u32 index);
|
||||
Math::Vec4<u8>& GetIntUniform(u32 index);
|
||||
|
||||
const std::array<u32, 1024>& GetShaderBinary();
|
||||
const std::array<u32, 1024>& GetSwizzlePatterns();
|
||||
|
|
Loading…
Reference in a new issue