mirror of
https://gitlab.com/suyu-emu/suyu.git
synced 2024-03-15 23:15:44 +00:00
shader: Add partial rasterizer integration
This commit is contained in:
parent
72990df7ba
commit
260743f371
|
@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
|
|||
frontend/maxwell/translate/impl/common_funcs.h
|
||||
frontend/maxwell/translate/impl/condition_code_set.cpp
|
||||
frontend/maxwell/translate/impl/double_add.cpp
|
||||
frontend/maxwell/translate/impl/exit_program.cpp
|
||||
frontend/maxwell/translate/impl/find_leading_one.cpp
|
||||
frontend/maxwell/translate/impl/floating_point_add.cpp
|
||||
frontend/maxwell/translate/impl/floating_point_compare.cpp
|
||||
|
@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC
|
|||
ir_opt/texture_pass.cpp
|
||||
ir_opt/verification_pass.cpp
|
||||
object_pool.h
|
||||
program_header.h
|
||||
profile.h
|
||||
recompiler.cpp
|
||||
recompiler.h
|
||||
shader_info.h
|
||||
)
|
||||
|
||||
|
|
|
@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
|
|||
}
|
||||
}
|
||||
|
||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
|
||||
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
|
||||
: Sirit::Module(0x00010000), profile{profile_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineCommonTypes(program.info);
|
||||
DefineCommonConstants();
|
||||
DefineSpecialVariables(program.info);
|
||||
|
||||
u32 binding{};
|
||||
DefineInterfaces(program.info, program.stage);
|
||||
DefineConstantBuffers(program.info, binding);
|
||||
DefineStorageBuffers(program.info, binding);
|
||||
DefineTextures(program.info, binding);
|
||||
|
||||
DefineLabels(program);
|
||||
}
|
||||
|
||||
|
@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) {
|
|||
return Constant(F32[1], value.F32());
|
||||
case IR::Type::F64:
|
||||
return Constant(F64[1], value.F64());
|
||||
case IR::Type::Label:
|
||||
return value.Label()->Definition<Id>();
|
||||
default:
|
||||
throw NotImplementedException("Immediate type {}", value.Type());
|
||||
}
|
||||
|
@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) {
|
|||
F32.Define(*this, TypeFloat(32), "f32");
|
||||
U32.Define(*this, TypeInt(32, false), "u32");
|
||||
|
||||
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
|
||||
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
||||
|
||||
if (info.uses_int8) {
|
||||
AddCapability(spv::Capability::Int8);
|
||||
U8 = Name(TypeInt(8, false), "u8");
|
||||
|
@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() {
|
|||
u32_zero_value = Constant(U32[1], 0U);
|
||||
}
|
||||
|
||||
void EmitContext::DefineSpecialVariables(const Info& info) {
|
||||
const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) {
|
||||
const Id pointer_type{TypePointer(storage_class, type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)};
|
||||
Decorate(id, spv::Decoration::BuiltIn, builtin);
|
||||
return id;
|
||||
}};
|
||||
void EmitContext::DefineInterfaces(const Info& info, Stage stage) {
|
||||
const auto define{
|
||||
[this](Id type, std::optional<spv::BuiltIn> builtin, spv::StorageClass storage_class) {
|
||||
const Id pointer_type{TypePointer(storage_class, type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, storage_class)};
|
||||
if (builtin) {
|
||||
Decorate(id, spv::Decoration::BuiltIn, *builtin);
|
||||
}
|
||||
interfaces.push_back(id);
|
||||
return id;
|
||||
}};
|
||||
using namespace std::placeholders;
|
||||
const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)};
|
||||
const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)};
|
||||
|
||||
if (info.uses_workgroup_id) {
|
||||
workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId);
|
||||
|
@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
|
|||
if (info.uses_local_invocation_id) {
|
||||
local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId);
|
||||
}
|
||||
if (info.loads_position) {
    // Fragment shaders read their position through the FragCoord built-in; every other stage
    // reads it through the Position built-in.
    const bool is_fragment{stage == Stage::Fragment};
    const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
    input_position = define_input(F32[4], built_in);
}
|
||||
for (size_t i = 0; i < info.loads_generics.size(); ++i) {
|
||||
if (info.loads_generics[i]) {
|
||||
// FIXME: Declare size from input
|
||||
input_generics[i] = define_input(F32[4], std::nullopt);
|
||||
Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i));
|
||||
Name(input_generics[i], fmt::format("in_attr{}", i));
|
||||
}
|
||||
}
|
||||
if (info.stores_position) {
|
||||
output_position = define_output(F32[4], spv::BuiltIn::Position);
|
||||
}
|
||||
for (size_t i = 0; i < info.stores_generics.size(); ++i) {
|
||||
if (info.stores_generics[i]) {
|
||||
output_generics[i] = define_output(F32[4], std::nullopt);
|
||||
Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i));
|
||||
Name(output_generics[i], fmt::format("out_attr{}", i));
|
||||
}
|
||||
}
|
||||
if (stage == Stage::Fragment) {
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
if (!info.stores_frag_color[i]) {
|
||||
continue;
|
||||
}
|
||||
frag_color[i] = define_output(F32[4], std::nullopt);
|
||||
Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
|
||||
Name(frag_color[i], fmt::format("frag_color{}", i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
||||
|
|
|
@ -46,7 +46,7 @@ struct UniformDefinitions {
|
|||
|
||||
class EmitContext final : public Sirit::Module {
|
||||
public:
|
||||
explicit EmitContext(const Profile& profile, IR::Program& program);
|
||||
explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
|
||||
~EmitContext();
|
||||
|
||||
[[nodiscard]] Id Def(const IR::Value& value);
|
||||
|
@ -71,6 +71,9 @@ public:
|
|||
|
||||
UniformDefinitions uniform_types;
|
||||
|
||||
Id input_f32{};
|
||||
Id output_f32{};
|
||||
|
||||
Id storage_u32{};
|
||||
|
||||
std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
|
||||
|
@ -80,10 +83,21 @@ public:
|
|||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
||||
Id output_position{};
|
||||
std::array<Id, 32> output_generics{};
|
||||
|
||||
std::array<Id, 8> frag_color{};
|
||||
Id frag_depth {};
|
||||
|
||||
std::vector<Id> interfaces;
|
||||
|
||||
private:
|
||||
void DefineCommonTypes(const Info& info);
|
||||
void DefineCommonConstants();
|
||||
void DefineSpecialVariables(const Info& info);
|
||||
void DefineInterfaces(const Info& info, Stage stage);
|
||||
void DefineConstantBuffers(const Info& info, u32& binding);
|
||||
void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
|
||||
Id type, char type_char, u32 element_size);
|
||||
|
|
|
@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
|
|||
return arg.U32();
|
||||
} else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
|
||||
return arg.Label();
|
||||
} else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
|
||||
return arg.Attribute();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
|
||||
EmitContext ctx{profile, program};
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program,
|
||||
u32& binding) {
|
||||
EmitContext ctx{profile, program, binding};
|
||||
const Id void_function{ctx.TypeFunction(ctx.void_id)};
|
||||
const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
|
@ -208,28 +211,41 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
|
|||
}
|
||||
}
|
||||
ctx.OpFunctionEnd();
|
||||
boost::container::small_vector<Id, 32> interfaces;
|
||||
const Info& info{program.info};
|
||||
if (info.uses_workgroup_id) {
|
||||
interfaces.push_back(ctx.workgroup_id);
|
||||
}
|
||||
if (info.uses_local_invocation_id) {
|
||||
interfaces.push_back(ctx.local_invocation_id);
|
||||
}
|
||||
const std::span interfaces_span(interfaces.data(), interfaces.size());
|
||||
ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span);
|
||||
|
||||
const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
|
||||
ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1],
|
||||
workgroup_size[2]);
|
||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||
spv::ExecutionModel execution_model{};
|
||||
switch (env.ShaderStage()) {
|
||||
case Shader::Stage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
|
||||
execution_model = spv::ExecutionModel::GLCompute;
|
||||
ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
workgroup_size[1], workgroup_size[2]);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::VertexB:
|
||||
execution_model = spv::ExecutionModel::Vertex;
|
||||
break;
|
||||
case Shader::Stage::Fragment:
|
||||
execution_model = spv::ExecutionModel::Fragment;
|
||||
ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", env.ShaderStage());
|
||||
}
|
||||
ctx.AddEntryPoint(execution_model, func, "main", interfaces);
|
||||
|
||||
SetupDenormControl(profile, program, ctx, func);
|
||||
const Info& info{program.info};
|
||||
if (info.uses_sampled_1d) {
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
}
|
||||
if (info.uses_sparse_residency) {
|
||||
ctx.AddCapability(spv::Capability::SparseResidency);
|
||||
}
|
||||
if (info.uses_demote_to_helper_invocation) {
|
||||
ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
|
||||
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
|
||||
}
|
||||
// TODO: Track this usage
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
|
||||
|
|
|
@ -16,18 +16,18 @@
|
|||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env,
|
||||
IR::Program& program);
|
||||
IR::Program& program, u32& binding);
|
||||
|
||||
// Microinstruction emitters
|
||||
Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
|
||||
void EmitVoid(EmitContext& ctx);
|
||||
Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
|
||||
void EmitBranch(EmitContext& ctx, IR::Block* label);
|
||||
void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
|
||||
IR::Block* false_label);
|
||||
void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
|
||||
void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
|
||||
void EmitBranch(EmitContext& ctx, Id label);
|
||||
void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
|
||||
void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
|
||||
void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
|
||||
void EmitReturn(EmitContext& ctx);
|
||||
void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
|
||||
void EmitGetRegister(EmitContext& ctx);
|
||||
void EmitSetRegister(EmitContext& ctx);
|
||||
void EmitGetPred(EmitContext& ctx);
|
||||
|
@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
|
|||
Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||
Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
|
||||
void EmitGetAttribute(EmitContext& ctx);
|
||||
void EmitSetAttribute(EmitContext& ctx);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
|
||||
void EmitGetAttributeIndexed(EmitContext& ctx);
|
||||
void EmitSetAttributeIndexed(EmitContext& ctx);
|
||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
||||
void EmitSetFragDepth(EmitContext& ctx, Id value);
|
||||
void EmitGetZFlag(EmitContext& ctx);
|
||||
void EmitGetSFlag(EmitContext& ctx);
|
||||
void EmitGetCFlag(EmitContext& ctx);
|
||||
|
|
|
@ -5,6 +5,43 @@
|
|||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
// Builds a pointer to the single f32 component of an input variable that `attr` refers to.
// The component index inside the 4-wide vector is the attribute's numeric value modulo 4.
// Generic attributes resolve to ctx.input_generics, Position{X,Y,Z,W} resolve to
// ctx.input_position; any other attribute throws NotImplementedException.
Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
    // The component constant is created only once a base variable is known, so nothing is
    // emitted on the unsupported-attribute path.
    const auto component_pointer{[&ctx, attr](Id base_vector) {
        const Id component{ctx.Constant(ctx.U32[1], static_cast<u32>(attr) % 4)};
        return ctx.OpAccessChain(ctx.input_f32, base_vector, component);
    }};
    if (IR::IsGeneric(attr)) {
        return component_pointer(ctx.input_generics.at(IR::GenericAttributeIndex(attr)));
    }
    switch (attr) {
    case IR::Attribute::PositionX:
    case IR::Attribute::PositionY:
    case IR::Attribute::PositionZ:
    case IR::Attribute::PositionW:
        return component_pointer(ctx.input_position);
    default:
        throw NotImplementedException("Read attribute {}", attr);
    }
}
|
||||
|
||||
// Builds a pointer to the single f32 component of an output variable that `attr` refers to.
// The component index inside the 4-wide vector is the attribute's numeric value modulo 4.
// Generic attributes resolve to ctx.output_generics, Position{X,Y,Z,W} resolve to
// ctx.output_position; any other attribute throws NotImplementedException.
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
    const u32 element{static_cast<u32>(attr) % 4};
    // Deferred so the constant is only emitted when a pointer is actually produced.
    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
    if (IR::IsGeneric(attr)) {
        const u32 index{IR::GenericAttributeIndex(attr)};
        return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id());
    }
    switch (attr) {
    case IR::Attribute::PositionX:
    case IR::Attribute::PositionY:
    case IR::Attribute::PositionZ:
    case IR::Attribute::PositionW:
        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id());
    default:
        // This helper serves attribute stores, so report the failure as a write.
        throw NotImplementedException("Write attribute {}", attr);
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void EmitGetRegister(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
|
@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
|
|||
return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
|
||||
}
|
||||
|
||||
void EmitGetAttribute(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
|
||||
return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr));
|
||||
}
|
||||
|
||||
void EmitSetAttribute(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
|
||||
ctx.OpStore(OutputAttrPointer(ctx, attr), value);
|
||||
}
|
||||
|
||||
void EmitGetAttributeIndexed(EmitContext&) {
|
||||
|
@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) {
|
|||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
||||
// Stores `value` into one component of the fragment color output selected by `index`.
// `component` picks the element of the 4-wide output vector; ctx.frag_color.at() throws if
// `index` is outside the array.
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
    const Id component_index{ctx.Constant(ctx.U32[1], component)};
    const Id color_output{ctx.frag_color.at(index)};
    const Id element_pointer{ctx.OpAccessChain(ctx.output_f32, color_output, component_index)};
    ctx.OpStore(element_pointer, value);
}
|
||||
|
||||
// Stores `value` through the context's frag_depth variable.
// NOTE(review): no assignment to frag_depth is visible in EmitContext::DefineInterfaces, so it
// is presumably still a default-initialised Id when this runs - confirm it gets declared before
// fragment depth writes are enabled.
void EmitSetFragDepth(EmitContext& ctx, Id value) {
|
||||
ctx.OpStore(ctx.frag_depth, value);
|
||||
}
|
||||
|
||||
void EmitGetZFlag(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
}
|
||||
|
|
|
@ -6,26 +6,29 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
void EmitBranch(EmitContext& ctx, IR::Block* label) {
|
||||
ctx.OpBranch(label->Definition<Id>());
|
||||
void EmitBranch(EmitContext& ctx, Id label) {
|
||||
ctx.OpBranch(label);
|
||||
}
|
||||
|
||||
void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
|
||||
IR::Block* false_label) {
|
||||
ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
|
||||
void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) {
|
||||
ctx.OpBranchConditional(condition, true_label, false_label);
|
||||
}
|
||||
|
||||
void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
|
||||
ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(),
|
||||
spv::LoopControlMask::MaskNone);
|
||||
void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) {
|
||||
ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
|
||||
}
|
||||
|
||||
void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
|
||||
ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone);
|
||||
void EmitSelectionMerge(EmitContext& ctx, Id merge_label) {
|
||||
ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
|
||||
}
|
||||
|
||||
void EmitReturn(EmitContext& ctx) {
|
||||
ctx.OpReturn();
|
||||
}
|
||||
|
||||
// Emits OpDemoteToHelperInvocationEXT and then an unconditional branch to `continue_label`.
// Execution may continue after the demotion, so the block still has to end with a branch.
void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
|
||||
ctx.OpDemoteToHelperInvocationEXT();
|
||||
ctx.OpBranch(continue_label);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#include <array>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
#include "shader_recompiler/program_header.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
@ -15,6 +17,18 @@ public:
|
|||
[[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
|
||||
|
||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
|
||||
|
||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||
return sph;
|
||||
}
|
||||
|
||||
[[nodiscard]] Stage ShaderStage() const noexcept {
|
||||
return stage;
|
||||
}
|
||||
|
||||
protected:
|
||||
ProgramHeader sph{};
|
||||
Stage stage{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept {
|
|||
return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
|
||||
}
|
||||
|
||||
int GenericAttributeIndex(Attribute attribute) {
|
||||
u32 GenericAttributeIndex(Attribute attribute) {
|
||||
if (!IsGeneric(attribute)) {
|
||||
throw InvalidArgument("Attribute is not generic {}", attribute);
|
||||
}
|
||||
|
|
|
@ -224,7 +224,7 @@ enum class Attribute : u64 {
|
|||
|
||||
[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
|
||||
|
||||
[[nodiscard]] int GenericAttributeIndex(Attribute attribute);
|
||||
[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
|
||||
|
||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||
|
||||
|
|
|
@ -82,6 +82,12 @@ void IREmitter::Return() {
|
|||
Inst(Opcode::Return);
|
||||
}
|
||||
|
||||
// Emits a DemoteToHelperInvocation instruction that carries `continue_label` as its operand.
// Before emitting, the current block's branch is set to `continue_label` and the current block
// is added to that label's immediate predecessors.
void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
|
||||
block->SetBranch(continue_label);
|
||||
continue_label->AddImmediatePredecessor(block);
|
||||
Inst(Opcode::DemoteToHelperInvocation, continue_label);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetReg(IR::Reg reg) {
|
||||
return Inst<U32>(Opcode::GetRegister, reg);
|
||||
}
|
||||
|
@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
|
|||
Inst(Opcode::SetAttribute, attribute, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
|
||||
Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
|
||||
}
|
||||
|
||||
void IREmitter::SetFragDepth(const F32& value) {
|
||||
Inst(Opcode::SetFragDepth, value);
|
||||
}
|
||||
|
||||
U32 IREmitter::WorkgroupIdX() {
|
||||
return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ public:
|
|||
void LoopMerge(Block* merge_block, Block* continue_target);
|
||||
void SelectionMerge(Block* merge_block);
|
||||
void Return();
|
||||
void DemoteToHelperInvocation(Block* continue_label);
|
||||
|
||||
[[nodiscard]] U32 GetReg(IR::Reg reg);
|
||||
void SetReg(IR::Reg reg, const U32& value);
|
||||
|
@ -67,6 +68,9 @@ public:
|
|||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
||||
void SetAttribute(IR::Attribute attribute, const F32& value);
|
||||
|
||||
void SetFragColor(u32 index, u32 component, const F32& value);
|
||||
void SetFragDepth(const F32& value);
|
||||
|
||||
[[nodiscard]] U32 WorkgroupIdX();
|
||||
[[nodiscard]] U32 WorkgroupIdY();
|
||||
[[nodiscard]] U32 WorkgroupIdZ();
|
||||
|
|
|
@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::LoopMerge:
|
||||
case Opcode::SelectionMerge:
|
||||
case Opcode::Return:
|
||||
case Opcode::DemoteToHelperInvocation:
|
||||
case Opcode::SetAttribute:
|
||||
case Opcode::SetAttributeIndexed:
|
||||
case Opcode::SetFragColor:
|
||||
case Opcode::SetFragDepth:
|
||||
case Opcode::WriteGlobalU8:
|
||||
case Opcode::WriteGlobalS8:
|
||||
case Opcode::WriteGlobalU16:
|
||||
|
|
|
@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1,
|
|||
OPCODE(LoopMerge, Void, Label, Label, )
|
||||
OPCODE(SelectionMerge, Void, Label, )
|
||||
OPCODE(Return, Void, )
|
||||
OPCODE(DemoteToHelperInvocation, Void, Label, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetRegister, U32, Reg, )
|
||||
|
@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32,
|
|||
OPCODE(GetCbufU32, U32, U32, U32, )
|
||||
OPCODE(GetCbufF32, F32, U32, U32, )
|
||||
OPCODE(GetCbufU64, U64, U32, U32, )
|
||||
OPCODE(GetAttribute, U32, Attribute, )
|
||||
OPCODE(SetAttribute, Void, Attribute, U32, )
|
||||
OPCODE(GetAttributeIndexed, U32, U32, )
|
||||
OPCODE(SetAttributeIndexed, Void, U32, U32, )
|
||||
OPCODE(GetAttribute, F32, Attribute, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, )
|
||||
OPCODE(GetAttributeIndexed, F32, U32, )
|
||||
OPCODE(SetAttributeIndexed, Void, U32, F32, )
|
||||
OPCODE(SetFragColor, Void, U32, U32, F32, )
|
||||
OPCODE(SetFragDepth, Void, F32, )
|
||||
OPCODE(GetZFlag, U1, Void, )
|
||||
OPCODE(GetSFlag, U1, Void, )
|
||||
OPCODE(GetCFlag, U1, Void, )
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -17,6 +18,7 @@ struct Program {
|
|||
BlockList blocks;
|
||||
BlockList post_order_blocks;
|
||||
Info info;
|
||||
Stage stage{};
|
||||
};
|
||||
|
||||
[[nodiscard]] std::string DumpProgram(const Program& program);
|
||||
|
|
|
@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256;
|
|||
return reg + (-num);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Reg operator++(Reg& reg) {
|
||||
constexpr Reg operator++(Reg& reg) {
|
||||
reg = reg + 1;
|
||||
return reg;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
|
||||
constexpr Reg operator++(Reg& reg, int) {
|
||||
const Reg copy{reg};
|
||||
reg = reg + 1;
|
||||
return copy;
|
||||
|
|
|
@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) {
|
|||
case Opcode::EXIT:
|
||||
case Opcode::JMP:
|
||||
case Opcode::JMX:
|
||||
case Opcode::KIL:
|
||||
case Opcode::BRK:
|
||||
case Opcode::CONT:
|
||||
case Opcode::LONGJMP:
|
||||
|
@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
|
|||
block->end = pc;
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
case Opcode::KIL: {
|
||||
const Predicate pred{inst.Pred()};
|
||||
const auto ir_pred{static_cast<IR::Pred>(pred.index)};
|
||||
const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
|
||||
AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
case Opcode::PBK:
|
||||
case Opcode::PCNT:
|
||||
case Opcode::PEXIT:
|
||||
|
@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
|
|||
return AnalysisState::Continue;
|
||||
}
|
||||
const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
|
||||
AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true);
|
||||
AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
|
||||
void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
|
||||
EndClass insn_end_class, IR::Condition cond,
|
||||
bool visit_conditional_inst) {
|
||||
EndClass insn_end_class, IR::Condition cond) {
|
||||
if (block->begin != pc) {
|
||||
// If the block doesn't start in the conditional instruction
|
||||
// mark it as a label to visit it later
|
||||
|
@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
|
|||
// Impersonate the visited block with a virtual block
|
||||
*block = std::move(virtual_block);
|
||||
// Set the end properties of the conditional instruction
|
||||
conditional_block->end = visit_conditional_inst ? (pc + 1) : pc;
|
||||
conditional_block->end = pc + 1;
|
||||
conditional_block->end_class = insn_end_class;
|
||||
// Add a label to the instruction after the conditional instruction
|
||||
Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
|
||||
// Branch to the next instruction from the virtual block
|
||||
block->branch_false = endif_block;
|
||||
// And branch to it from the conditional instruction if it is a branch
|
||||
if (insn_end_class == EndClass::Branch) {
|
||||
// And branch to it from the conditional instruction if it is a branch or a kill instruction
|
||||
// Kill instructions are considered a branch because they demote to a helper invocation and
|
||||
// execution may continue.
|
||||
if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
|
||||
conditional_block->cond = IR::Condition{true};
|
||||
conditional_block->branch_true = endif_block;
|
||||
conditional_block->branch_false = nullptr;
|
||||
|
@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
|
|||
throw NotImplementedException("Conditional EXIT with PEXIT token");
|
||||
}
|
||||
const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
|
||||
AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false);
|
||||
AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
|
||||
|
@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
|
|||
block->branch_false = nullptr;
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
block->end = pc;
|
||||
block->end = pc + 1;
|
||||
block->end_class = EndClass::Exit;
|
||||
return AnalysisState::Branch;
|
||||
}
|
||||
|
@ -505,6 +514,12 @@ std::string CFG::Dot() const {
|
|||
node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
case EndClass::Kill:
|
||||
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
||||
dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
|
||||
node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (function.entrypoint == 8) {
|
||||
|
|
|
@ -29,6 +29,7 @@ enum class EndClass {
|
|||
Call,
|
||||
Exit,
|
||||
Return,
|
||||
Kill,
|
||||
};
|
||||
|
||||
enum class Token {
|
||||
|
@ -130,7 +131,7 @@ private:
|
|||
AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
|
||||
|
||||
void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
|
||||
IR::Condition cond, bool visit_conditional_inst);
|
||||
IR::Condition cond);
|
||||
|
||||
/// Return true when the branch instruction is confirmed to be a branch
|
||||
bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
|
||||
|
|
|
@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
IR::Program program;
|
||||
program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
|
||||
program.post_order_blocks = PostOrder(program.blocks);
|
||||
program.stage = env.ShaderStage();
|
||||
RemoveUnreachableBlocks(program);
|
||||
|
||||
// Replace instructions before the SSA rewrite
|
||||
|
|
|
@ -45,6 +45,7 @@ enum class StatementType {
|
|||
Loop,
|
||||
Break,
|
||||
Return,
|
||||
Kill,
|
||||
Function,
|
||||
Identity,
|
||||
Not,
|
||||
|
@ -70,6 +71,7 @@ struct If {};
|
|||
struct Loop {};
|
||||
struct Break {};
|
||||
struct Return {};
|
||||
struct Kill {};
|
||||
struct FunctionTag {};
|
||||
struct Identity {};
|
||||
struct Not {};
|
||||
|
@ -93,6 +95,7 @@ struct Statement : ListBaseHook {
|
|||
Statement(Break, Statement* cond_, Statement* up_)
|
||||
: cond{cond_}, up{up_}, type{StatementType::Break} {}
|
||||
Statement(Return) : type{StatementType::Return} {}
|
||||
Statement(Kill) : type{StatementType::Kill} {}
|
||||
Statement(FunctionTag) : children{}, type{StatementType::Function} {}
|
||||
Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
|
||||
Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
|
||||
|
@ -174,6 +177,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
|
|||
case StatementType::Return:
|
||||
ret += fmt::format("{} return;\n", indent);
|
||||
break;
|
||||
case StatementType::Kill:
|
||||
ret += fmt::format("{} kill;\n", indent);
|
||||
break;
|
||||
case StatementType::SetVariable:
|
||||
ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
|
||||
break;
|
||||
|
@ -424,6 +430,9 @@ private:
|
|||
gotos.push_back(root.insert(ip, *goto_stmt));
|
||||
break;
|
||||
}
|
||||
case Flow::EndClass::Kill:
|
||||
root.insert(ip, *pool.Create(Kill{}));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -729,6 +738,15 @@ private:
|
|||
current_block = nullptr;
|
||||
break;
|
||||
}
|
||||
case StatementType::Kill: {
|
||||
if (!current_block) {
|
||||
current_block = block_pool.Create(inst_pool);
|
||||
block_list.push_back(current_block);
|
||||
}
|
||||
IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block);
|
||||
current_block = nullptr;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw NotImplementedException("Statement type {}", stmt.type);
|
||||
}
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
|
||||
void TranslatorVisitor::EXIT(u64) {
|
||||
ir.Exit();
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Emits the fragment color stores performed when a fragment program exits.
//
// For each of the 8 render targets the shader program header reports which of the 4 components
// are enabled. Enabled components are taken from consecutive registers starting at R0, walking
// render targets first and components second, and written with SetFragColor.
// Throws NotImplementedException when the header requests a sample mask or fragment depth
// output; both checks run after the color stores have been emitted.
void ExitFragment(TranslatorVisitor& v) {
    // SPH() returns a const reference; bind to it instead of copying the whole header.
    const ProgramHeader& sph{v.env.SPH()};
    IR::Reg src_reg{IR::Reg::R0};
    for (u32 render_target = 0; render_target < 8; ++render_target) {
        const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
        for (u32 component = 0; component < 4; ++component) {
            if (!mask[component]) {
                // Disabled components do not consume a source register.
                continue;
            }
            v.ir.SetFragColor(render_target, component, v.F(src_reg));
            ++src_reg;
        }
    }
    if (sph.ps.omap.sample_mask != 0) {
        throw NotImplementedException("Sample mask");
    }
    if (sph.ps.omap.depth != 0) {
        throw NotImplementedException("Fragment depth");
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::EXIT() {
|
||||
switch (env.ShaderStage()) {
|
||||
case Stage::Fragment:
|
||||
ExitFragment(*this);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -108,7 +108,7 @@ public:
|
|||
void DSETP_reg(u64 insn);
|
||||
void DSETP_cbuf(u64 insn);
|
||||
void DSETP_imm(u64 insn);
|
||||
void EXIT(u64 insn);
|
||||
void EXIT();
|
||||
void F2F_reg(u64 insn);
|
||||
void F2F_cbuf(u64 insn);
|
||||
void F2F_imm(u64 insn);
|
||||
|
@ -220,7 +220,7 @@ public:
|
|||
void JCAL(u64 insn);
|
||||
void JMP(u64 insn);
|
||||
void JMX(u64 insn);
|
||||
void KIL(u64 insn);
|
||||
void KIL();
|
||||
void LD(u64 insn);
|
||||
void LDC(u64 insn);
|
||||
void LDG(u64 insn);
|
||||
|
|
|
@ -11,6 +11,13 @@
|
|||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
// Access width selector decoded from the 2-bit size field (bits 47-48) of ALD and AST.
// NumElements maps each value to the number of registers/attributes transferred.
enum class Size : u64 {
|
||||
B32,
|
||||
B64,
|
||||
B96,
|
||||
B128,
|
||||
};
|
||||
|
||||
enum class InterpolationMode : u64 {
|
||||
Pass,
|
||||
Multiply,
|
||||
|
@ -23,8 +30,85 @@ enum class SampleMode : u64 {
|
|||
Centroid,
|
||||
Offset,
|
||||
};
|
||||
|
||||
// Returns how many elements an access of the given size covers (B32 -> 1 ... B128 -> 4).
// Throws InvalidArgument for a value that is not one of the named enumerators.
int NumElements(Size size) {
    int count{};
    switch (size) {
    case Size::B32:
        count = 1;
        break;
    case Size::B64:
        count = 2;
        break;
    case Size::B96:
        count = 3;
        break;
    case Size::B128:
        count = 4;
        break;
    }
    // count is still zero only when no enumerator matched
    if (count == 0) {
        throw InvalidArgument("Invalid size {}", size);
    }
    return count;
}
|
||||
} // Anonymous namespace
|
||||
|
||||
// ALD: loads attributes into consecutive destination registers.
// Only the plain form is translated. The O flag, the P (patch) flag and a non-RZ index
// register raise NotImplementedException, as does an absolute offset that is not a multiple
// of 4.
void TranslatorVisitor::ALD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 10, u64> absolute_offset;
        BitField<20, 11, s64> relative_offset;
        BitField<39, 8, IR::Reg> stream_reg;
        BitField<32, 1, u64> o;
        BitField<31, 1, u64> patch;
        BitField<47, 2, Size> size;
    } const ald{insn};

    if (ald.o != 0) {
        throw NotImplementedException("O");
    }
    if (ald.patch != 0) {
        throw NotImplementedException("P");
    }
    if (ald.index_reg != IR::Reg::RZ) {
        throw NotImplementedException("Indexed");
    }
    const u64 offset{ald.absolute_offset.Value()};
    if (offset % 4 != 0) {
        throw NotImplementedException("Unaligned absolute offset {}", offset);
    }
    // The offset is divided by 4 to obtain the index of the first attribute
    const u64 first_attribute{offset / 4};
    const int count{NumElements(ald.size)};
    for (int i = 0; i < count; ++i) {
        const IR::Attribute attribute{first_attribute + i};
        F(ald.dest_reg + i, ir.GetAttribute(attribute));
    }
}
|
||||
|
||||
// AST: stores consecutive source registers into attributes.
// Only the plain form is translated. The P (patch) flag, a non-RZ stream register and a
// non-RZ index register raise NotImplementedException, as does an absolute offset that is
// not a multiple of 4.
void TranslatorVisitor::AST(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 10, u64> absolute_offset;
        BitField<20, 11, s64> relative_offset;
        BitField<31, 1, u64> patch;
        BitField<39, 8, IR::Reg> stream_reg;
        BitField<47, 2, Size> size;
    } const ast{insn};

    if (ast.patch != 0) {
        throw NotImplementedException("P");
    }
    if (ast.stream_reg != IR::Reg::RZ) {
        throw NotImplementedException("Stream store");
    }
    if (ast.index_reg != IR::Reg::RZ) {
        throw NotImplementedException("Indexed store");
    }
    const u64 offset{ast.absolute_offset.Value()};
    if (offset % 4 != 0) {
        throw NotImplementedException("Unaligned absolute offset {}", offset);
    }
    // The offset is divided by 4 to obtain the index of the first attribute
    const u64 first_attribute{offset / 4};
    const int count{NumElements(ast.size)};
    for (int i = 0; i < count; ++i) {
        const IR::Attribute attribute{first_attribute + i};
        ir.SetAttribute(attribute, F(ast.src_reg + i));
    }
}
|
||||
|
||||
void TranslatorVisitor::IPA(u64 insn) {
|
||||
// IPA is the instruction used to read varyings from a fragment shader.
|
||||
// gl_FragCoord is mapped to the gl_Position attribute.
|
||||
|
@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) {
|
|||
// }
|
||||
const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
|
||||
if (is_indexed) {
|
||||
throw NotImplementedException("IPA.IDX");
|
||||
throw NotImplementedException("IDX");
|
||||
}
|
||||
|
||||
const IR::Attribute attribute{ipa.attribute};
|
||||
|
|
|
@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) {
|
|||
ThrowNotImplemented(Opcode::AL2P);
|
||||
}
|
||||
|
||||
// Stub: ALD has no translation here; the opcode is reported through ThrowNotImplemented.
void TranslatorVisitor::ALD(u64) {
|
||||
ThrowNotImplemented(Opcode::ALD);
|
||||
}
|
||||
|
||||
// Stub: AST has no translation here; the opcode is reported through ThrowNotImplemented.
void TranslatorVisitor::AST(u64) {
|
||||
ThrowNotImplemented(Opcode::AST);
|
||||
}
|
||||
|
||||
// Stub: ATOM_cas has no translation here; the opcode is reported through ThrowNotImplemented.
void TranslatorVisitor::ATOM_cas(u64) {
|
||||
ThrowNotImplemented(Opcode::ATOM_cas);
|
||||
}
|
||||
|
@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) {
|
|||
ThrowNotImplemented(Opcode::DSETP_imm);
|
||||
}
|
||||
|
||||
// Always throws LogicError: this overload treats reaching EXIT through the instruction
// visitor as a programming error.
void TranslatorVisitor::EXIT(u64) {
|
||||
throw LogicError("Visting EXIT instruction");
|
||||
}
|
||||
|
||||
// Stub: F2F_reg has no translation here; the opcode is reported through ThrowNotImplemented.
void TranslatorVisitor::F2F_reg(u64) {
|
||||
ThrowNotImplemented(Opcode::F2F_reg);
|
||||
}
|
||||
|
@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) {
|
|||
ThrowNotImplemented(Opcode::JMX);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::KIL(u64) {
|
||||
ThrowNotImplemented(Opcode::KIL);
|
||||
void TranslatorVisitor::KIL() {
|
||||
// KIL is a no-op
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LD(u64) {
|
||||
|
|
|
@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) {
|
|||
BitField<36, 13, u64> cbuf_offset;
|
||||
} const tex{insn};
|
||||
|
||||
Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
|
||||
Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
|
||||
}
|
||||
|
||||
void TranslatorVisitor::TEX_b(u64 insn) {
|
||||
|
|
|
@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
|
|||
|
||||
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
|
||||
const Encoding texs{insn};
|
||||
const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))};
|
||||
const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
|
||||
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
||||
const IR::Reg reg_a{texs.src_reg_a};
|
||||
const IR::Reg reg_b{texs.src_reg_b};
|
||||
|
|
|
@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
|
|||
return;
|
||||
}
|
||||
info.constant_buffer_mask |= 1U << index;
|
||||
info.constant_buffer_descriptors.push_back({
|
||||
.index{index},
|
||||
.count{1},
|
||||
});
|
||||
|
||||
auto& cbufs{info.constant_buffer_descriptors};
|
||||
cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
|
||||
ConstantBufferDescriptor{
|
||||
.index{index},
|
||||
.count{1},
|
||||
});
|
||||
}
|
||||
|
||||
// Records in `info` that the shader reads `attribute`.
// Generic attributes set their slot in loads_generics; any position component sets
// loads_position. Every other attribute raises NotImplementedException.
void GetAttribute(Info& info, IR::Attribute attribute) {
    if (!IR::IsGeneric(attribute)) {
        switch (attribute) {
        case IR::Attribute::PositionX:
        case IR::Attribute::PositionY:
        case IR::Attribute::PositionZ:
        case IR::Attribute::PositionW:
            info.loads_position = true;
            return;
        default:
            throw NotImplementedException("Get attribute {}", attribute);
        }
    }
    // .at() keeps the bounds check on the generic index
    info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true;
}
|
||||
|
||||
// Records in `info` that the shader writes `attribute`.
// Generic attributes set their slot in stores_generics; any position component sets
// stores_position. Every other attribute raises NotImplementedException.
void SetAttribute(Info& info, IR::Attribute attribute) {
    if (!IR::IsGeneric(attribute)) {
        switch (attribute) {
        case IR::Attribute::PositionX:
        case IR::Attribute::PositionY:
        case IR::Attribute::PositionZ:
        case IR::Attribute::PositionW:
            info.stores_position = true;
            return;
        default:
            throw NotImplementedException("Set attribute {}", attribute);
        }
    }
    // .at() keeps the bounds check on the generic index
    info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true;
}
|
||||
|
||||
void VisitUsages(Info& info, IR::Inst& inst) {
|
||||
|
@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
break;
|
||||
}
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::DemoteToHelperInvocation:
|
||||
info.uses_demote_to_helper_invocation = true;
|
||||
break;
|
||||
case IR::Opcode::GetAttribute:
|
||||
GetAttribute(info, inst.Arg(0).Attribute());
|
||||
break;
|
||||
case IR::Opcode::SetAttribute:
|
||||
SetAttribute(info, inst.Arg(0).Attribute());
|
||||
break;
|
||||
case IR::Opcode::SetFragColor:
|
||||
info.stores_frag_color[inst.Arg(0).U32()] = true;
|
||||
break;
|
||||
case IR::Opcode::SetFragDepth:
|
||||
info.stores_frag_depth = true;
|
||||
break;
|
||||
case IR::Opcode::WorkgroupId:
|
||||
info.uses_workgroup_id = true;
|
||||
break;
|
||||
|
|
|
@ -169,7 +169,7 @@ private:
|
|||
const size_t num_args{phi.NumArgs()};
|
||||
for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
|
||||
const IR::Value& op{phi.Arg(arg_index)};
|
||||
if (op == same || op == IR::Value{&phi}) {
|
||||
if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
|
||||
// Unique value or self-reference
|
||||
continue;
|
||||
}
|
||||
|
|
143
src/shader_recompiler/program_header.h
Normal file
143
src/shader_recompiler/program_header.h
Normal file
|
@ -0,0 +1,143 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
// Values of the 4-bit output_topology field of the program header (ProgramHeader::common3).
enum class OutputTopology : u32 {
|
||||
PointList = 1,
|
||||
LineStrip = 6,
|
||||
TriangleStrip = 7,
|
||||
};
|
||||
|
||||
// Per-component entry of the pixel shader input map; stored as 2-bit fields in
// ProgramHeader::ps.imap_generic_vector.
enum class PixelImap : u8 {
|
||||
Unused = 0,
|
||||
Constant = 1,
|
||||
Perspective = 2,
|
||||
ScreenLinear = 3,
|
||||
};
|
||||
|
||||
// Documentation in:
|
||||
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
|
||||
struct ProgramHeader {
|
||||
union {
|
||||
BitField<0, 5, u32> sph_type;
|
||||
BitField<5, 5, u32> version;
|
||||
BitField<10, 4, u32> shader_type;
|
||||
BitField<14, 1, u32> mrt_enable;
|
||||
BitField<15, 1, u32> kills_pixels;
|
||||
BitField<16, 1, u32> does_global_store;
|
||||
BitField<17, 4, u32> sass_version;
|
||||
BitField<21, 5, u32> reserved;
|
||||
BitField<26, 1, u32> does_load_or_store;
|
||||
BitField<27, 1, u32> does_fp64;
|
||||
BitField<28, 4, u32> stream_out_mask;
|
||||
} common0;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_low_size;
|
||||
BitField<24, 8, u32> per_patch_attribute_count;
|
||||
} common1;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_high_size;
|
||||
BitField<24, 8, u32> threads_per_input_primitive;
|
||||
} common2;
|
||||
|
||||
union {
|
||||
BitField<0, 24, u32> shader_local_memory_crs_size;
|
||||
BitField<24, 4, OutputTopology> output_topology;
|
||||
BitField<28, 4, u32> reserved;
|
||||
} common3;
|
||||
|
||||
union {
|
||||
BitField<0, 12, u32> max_output_vertices;
|
||||
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
|
||||
BitField<20, 4, u32> reserved;
|
||||
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
|
||||
} common4;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
|
||||
INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
|
||||
union {
|
||||
BitField<0, 8, u16> clip_distances;
|
||||
BitField<8, 1, u16> point_sprite_s;
|
||||
BitField<9, 1, u16> point_sprite_t;
|
||||
BitField<10, 1, u16> fog_coordinate;
|
||||
BitField<12, 1, u16> tessellation_eval_point_u;
|
||||
BitField<13, 1, u16> tessellation_eval_point_v;
|
||||
BitField<14, 1, u16> instance_id;
|
||||
BitField<15, 1, u16> vertex_id;
|
||||
};
|
||||
INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
|
||||
INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
|
||||
INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
|
||||
} vtg;
|
||||
|
||||
struct {
|
||||
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
|
||||
|
||||
union {
|
||||
BitField<0, 2, PixelImap> x;
|
||||
BitField<2, 2, PixelImap> y;
|
||||
BitField<4, 2, PixelImap> z;
|
||||
BitField<6, 2, PixelImap> w;
|
||||
u8 raw;
|
||||
} imap_generic_vector[32];
|
||||
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
|
||||
INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
|
||||
INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
|
||||
|
||||
struct {
|
||||
u32 target;
|
||||
union {
|
||||
BitField<0, 1, u32> sample_mask;
|
||||
BitField<1, 1, u32> depth;
|
||||
BitField<2, 30, u32> reserved;
|
||||
};
|
||||
} omap;
|
||||
|
||||
[[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
|
||||
const u32 bits{omap.target >> (rt * 4)};
|
||||
return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
|
||||
}
|
||||
|
||||
[[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
|
||||
const auto& vector{imap_generic_vector[attribute]};
|
||||
return {vector.x, vector.y, vector.z, vector.w};
|
||||
}
|
||||
} ps;
|
||||
|
||||
std::array<u32, 0xf> raw;
|
||||
};
|
||||
|
||||
[[nodiscard]] u64 LocalMemorySize() const noexcept {
|
||||
return (common1.shader_local_memory_low_size |
|
||||
(common2.shader_local_memory_high_size << 24));
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
|
||||
|
||||
} // namespace Shader
|
|
@ -1,28 +0,0 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/frontend/maxwell/program.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env,
|
||||
u32 start_address) {
|
||||
ObjectPool<Maxwell::Flow::Block> flow_block_pool;
|
||||
ObjectPool<IR::Inst> inst_pool;
|
||||
ObjectPool<IR::Block> block_pool;
|
||||
|
||||
Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
|
||||
IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
|
||||
return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)};
|
||||
}
|
||||
|
||||
} // namespace Shader
|
|
@ -1,20 +0,0 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile,
|
||||
Environment& env, u32 start_address);
|
||||
|
||||
} // namespace Shader
|
|
@ -56,6 +56,15 @@ struct Info {
|
|||
|
||||
bool uses_workgroup_id{};
|
||||
bool uses_local_invocation_id{};
|
||||
|
||||
std::array<bool, 32> loads_generics{};
|
||||
bool loads_position{};
|
||||
|
||||
std::array<bool, 8> stores_frag_color{};
|
||||
bool stores_frag_depth{};
|
||||
std::array<bool, 32> stores_generics{};
|
||||
bool stores_position{};
|
||||
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
bool uses_fp16_denorms_flush{};
|
||||
|
@ -68,6 +77,7 @@ struct Info {
|
|||
bool uses_image_1d{};
|
||||
bool uses_sampled_1d{};
|
||||
bool uses_sparse_residency{};
|
||||
bool uses_demote_to_helper_invocation{};
|
||||
|
||||
IR::Type used_constant_buffer_types{};
|
||||
|
||||
|
|
19
src/shader_recompiler/stage.h
Normal file
19
src/shader_recompiler/stage.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Shader {
|
||||
|
||||
// Enumerates the shader stages a program can belong to.
enum class Stage {
|
||||
Compute,
|
||||
VertexA,
|
||||
VertexB,
|
||||
TessellationControl,
|
||||
TessellationEval,
|
||||
Geometry,
|
||||
Fragment,
|
||||
};
|
||||
|
||||
} // namespace Shader
|
|
@ -100,6 +100,7 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/fixed_pipeline_state.h
|
||||
renderer_vulkan/maxwell_to_vk.cpp
|
||||
renderer_vulkan/maxwell_to_vk.h
|
||||
renderer_vulkan/pipeline_helper.h
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/renderer_vulkan.cpp
|
||||
renderer_vulkan/vk_blit_screen.cpp
|
||||
|
@ -116,15 +117,18 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_descriptor_pool.h
|
||||
renderer_vulkan/vk_fence_manager.cpp
|
||||
renderer_vulkan/vk_fence_manager.h
|
||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||
renderer_vulkan/vk_graphics_pipeline.h
|
||||
renderer_vulkan/vk_master_semaphore.cpp
|
||||
renderer_vulkan/vk_master_semaphore.h
|
||||
renderer_vulkan/vk_pipeline_cache.cpp
|
||||
renderer_vulkan/vk_pipeline_cache.h
|
||||
renderer_vulkan/vk_pipeline.h
|
||||
renderer_vulkan/vk_query_cache.cpp
|
||||
renderer_vulkan/vk_query_cache.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_render_pass_cache.cpp
|
||||
renderer_vulkan/vk_render_pass_cache.h
|
||||
renderer_vulkan/vk_resource_pool.cpp
|
||||
renderer_vulkan/vk_resource_pool.h
|
||||
renderer_vulkan/vk_scheduler.cpp
|
||||
|
|
|
@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
|
|||
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
|
||||
alpha_test_func.Assign(PackComparisonOp(test_func));
|
||||
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
|
||||
depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
|
||||
depth_format.Assign(static_cast<u32>(regs.zeta.format));
|
||||
std::ranges::transform(regs.rt, color_formats.begin(),
|
||||
[](const auto& rt) { return static_cast<u8>(rt.format); });
|
||||
|
||||
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
|
||||
point_size = Common::BitCast<u32>(regs.point_size);
|
||||
|
|
|
@ -60,7 +60,7 @@ struct FixedPipelineState {
|
|||
|
||||
void Refresh(const Maxwell& regs, size_t index);
|
||||
|
||||
constexpr std::array<bool, 4> Mask() const noexcept {
|
||||
std::array<bool, 4> Mask() const noexcept {
|
||||
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
|
||||
}
|
||||
|
||||
|
@ -97,11 +97,11 @@ struct FixedPipelineState {
|
|||
BitField<20, 3, u32> type;
|
||||
BitField<23, 6, u32> size;
|
||||
|
||||
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
|
||||
Maxwell::VertexAttribute::Type Type() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
|
||||
}
|
||||
|
||||
constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
|
||||
Maxwell::VertexAttribute::Size Size() const noexcept {
|
||||
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
|
||||
}
|
||||
};
|
||||
|
@ -187,7 +187,10 @@ struct FixedPipelineState {
|
|||
u32 raw2;
|
||||
BitField<0, 3, u32> alpha_test_func;
|
||||
BitField<3, 1, u32> early_z;
|
||||
BitField<4, 1, u32> depth_enabled;
|
||||
BitField<5, 5, u32> depth_format;
|
||||
};
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
|
||||
u32 alpha_test_ref;
|
||||
u32 point_size;
|
||||
|
|
|
@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
|
|||
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
|
||||
}
|
||||
|
||||
// Maps a guest MSAA mode to the Vulkan sample count flag used for it.
// Unknown modes hit UNREACHABLE_MSG and fall back to a single sample.
VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
    using Mode = Tegra::Texture::MsaaMode;
    switch (msaa_mode) {
    case Mode::Msaa1x1:
        return VK_SAMPLE_COUNT_1_BIT;
    case Mode::Msaa2x1:
    case Mode::Msaa2x1_D3D:
        return VK_SAMPLE_COUNT_2_BIT;
    case Mode::Msaa2x2:
    case Mode::Msaa2x2_VC4:
    case Mode::Msaa2x2_VC12:
        return VK_SAMPLE_COUNT_4_BIT;
    case Mode::Msaa4x2:
    case Mode::Msaa4x2_D3D:
    case Mode::Msaa4x2_VC8:
    case Mode::Msaa4x2_VC24:
        return VK_SAMPLE_COUNT_8_BIT;
    case Mode::Msaa4x4:
        return VK_SAMPLE_COUNT_16_BIT;
    default:
        UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
        return VK_SAMPLE_COUNT_1_BIT;
    }
}
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
|
|
|
@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
|
|||
|
||||
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
|
||||
|
||||
VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
|
|
162
src/video_core/renderer_vulkan/pipeline_helper.h
Normal file
162
src/video_core/renderer_vulkan/pipeline_helper.h
Normal file
|
@ -0,0 +1,162 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
[[likely]] if (via_header_index) {
|
||||
image = data;
|
||||
sampler = data;
|
||||
} else {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
// Bundle of the three Vulkan objects produced together by DescriptorLayoutBuilder::Create.
struct DescriptorLayoutTuple {
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
};
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutTuple Create(const vk::Device& device) {
|
||||
DescriptorLayoutTuple result;
|
||||
if (!bindings.empty()) {
|
||||
result.descriptor_set_layout = device.CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
}
|
||||
result.pipeline_layout = device.CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = result.descriptor_set_layout ? 1U : 0U,
|
||||
.pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(),
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
});
|
||||
if (!entries.empty()) {
|
||||
result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
|
||||
.pDescriptorUpdateEntries = entries.data(),
|
||||
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
|
||||
.descriptorSetLayout = *result.descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = *result.pipeline_layout,
|
||||
.set = 0,
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void Add(const Shader::Info& info, VkShaderStageFlags stage) {
|
||||
for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
|
||||
Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage);
|
||||
}
|
||||
for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
|
||||
Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage);
|
||||
}
|
||||
for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
|
||||
Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void Add(VkDescriptorType type, VkShaderStageFlags stage) {
|
||||
bindings.push_back({
|
||||
.binding = binding,
|
||||
.descriptorType = type,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = stage,
|
||||
.pImmutableSamplers = nullptr,
|
||||
});
|
||||
entries.push_back(VkDescriptorUpdateTemplateEntryKHR{
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = type,
|
||||
.offset = offset,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
});
|
||||
++binding;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
|
||||
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
||||
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
||||
u32 binding{};
|
||||
size_t offset{};
|
||||
};
|
||||
|
||||
// Converts a shader texture type to the texture cache's image view type.
// Color and shadow variants of the same dimensionality share one view type.
// A value outside the enumerators hits UNREACHABLE_MSG and yields a value-initialized result.
inline VideoCommon::ImageViewType CastType(Shader::TextureType type) {
    using Tex = Shader::TextureType;
    using View = VideoCommon::ImageViewType;
    switch (type) {
    case Tex::Color1D:
    case Tex::Shadow1D:
        return View::e1D;
    case Tex::ColorArray1D:
    case Tex::ShadowArray1D:
        return View::e1DArray;
    case Tex::Color2D:
    case Tex::Shadow2D:
        return View::e2D;
    case Tex::ColorArray2D:
    case Tex::ShadowArray2D:
        return View::e2DArray;
    case Tex::Color3D:
    case Tex::Shadow3D:
        return View::e3D;
    case Tex::ColorCube:
    case Tex::ShadowCube:
        return View::Cube;
    case Tex::ColorArrayCube:
    case Tex::ShadowArrayCube:
        return View::CubeArray;
    }
    UNREACHABLE_MSG("Invalid texture type {}", type);
    return {};
}
|
||||
|
||||
// Queues one sampled-image descriptor per texture used by the shader.
// `samplers` and `image_view_ids` are read at `index`, which is advanced once per texture
// so consecutive calls for several shaders continue where the previous one stopped.
inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers,
                                 const ImageId* image_view_ids, TextureCache& texture_cache,
                                 VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) {
    for (const auto& desc : info.texture_descriptors) {
        ImageView& view{texture_cache.GetImageView(image_view_ids[index])};
        // The view handle is selected by the type the shader samples the texture as
        update_descriptor_queue.AddSampledImage(view.Handle(CastType(desc.type)),
                                                samplers[index]);
        ++index;
    }
}
|
||||
|
||||
} // namespace Vulkan
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/pipeline_helper.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
|
@ -17,140 +18,10 @@
|
|||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
// Creates the descriptor set layout of a compute shader: one binding per constant buffer,
// then per storage buffer, then per texture, numbered consecutively from 0.
vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
    boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;
    u32 next_binding{};
    const auto append = [&](const auto& descriptors, VkDescriptorType type) {
        for ([[maybe_unused]] const auto& desc : descriptors) {
            bindings.push_back({
                .binding = next_binding,
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                .pImmutableSamplers = nullptr,
            });
            ++next_binding;
        }
    };
    append(info.constant_buffer_descriptors, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
    append(info.storage_buffers_descriptors, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    append(info.texture_descriptors, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

    return device.GetLogical().CreateDescriptorSetLayout({
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<u32>(bindings.size()),
        .pBindings = bindings.data(),
    });
}
|
||||
|
||||
/// Builds the descriptor update template of a compute shader.
///
/// One template entry is emitted per descriptor, in the order constant buffers, storage buffers,
/// textures. Binding numbers increase by one per entry and the source offset advances by
/// sizeof(DescriptorUpdateEntry) per entry, so the update payload is read as a tightly packed
/// array of DescriptorUpdateEntry.
///
/// @param device                Device used to create the Vulkan object.
/// @param info                  Shader information listing the descriptors used by the shader.
/// @param descriptor_set_layout Layout of the descriptor set updated by the template.
/// @param pipeline_layout       Pipeline layout passed through to the create info.
/// @return The created descriptor update template.
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
    VkPipelineLayout pipeline_layout) {
    boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
    size_t offset{};
    u32 binding{};
    // Appends one template entry of the given type per element of `descriptors`. Only the number
    // of elements matters here, their contents are not read.
    const auto add_entries = [&entries, &offset, &binding](const auto& descriptors,
                                                           VkDescriptorType type) {
        for ([[maybe_unused]] const auto& desc : descriptors) {
            entries.push_back({
                .dstBinding = binding,
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = type,
                .offset = offset,
                .stride = sizeof(DescriptorUpdateEntry),
            });
            ++binding;
            offset += sizeof(DescriptorUpdateEntry);
        }
    };
    // The order of these calls defines both the binding numbers and the payload offsets.
    add_entries(info.constant_buffer_descriptors, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
    add_entries(info.storage_buffers_descriptors, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    add_entries(info.texture_descriptors, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
        .pDescriptorUpdateEntries = entries.data(),
        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
        .descriptorSetLayout = descriptor_set_layout,
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
        .pipelineLayout = pipeline_layout,
        .set = 0,
    });
}
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
VideoCommon::ImageViewType CastType(Shader::TextureType type) {
|
||||
switch (type) {
|
||||
case Shader::TextureType::Color1D:
|
||||
case Shader::TextureType::Shadow1D:
|
||||
return VideoCommon::ImageViewType::e1D;
|
||||
case Shader::TextureType::ColorArray1D:
|
||||
case Shader::TextureType::ShadowArray1D:
|
||||
return VideoCommon::ImageViewType::e1DArray;
|
||||
case Shader::TextureType::Color2D:
|
||||
case Shader::TextureType::Shadow2D:
|
||||
return VideoCommon::ImageViewType::e2D;
|
||||
case Shader::TextureType::ColorArray2D:
|
||||
case Shader::TextureType::ShadowArray2D:
|
||||
return VideoCommon::ImageViewType::e2DArray;
|
||||
case Shader::TextureType::Color3D:
|
||||
case Shader::TextureType::Shadow3D:
|
||||
return VideoCommon::ImageViewType::e3D;
|
||||
case Shader::TextureType::ColorCube:
|
||||
case Shader::TextureType::ShadowCube:
|
||||
return VideoCommon::ImageViewType::Cube;
|
||||
case Shader::TextureType::ColorArrayCube:
|
||||
case Shader::TextureType::ShadowArrayCube:
|
||||
return VideoCommon::ImageViewType::CubeArray;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid texture type {}", type);
|
||||
/// Creates the descriptor layout objects for a compute shader.
/// All descriptors listed in `info` are registered for the compute stage.
DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) {
    DescriptorLayoutBuilder layout_builder;
    layout_builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);

    return layout_builder.Create(device.GetLogical());
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
|
|||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
const Shader::Info& info_, vk::ShaderModule spv_module_)
|
||||
: update_descriptor_queue{&update_descriptor_queue_}, info{info_},
|
||||
spv_module(std::move(spv_module_)),
|
||||
descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
|
||||
descriptor_allocator(descriptor_pool, *descriptor_set_layout),
|
||||
pipeline_layout{device.GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = descriptor_set_layout.address(),
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
})},
|
||||
descriptor_update_template{
|
||||
CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
|
||||
pipeline{device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = 0,
|
||||
.basePipelineIndex = 0,
|
||||
})} {}
|
||||
spv_module(std::move(spv_module_)) {
|
||||
DescriptorLayoutTuple tuple{CreateLayout(device, info)};
|
||||
descriptor_set_layout = std::move(tuple.descriptor_set_layout);
|
||||
pipeline_layout = std::move(tuple.pipeline_layout);
|
||||
descriptor_update_template = std::move(tuple.descriptor_update_template);
|
||||
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
|
||||
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
},
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = 0,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
|
||||
buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
|
||||
|
@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple
|
|||
static constexpr size_t max_elements = 64;
|
||||
std::array<ImageId, max_elements> image_view_ids;
|
||||
boost::container::static_vector<u32, max_elements> image_view_indices;
|
||||
boost::container::static_vector<VkSampler, max_elements> sampler_handles;
|
||||
boost::container::static_vector<VkSampler, max_elements> samplers;
|
||||
|
||||
const auto& launch_desc{kepler_compute.launch_description};
|
||||
const auto& cbufs{launch_desc.const_buffer_config};
|
||||
|
@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple
|
|||
image_view_indices.push_back(handle.image);
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
samplers.push_back(sampler->Handle());
|
||||
}
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
size_t index{};
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
const VkSampler vk_sampler{sampler_handles[index]};
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
|
||||
const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
|
||||
update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler);
|
||||
++index;
|
||||
}
|
||||
PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache,
|
||||
*update_descriptor_queue, index);
|
||||
}
|
||||
|
||||
VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
@ -18,7 +17,7 @@ namespace Vulkan {
|
|||
|
||||
class Device;
|
||||
|
||||
class ComputePipeline : public Pipeline {
|
||||
class ComputePipeline {
|
||||
public:
|
||||
explicit ComputePipeline() = default;
|
||||
explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
|
||||
|
|
445
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
Normal file
445
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
Normal file
|
@ -0,0 +1,445 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/pipeline_helper.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using boost::container::small_vector;
|
||||
using boost::container::static_vector;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
/// Creates the descriptor layout objects shared by all stages of a graphics pipeline.
/// The position of each element in `infos` selects its Vulkan stage: vertex, tessellation
/// control, tessellation evaluation, geometry, fragment. Passing more elements than there are
/// known stages makes std::array::at throw.
DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) {
    static constexpr std::array stage_flags{
        VK_SHADER_STAGE_VERTEX_BIT,
        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
        VK_SHADER_STAGE_GEOMETRY_BIT,
        VK_SHADER_STAGE_FRAGMENT_BIT,
    };
    DescriptorLayoutBuilder layout_builder;
    size_t stage_index = 0;
    for (const Shader::Info& stage_info : infos) {
        layout_builder.Add(stage_info, stage_flags.at(stage_index));
        ++stage_index;
    }
    return layout_builder.Create(device.GetLogical());
}
|
||||
|
||||
template <class StencilFace>
|
||||
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
|
||||
return {
|
||||
.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
|
||||
.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
|
||||
.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
|
||||
.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
|
||||
.compareMask = 0,
|
||||
.writeMask = 0,
|
||||
.reference = 0,
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns true unless `topology` is one of the list-style topologies enumerated below, for
/// which primitive restart is treated as unsupported.
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
    static constexpr std::array unsupported_topologies{
        VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
        VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
        VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
        // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
    };
    for (const VkPrimitiveTopology unsupported : unsupported_topologies) {
        if (unsupported == topology) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
/// Splits a packed viewport swizzle into its four components and translates each to Vulkan.
/// Each component occupies three bits, starting at bits 0, 4, 8 and 12 for x, y, z and w.
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
    union Swizzle {
        u32 raw;
        BitField<0, 3, Maxwell::ViewportSwizzle> x;
        BitField<4, 3, Maxwell::ViewportSwizzle> y;
        BitField<8, 3, Maxwell::ViewportSwizzle> z;
        BitField<12, 3, Maxwell::ViewportSwizzle> w;
    };
    const Swizzle fields{swizzle};

    VkViewportSwizzleNV result{};
    result.x = MaxwellToVK::ViewportSwizzle(fields.x);
    result.y = MaxwellToVK::ViewportSwizzle(fields.y);
    result.z = MaxwellToVK::ViewportSwizzle(fields.z);
    result.w = MaxwellToVK::ViewportSwizzle(fields.w);
    return result;
}
|
||||
|
||||
/// Converts an encoded render target format to a pixel format.
/// RenderTargetFormat::NONE maps to PixelFormat::Invalid.
PixelFormat DecodeFormat(u8 encoded_format) {
    const auto rt_format = static_cast<Tegra::RenderTargetFormat>(encoded_format);
    const bool is_unused = rt_format == Tegra::RenderTargetFormat::NONE;
    return is_unused ? PixelFormat::Invalid : PixelFormatFromRenderTargetFormat(rt_format);
}
|
||||
|
||||
/// Derives the render pass cache key (color formats, depth format, sample count) from the
/// fixed pipeline state. The depth format is Invalid when depth is not enabled.
RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
    RenderPassKey key;

    auto color_out = key.color_formats.begin();
    for (const auto encoded_format : state.color_formats) {
        *color_out = DecodeFormat(encoded_format);
        ++color_out;
    }

    key.depth_format = PixelFormat::Invalid;
    if (state.depth_enabled != 0) {
        const auto depth_format = static_cast<Tegra::DepthFormat>(state.depth_format.Value());
        key.depth_format = PixelFormatFromDepthFormat(depth_format);
    }

    key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
    return key;
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Stores the collaborating objects, copies the per-stage shader information, creates the
/// descriptor layout objects and finally builds the Vulkan pipeline for `state`.
/// A null entry in `infos` is replaced by a default-constructed Shader::Info.
GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
                                   Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
                                   BufferCache& buffer_cache_, TextureCache& texture_cache_,
                                   const Device& device, VKDescriptorPool& descriptor_pool,
                                   VKUpdateDescriptorQueue& update_descriptor_queue_,
                                   RenderPassCache& render_pass_cache,
                                   const FixedPipelineState& state,
                                   std::array<vk::ShaderModule, NUM_STAGES> stages,
                                   const std::array<const Shader::Info*, NUM_STAGES>& infos)
    : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_},
      buffer_cache{&buffer_cache_}, scheduler{&scheduler_},
      update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} {
    for (size_t stage = 0; stage < infos.size(); ++stage) {
        const Shader::Info* const stage_info = infos[stage];
        stage_infos[stage] = stage_info ? *stage_info : Shader::Info{};
    }

    DescriptorLayoutTuple layouts{CreateLayout(device, stage_infos)};
    descriptor_set_layout = std::move(layouts.descriptor_set_layout);
    pipeline_layout = std::move(layouts.pipeline_layout);
    descriptor_update_template = std::move(layouts.descriptor_update_template);
    // The allocator is created from the layout that was just moved into this object.
    descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);

    const RenderPassKey render_pass_key{MakeRenderPassKey(state)};
    const VkRenderPass render_pass{render_pass_cache.Get(render_pass_key)};
    MakePipeline(device, state, render_pass);
}
|
||||
|
||||
void GraphicsPipeline::Configure(bool is_indexed) {
|
||||
static constexpr size_t max_images_elements = 64;
|
||||
std::array<ImageId, max_images_elements> image_view_ids;
|
||||
static_vector<u32, max_images_elements> image_view_indices;
|
||||
static_vector<VkSampler, max_images_elements> samplers;
|
||||
|
||||
texture_cache->SynchronizeGraphicsDescriptors();
|
||||
texture_cache->UpdateRenderTargets(false);
|
||||
|
||||
const auto& regs{maxwell3d->regs};
|
||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader::Info& info{stage_infos[stage]};
|
||||
buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
|
||||
buffer_cache->UnbindGraphicsStorageBuffers(stage);
|
||||
size_t index{};
|
||||
for (const auto& desc : info.storage_buffers_descriptors) {
|
||||
ASSERT(desc.count == 1);
|
||||
buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset,
|
||||
true);
|
||||
++index;
|
||||
}
|
||||
const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
const u32 cbuf_index{desc.cbuf_index};
|
||||
const u32 cbuf_offset{desc.cbuf_offset};
|
||||
ASSERT(cbufs[cbuf_index].enabled);
|
||||
const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset};
|
||||
const u32 raw_handle{gpu_memory->Read<u32>(addr)};
|
||||
|
||||
const TextureHandle handle(raw_handle, via_header_index);
|
||||
image_view_indices.push_back(handle.image);
|
||||
|
||||
Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)};
|
||||
samplers.push_back(sampler->Handle());
|
||||
}
|
||||
}
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
buffer_cache->UpdateGraphicsBuffers(is_indexed);
|
||||
texture_cache->FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
buffer_cache->BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
size_t index{};
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
buffer_cache->BindHostStageBuffers(stage);
|
||||
PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(),
|
||||
*texture_cache, *update_descriptor_queue, index);
|
||||
}
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
|
||||
|
||||
scheduler->BindGraphicsPipeline(*pipeline);
|
||||
scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
|
||||
nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state,
|
||||
VkRenderPass render_pass) {
|
||||
FixedPipelineState::DynamicState dynamic{};
|
||||
if (!device.IsExtExtendedDynamicStateSupported()) {
|
||||
dynamic = state.dynamic_state;
|
||||
}
|
||||
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
|
||||
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
|
||||
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
|
||||
const bool instanced = state.binding_divisors[index] != 0;
|
||||
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
|
||||
vertex_bindings.push_back({
|
||||
.binding = static_cast<u32>(index),
|
||||
.stride = dynamic.vertex_strides[index],
|
||||
.inputRate = rate,
|
||||
});
|
||||
if (instanced) {
|
||||
vertex_binding_divisors.push_back({
|
||||
.binding = static_cast<u32>(index),
|
||||
.divisor = state.binding_divisors[index],
|
||||
});
|
||||
}
|
||||
}
|
||||
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
|
||||
const auto& input_attributes = stage_infos[0].loads_generics;
|
||||
for (size_t index = 0; index < state.attributes.size(); ++index) {
|
||||
const auto& attribute = state.attributes[index];
|
||||
if (!attribute.enabled || !input_attributes[index]) {
|
||||
continue;
|
||||
}
|
||||
vertex_attributes.push_back({
|
||||
.location = static_cast<u32>(index),
|
||||
.binding = attribute.buffer,
|
||||
.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
|
||||
.offset = attribute.offset,
|
||||
});
|
||||
}
|
||||
VkPipelineVertexInputStateCreateInfo vertex_input_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
|
||||
.pVertexBindingDescriptions = vertex_bindings.data(),
|
||||
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
|
||||
.pVertexAttributeDescriptions = vertex_attributes.data(),
|
||||
};
|
||||
const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
|
||||
.pVertexBindingDivisors = vertex_binding_divisors.data(),
|
||||
};
|
||||
if (!vertex_binding_divisors.empty()) {
|
||||
vertex_input_ci.pNext = &input_divisor_ci;
|
||||
}
|
||||
const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
|
||||
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.topology = MaxwellToVK::PrimitiveTopology(device, state.topology),
|
||||
.primitiveRestartEnable = state.primitive_restart_enable != 0 &&
|
||||
SupportsPrimitiveRestart(input_assembly_topology),
|
||||
};
|
||||
const VkPipelineTessellationStateCreateInfo tessellation_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
|
||||
};
|
||||
VkPipelineViewportStateCreateInfo viewport_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.viewportCount = Maxwell::NumViewports,
|
||||
.pViewports = nullptr,
|
||||
.scissorCount = Maxwell::NumViewports,
|
||||
.pScissors = nullptr,
|
||||
};
|
||||
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
|
||||
std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
|
||||
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.viewportCount = Maxwell::NumViewports,
|
||||
.pViewportSwizzles = swizzles.data(),
|
||||
};
|
||||
if (device.IsNvViewportSwizzleSupported()) {
|
||||
viewport_ci.pNext = &swizzle_ci;
|
||||
}
|
||||
|
||||
const VkPipelineRasterizationStateCreateInfo rasterization_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.depthClampEnable =
|
||||
static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
|
||||
.rasterizerDiscardEnable =
|
||||
static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.cullMode = static_cast<VkCullModeFlags>(
|
||||
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
|
||||
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
|
||||
.depthBiasEnable = state.depth_bias_enable,
|
||||
.depthBiasConstantFactor = 0.0f,
|
||||
.depthBiasClamp = 0.0f,
|
||||
.depthBiasSlopeFactor = 0.0f,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
const VkPipelineMultisampleStateCreateInfo multisample_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode),
|
||||
.sampleShadingEnable = VK_FALSE,
|
||||
.minSampleShading = 0.0f,
|
||||
.pSampleMask = nullptr,
|
||||
.alphaToCoverageEnable = VK_FALSE,
|
||||
.alphaToOneEnable = VK_FALSE,
|
||||
};
|
||||
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.depthTestEnable = dynamic.depth_test_enable,
|
||||
.depthWriteEnable = dynamic.depth_write_enable,
|
||||
.depthCompareOp = dynamic.depth_test_enable
|
||||
? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
|
||||
: VK_COMPARE_OP_ALWAYS,
|
||||
.depthBoundsTestEnable = dynamic.depth_bounds_enable,
|
||||
.stencilTestEnable = dynamic.stencil_enable,
|
||||
.front = GetStencilFaceState(dynamic.front),
|
||||
.back = GetStencilFaceState(dynamic.back),
|
||||
.minDepthBounds = 0.0f,
|
||||
.maxDepthBounds = 0.0f,
|
||||
};
|
||||
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
|
||||
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
static constexpr std::array mask_table{
|
||||
VK_COLOR_COMPONENT_R_BIT,
|
||||
VK_COLOR_COMPONENT_G_BIT,
|
||||
VK_COLOR_COMPONENT_B_BIT,
|
||||
VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
|
||||
if (format == Tegra::RenderTargetFormat::NONE) {
|
||||
continue;
|
||||
}
|
||||
const auto& blend{state.attachments[index]};
|
||||
const std::array mask{blend.Mask()};
|
||||
VkColorComponentFlags write_mask{};
|
||||
for (size_t i = 0; i < mask_table.size(); ++i) {
|
||||
write_mask |= mask[i] ? mask_table[i] : 0;
|
||||
}
|
||||
cb_attachments.push_back({
|
||||
.blendEnable = blend.enable != 0,
|
||||
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
|
||||
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
|
||||
.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
|
||||
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
|
||||
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
|
||||
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
|
||||
.colorWriteMask = write_mask,
|
||||
});
|
||||
}
|
||||
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_COPY,
|
||||
.attachmentCount = static_cast<u32>(cb_attachments.size()),
|
||||
.pAttachments = cb_attachments.data(),
|
||||
.blendConstants = {},
|
||||
};
|
||||
static_vector<VkDynamicState, 17> dynamic_states{
|
||||
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
|
||||
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
|
||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
|
||||
};
|
||||
if (device.IsExtExtendedDynamicStateSupported()) {
|
||||
static constexpr std::array extended{
|
||||
VK_DYNAMIC_STATE_CULL_MODE_EXT,
|
||||
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
|
||||
VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
|
||||
VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
|
||||
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
|
||||
}
|
||||
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||
.pDynamicStates = dynamic_states.data(),
|
||||
};
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = GuestWarpSize,
|
||||
};
|
||||
static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
if (!spv_modules[stage]) {
|
||||
continue;
|
||||
}
|
||||
[[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
|
||||
.module = *spv_modules[stage],
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
});
|
||||
/*
|
||||
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
|
||||
stage_ci.pNext = &subgroup_size_ci;
|
||||
}
|
||||
*/
|
||||
}
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(shader_stages.size()),
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = &vertex_input_ci,
|
||||
.pInputAssemblyState = &input_assembly_ci,
|
||||
.pTessellationState = &tessellation_ci,
|
||||
.pViewportState = &viewport_ci,
|
||||
.pRasterizationState = &rasterization_ci,
|
||||
.pMultisampleState = &multisample_ci,
|
||||
.pDepthStencilState = &depth_stencil_ci,
|
||||
.pColorBlendState = &color_blend_ci,
|
||||
.pDynamicState = &dynamic_state_ci,
|
||||
.layout = *pipeline_layout,
|
||||
.renderPass = render_pass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = nullptr,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
66
src/video_core/renderer_vulkan/vk_graphics_pipeline.h
Normal file
66
src/video_core/renderer_vulkan/vk_graphics_pipeline.h
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class RenderPassCache;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
class GraphicsPipeline {
|
||||
static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
||||
|
||||
public:
|
||||
explicit GraphicsPipeline() = default;
|
||||
explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
|
||||
BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, const FixedPipelineState& state,
|
||||
std::array<vk::ShaderModule, NUM_STAGES> stages,
|
||||
const std::array<const Shader::Info*, NUM_STAGES>& infos);
|
||||
|
||||
void Configure(bool is_indexed);
|
||||
|
||||
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default;
|
||||
GraphicsPipeline(GraphicsPipeline&&) noexcept = default;
|
||||
|
||||
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
|
||||
GraphicsPipeline(const GraphicsPipeline&) = delete;
|
||||
|
||||
private:
|
||||
void MakePipeline(const Device& device, const FixedPipelineState& state,
|
||||
VkRenderPass render_pass);
|
||||
|
||||
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||
Tegra::MemoryManager* gpu_memory{};
|
||||
TextureCache* texture_cache{};
|
||||
BufferCache* buffer_cache{};
|
||||
VKScheduler* scheduler{};
|
||||
VKUpdateDescriptorQueue* update_descriptor_queue{};
|
||||
|
||||
std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
|
||||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
DescriptorAllocator descriptor_allocator;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
|
||||
vk::Pipeline pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1,36 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Pipeline {
|
||||
public:
|
||||
/// Add a reference count to the pipeline
|
||||
void AddRef() noexcept {
|
||||
++ref_count;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool RemoveRef() noexcept {
|
||||
--ref_count;
|
||||
return ref_count == 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] u64 UsageTick() const noexcept {
|
||||
return usage_tick;
|
||||
}
|
||||
|
||||
protected:
|
||||
u64 usage_tick{};
|
||||
|
||||
private:
|
||||
size_t ref_count{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -12,8 +12,11 @@
|
|||
#include "common/microprofile.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/frontend/maxwell/program.h"
|
||||
#include "shader_recompiler/program_header.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
@ -34,18 +37,18 @@
|
|||
namespace Vulkan {
|
||||
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
|
||||
namespace {
|
||||
class Environment final : public Shader::Environment {
|
||||
using Shader::Backend::SPIRV::EmitSPIRV;
|
||||
|
||||
class GenericEnvironment : public Shader::Environment {
|
||||
public:
|
||||
explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
|
||||
: kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
|
||||
explicit GenericEnvironment() = default;
|
||||
explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
|
||||
: gpu_memory{&gpu_memory_}, program_base{program_base_} {}
|
||||
|
||||
~Environment() override = default;
|
||||
~GenericEnvironment() override = default;
|
||||
|
||||
[[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
|
||||
std::optional<u128> Analyze(u32 start_address) {
|
||||
const std::optional<u64> size{TryFindSize(start_address)};
|
||||
if (!size) {
|
||||
return std::nullopt;
|
||||
|
@ -55,52 +58,47 @@ public:
|
|||
return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t ShaderSize() const noexcept {
|
||||
[[nodiscard]] size_t CachedSize() const noexcept {
|
||||
return cached_highest - cached_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t ReadSize() const noexcept {
|
||||
return read_highest - read_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] u128 ComputeHash() const {
|
||||
const size_t size{ShaderSize()};
|
||||
[[nodiscard]] u128 CalculateHash() const {
|
||||
const size_t size{ReadSize()};
|
||||
auto data = std::make_unique<u64[]>(size);
|
||||
gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
|
||||
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
|
||||
return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
|
||||
}
|
||||
|
||||
u64 ReadInstruction(u32 address) override {
|
||||
u64 ReadInstruction(u32 address) final {
|
||||
read_lowest = std::min(read_lowest, address);
|
||||
read_highest = std::max(read_highest, address);
|
||||
|
||||
if (address >= cached_lowest && address < cached_highest) {
|
||||
return code[address / INST_SIZE];
|
||||
}
|
||||
return gpu_memory.Read<u64>(program_base + address);
|
||||
return gpu_memory->Read<u64>(program_base + address);
|
||||
}
|
||||
|
||||
u32 TextureBoundBuffer() override {
|
||||
return kepler_compute.regs.tex_cb_index;
|
||||
}
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() override {
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
static constexpr size_t INST_SIZE = sizeof(u64);
|
||||
static constexpr size_t BLOCK_SIZE = 0x1000;
|
||||
static constexpr size_t MAXIMUM_SIZE = 0x100000;
|
||||
|
||||
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
||||
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
||||
std::optional<u64> TryFindSize(GPUVAddr guest_addr) {
|
||||
constexpr size_t BLOCK_SIZE = 0x1000;
|
||||
constexpr size_t MAXIMUM_SIZE = 0x100000;
|
||||
|
||||
constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
||||
constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
||||
|
||||
std::optional<u64> TryFindSize(u32 start_address) {
|
||||
GPUVAddr guest_addr = program_base + start_address;
|
||||
size_t offset = 0;
|
||||
size_t size = BLOCK_SIZE;
|
||||
while (size <= MAXIMUM_SIZE) {
|
||||
code.resize(size / INST_SIZE);
|
||||
u64* const data = code.data() + offset / INST_SIZE;
|
||||
gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
|
||||
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
||||
for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
|
||||
const u64 inst = data[i / INST_SIZE];
|
||||
if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
|
||||
|
@ -114,17 +112,87 @@ private:
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
GPUVAddr program_base;
|
||||
|
||||
u32 read_lowest = 0;
|
||||
u32 read_highest = 0;
|
||||
Tegra::MemoryManager* gpu_memory{};
|
||||
GPUVAddr program_base{};
|
||||
|
||||
std::vector<u64> code;
|
||||
|
||||
u32 read_lowest = std::numeric_limits<u32>::max();
|
||||
u32 read_highest = 0;
|
||||
|
||||
u32 cached_lowest = std::numeric_limits<u32>::max();
|
||||
u32 cached_highest = 0;
|
||||
};
|
||||
|
||||
/// Shader environment for the graphics stages, backed by Maxwell3D register state.
/// `sph` and `stage` are not declared in this class; presumably they are members inherited
/// through GenericEnvironment from Shader::Environment - TODO confirm.
class GraphicsEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
/// Leaves `maxwell3d` null. Such an instance has to be assigned from a fully constructed
/// environment before TextureBoundBuffer() is called on it.
explicit GraphicsEnvironment() = default;
|
||||
/// Reads sizeof(sph) bytes from `program_base_ + start_offset` into `sph` and records the
/// shader stage that corresponds to `program`.
explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
|
||||
GPUVAddr program_base_, u32 start_offset)
|
||||
: GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} {
|
||||
gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph));
|
||||
// One-to-one mapping from the Maxwell program slot to the recompiler's stage enum
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
stage = Shader::Stage::VertexA;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
stage = Shader::Stage::VertexB;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationControl:
|
||||
stage = Shader::Stage::TessellationControl;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationEval:
|
||||
stage = Shader::Stage::TessellationEval;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
stage = Shader::Stage::Geometry;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
stage = Shader::Stage::Fragment;
|
||||
break;
|
||||
// Any other value is reported and `stage` is not assigned here
default:
|
||||
UNREACHABLE_MSG("Invalid program={}", program);
|
||||
}
|
||||
}
|
||||
|
||||
~GraphicsEnvironment() override = default;
|
||||
|
||||
/// Returns the `tex_cb_index` register of the 3D engine.
u32 TextureBoundBuffer() override {
|
||||
return maxwell3d->regs.tex_cb_index;
|
||||
}
|
||||
|
||||
/// Always throws Shader::LogicError: this class provides no workgroup size.
std::array<u32, 3> WorkgroupSize() override {
|
||||
throw Shader::LogicError("Requesting workgroup size in a graphics stage");
|
||||
}
|
||||
|
||||
private:
|
||||
// Null until the five-argument constructor runs
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||
};
|
||||
|
||||
class ComputeEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
explicit ComputeEnvironment() = default;
|
||||
explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
|
||||
: GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} {
|
||||
stage = Shader::Stage::Compute;
|
||||
}
|
||||
|
||||
~ComputeEnvironment() override = default;
|
||||
|
||||
u32 TextureBoundBuffer() override {
|
||||
return kepler_compute->regs.tex_cb_index;
|
||||
}
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() override {
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Engines::KeplerCompute* kepler_compute{};
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||
|
@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
|
|||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
size_t GraphicsPipelineCacheKey::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
|
||||
return static_cast<size_t>(hash);
|
||||
}
|
||||
|
||||
/// Two keys compare equal when their first Size() bytes are identical; the byte count is
/// taken from this object, not from `rhs`.
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
    const int difference = std::memcmp(&rhs, this, Size());
    return difference == 0;
}
|
||||
|
||||
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
|
||||
TextureCache& texture_cache_)
|
||||
: VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
|
||||
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
|
||||
update_descriptor_queue_} {}
|
||||
scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
|
||||
buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
|
||||
const auto& float_control{device.FloatControlProperties()};
|
||||
profile = Shader::Profile{
|
||||
.unified_descriptor_binding = true,
|
||||
.support_float_controls = true,
|
||||
.support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
|
||||
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
|
||||
.support_separate_rounding_mode =
|
||||
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
|
||||
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
|
||||
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.has_broken_spirv_clamp = true, // TODO: is_intel
|
||||
};
|
||||
}
|
||||
|
||||
PipelineCache::~PipelineCache() = default;
|
||||
|
||||
/// Returns the graphics pipeline matching the current shader stages and fixed state,
/// building and caching it on first use.
/// @return Pointer into the pipeline cache, or nullptr when the stages could not be refreshed.
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    const bool stages_valid = RefreshStages();
    if (!stages_valid) {
        return nullptr;
    }
    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());

    // try_emplace default-constructs the slot on a miss; it is filled right below
    const auto [entry, inserted] = graphics_cache.try_emplace(graphics_key);
    GraphicsPipeline& cached = entry->second;
    if (inserted) {
        cached = CreateGraphicsPipeline();
    }
    return &cached;
}
|
||||
|
||||
ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
||||
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
|
||||
|
||||
|
@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
|||
return &pipeline;
|
||||
}
|
||||
pipeline = CreateComputePipeline(shader);
|
||||
shader->compute_users.push_back(key);
|
||||
return &pipeline;
|
||||
}
|
||||
|
||||
bool PipelineCache::RefreshStages() {
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
|
||||
graphics_key.unique_hashes[index] = u128{};
|
||||
continue;
|
||||
}
|
||||
const auto& shader_config{maxwell3d.regs.shader_config[index]};
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
return false;
|
||||
}
|
||||
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
|
||||
if (!shader_info) {
|
||||
const u32 offset{shader_config.offset};
|
||||
shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr);
|
||||
}
|
||||
graphics_key.unique_hashes[index] = shader_info->unique_hash;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Computes the hash and byte size of a graphics shader and registers it in the shader cache.
/// @param program       Maxwell program slot the shader belongs to
/// @param base_addr     GPU base address of the shader code region
/// @param start_address Offset of the shader from `base_addr`
/// @param cpu_addr      CPU address the new entry is registered under
/// @return Pointer to the registered entry
const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
                                                u32 start_address, VAddr cpu_addr) {
    GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
    auto entry = std::make_unique<ShaderInfo>();

    const std::optional<u128> fast_hash{env.Analyze(start_address)};
    if (fast_hash.has_value()) {
        entry->unique_hash = *fast_hash;
        entry->size_bytes = env.CachedSize();
    } else {
        // Slow path, not really hit on commercial games.
        // Building the control flow graph makes the environment record which instructions
        // were read, which gives the real shader size.
        flow_block_pool.ReleaseContents();
        Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
        entry->unique_hash = env.CalculateHash();
        entry->size_bytes = env.ReadSize();
    }

    // Grab what is needed from the entry before ownership moves into Register
    const ShaderInfo* const registered{entry.get()};
    const size_t registered_size{entry->size_bytes};
    Register(std::move(entry), cpu_addr, registered_size);
    return registered;
}
|
||||
|
||||
/// Builds a graphics pipeline for the programs currently selected in `graphics_key`.
///
/// The first loop translates every program whose hash is non-zero into IR. The second loop
/// emits SPIR-V for each translated program and builds its shader module; `binding` is shared
/// across the EmitSPIRV calls of all stages. Program slot N is stored in stage slot N - 1.
///
/// @return The newly constructed pipeline, built from the modules and shader infos above.
GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
    flow_block_pool.ReleaseContents();
    inst_pool.ReleaseContents();
    block_pool.ReleaseContents();

    std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs;
    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;

    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        if (graphics_key.unique_hashes[index] == u128{}) {
            continue;
        }
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
        GraphicsEnvironment& env{envs[index]};
        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};

        // The instructions start right after the program header
        const u32 cfg_offset = start_address + static_cast<u32>(sizeof(Shader::ProgramHeader));
        Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset);
        programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg);
    }
    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;

    u32 binding{0};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        if (graphics_key.unique_hashes[index] == u128{}) {
            continue;
        }
        UNIMPLEMENTED_IF(index == 0);
        if (index == 0) {
            // Program slot 0 has no stage slot of its own: `index - 1` would wrap around and
            // write far outside `infos` and `modules`. Skip it until it is implemented.
            continue;
        }

        GraphicsEnvironment& env{envs[index]};
        Shader::IR::Program& program{programs[index]};

        const size_t stage_index{index - 1};
        infos[stage_index] = &program.info;

        // The debugging dump that used to sit here (unchecked fopen of a hard-coded
        // "D:\\shader.spv" followed by std::system("spirv-cross ...")) has been dropped:
        // it dereferenced a null FILE* whenever that path could not be opened and spawned an
        // external process for every shader.
        const std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
        modules[stage_index] = BuildShader(device, code);
    }
    return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device,
                            descriptor_pool, update_descriptor_queue, render_pass_cache,
                            graphics_key.state, std::move(modules), infos);
}
|
||||
|
||||
ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
Environment env{kepler_compute, gpu_memory, program_base};
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base};
|
||||
if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
|
||||
// TODO: Load from cache
|
||||
}
|
||||
const auto& float_control{device.FloatControlProperties()};
|
||||
const Shader::Profile profile{
|
||||
.unified_descriptor_binding = true,
|
||||
.support_float_controls = true,
|
||||
.support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
|
||||
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
|
||||
.support_separate_rounding_mode =
|
||||
float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
|
||||
.support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
|
||||
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
|
||||
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
|
||||
.support_fp16_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
|
||||
.support_fp32_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.has_broken_spirv_clamp = true, // TODO: is_intel
|
||||
};
|
||||
const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
|
||||
flow_block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
block_pool.ReleaseContents();
|
||||
|
||||
Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start};
|
||||
Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
|
||||
u32 binding{0};
|
||||
std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
|
||||
/*
|
||||
FILE* file = fopen("D:\\shader.spv", "wb");
|
||||
fwrite(code.data(), 4, code.size(), file);
|
||||
fclose(file);
|
||||
std::system("spirv-dis D:\\shader.spv");
|
||||
*/
|
||||
shader_info->unique_hash = env.ComputeHash();
|
||||
shader_info->size_bytes = env.ShaderSize();
|
||||
return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
|
||||
shader_info->unique_hash = env.CalculateHash();
|
||||
shader_info->size_bytes = env.ReadSize();
|
||||
return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
|
||||
BuildShader(device, code)};
|
||||
}
|
||||
|
||||
|
@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_
|
|||
ShaderInfo shader;
|
||||
ComputePipeline pipeline{CreateComputePipeline(&shader)};
|
||||
const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
|
||||
shader.compute_users.push_back(key);
|
||||
pipeline.AddRef();
|
||||
|
||||
const size_t size_bytes{shader.size_bytes};
|
||||
Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
|
||||
return &compute_cache.emplace(key, std::move(pipeline)).first->second;
|
||||
|
@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash)
|
|||
};
|
||||
}
|
||||
|
||||
/// Releases the reference that `shader` holds on each compute pipeline using it. A pipeline
/// whose last reference is dropped is erased once the scheduler has waited for its usage tick.
void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
    for (const ComputePipelineCacheKey& user_key : shader->compute_users) {
        const auto entry = compute_cache.find(user_key);
        ASSERT(entry != compute_cache.end());

        Pipeline& pipeline = entry->second;
        if (!pipeline.RemoveRef()) {
            // Still referenced by another shader
            continue;
        }
        // Wait for the pipeline to be free of GPU usage before destroying it
        scheduler.Wait(pipeline.UsageTick());
        compute_cache.erase(entry);
    }
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -12,11 +12,18 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/microinstruction.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
|
@ -26,13 +33,6 @@ class System;
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class RasterizerVulkan;
|
||||
class ComputePipeline;
|
||||
class VKDescriptorPool;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ComputePipelineCacheKey {
|
||||
|
@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
|
|||
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
std::array<u128, 6> unique_hashes;
|
||||
FixedPipelineState state;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
size_t Size() const noexcept {
|
||||
return sizeof(unique_hashes) + state.Size();
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
|
|||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::GraphicsPipelineCacheKey> {
|
||||
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class ComputePipeline;
|
||||
class Device;
|
||||
class RasterizerVulkan;
|
||||
class RenderPassCache;
|
||||
class VKDescriptorPool;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
struct ShaderInfo {
|
||||
u128 unique_hash{};
|
||||
size_t size_bytes{};
|
||||
std::vector<ComputePipelineCacheKey> compute_users;
|
||||
};
|
||||
|
||||
class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
|
||||
|
@ -80,15 +114,23 @@ public:
|
|||
Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, const Device& device,
|
||||
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue);
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache);
|
||||
~PipelineCache() override;
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
|
||||
|
||||
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
|
||||
|
||||
protected:
|
||||
void OnShaderRemoval(ShaderInfo* shader) override;
|
||||
|
||||
private:
|
||||
bool RefreshStages();
|
||||
|
||||
const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
|
||||
u32 start_address, VAddr cpu_addr);
|
||||
|
||||
GraphicsPipeline CreateGraphicsPipeline();
|
||||
|
||||
ComputePipeline CreateComputePipeline(ShaderInfo* shader);
|
||||
|
||||
ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
|
||||
|
@ -104,8 +146,20 @@ private:
|
|||
VKScheduler& scheduler;
|
||||
VKDescriptorPool& descriptor_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
RenderPassCache& render_pass_cache;
|
||||
BufferCache& buffer_cache;
|
||||
TextureCache& texture_cache;
|
||||
|
||||
GraphicsPipelineCacheKey graphics_key{};
|
||||
|
||||
std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache;
|
||||
|
||||
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Shader::ObjectPool<Shader::IR::Block> block_pool;
|
||||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool;
|
||||
|
||||
Shader::Profile profile;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
blit_image(device, scheduler, state_tracker, descriptor_pool),
|
||||
astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
|
||||
memory_allocator),
|
||||
texture_cache_runtime{device, scheduler, memory_allocator,
|
||||
staging_pool, blit_image, astc_decoder_pass},
|
||||
render_pass_cache(device), texture_cache_runtime{device, scheduler,
|
||||
memory_allocator, staging_pool,
|
||||
blit_image, astc_decoder_pass,
|
||||
render_pass_cache},
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
update_descriptor_queue, descriptor_pool),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
|
||||
descriptor_pool, update_descriptor_queue),
|
||||
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
|
||||
descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
|
||||
texture_cache),
|
||||
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache },
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||
wfi_event(device.GetLogical().CreateEvent()) {
|
||||
scheduler.SetQueryCache(query_cache);
|
||||
|
@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||
|
||||
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced);
|
||||
MICROPROFILE_SCOPE(Vulkan_Drawing);
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
FlushWork();
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
update_descriptor_queue.Acquire();
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
pipeline->Configure(is_indexed);
|
||||
|
||||
BeginTransformFeedback();
|
||||
|
||||
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
|
||||
UpdateDynamicStates();
|
||||
|
||||
const auto& regs{maxwell3d.regs};
|
||||
const u32 num_instances{maxwell3d.mme_draw.instance_count};
|
||||
const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
} else {
|
||||
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
|
||||
draw_params.base_vertex, draw_params.base_instance);
|
||||
}
|
||||
});
|
||||
EndTransformFeedback();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Clear() {
|
||||
|
@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() {
|
|||
if ((++draw_counter & 7) != 7) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (draw_counter < DRAWS_TO_DISPATCH) {
|
||||
// Send recorded tasks to the worker thread
|
||||
scheduler.DispatchWork();
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise (every certain number of draws) flush execution.
|
||||
// This submits commands to the Vulkan driver.
|
||||
scheduler.Flush();
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
|
@ -148,6 +149,7 @@ private:
|
|||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
ASTCDecoderPass astc_decoder_pass;
|
||||
RenderPassCache render_pass_cache;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
|
|
100
src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
Normal file
100
src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
Normal file
|
@ -0,0 +1,100 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
|
||||
// Attachment references for attachment indices 0 through 8, all in VK_IMAGE_LAYOUT_GENERAL.
// RenderPassCache::Get points pColorAttachments at the start of this table and takes the
// depth reference from the entry right after the color attachments.
constexpr std::array ATTACHMENT_REFERENCES{
|
||||
VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
|
||||
};
|
||||
|
||||
/// Describes one render pass attachment of the given guest pixel format and sample count.
/// Contents are loaded and stored for both the color/depth and the stencil aspect, and the
/// attachment stays in VK_IMAGE_LAYOUT_GENERAL before and after the pass.
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
                                              VkSampleCountFlagBits samples) {
    // Look up the Vulkan format used for `format` as an optimal-tiling attachment
    const auto surface{MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, format)};
    return VkAttachmentDescription{
        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
        .format = surface.format,
        .samples = samples,
        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
    };
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Stores a pointer to the device that later creates the cached render passes.
RenderPassCache::RenderPassCache(const Device& device_)
    : device{&device_} {
}
|
||||
|
||||
/// Returns the render pass matching `key`, creating and caching it on first use.
///
/// Color formats set to PixelFormat::Invalid are skipped, so the remaining color attachments
/// are packed into consecutive attachment slots; the depth attachment, when present, takes
/// the slot right after the last color attachment.
///
/// @param key Color formats, depth format and sample count describing the render pass.
/// @return Handle of the cached render pass; it is owned by this cache.
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
    // Look up before inserting: if render pass creation throws, no empty entry is left in the
    // cache to be handed out as a null handle on the next request for the same key.
    if (const auto it = cache.find(key); it != cache.end()) {
        return *it->second;
    }

    // Capacity covers every color format slot of the key plus one depth attachment.
    boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
    for (const PixelFormat format : key.color_formats) {
        if (format == PixelFormat::Invalid) {
            continue;
        }
        descriptions.push_back(AttachmentDescription(*device, format, key.samples));
    }
    const size_t num_colors{descriptions.size()};

    const VkAttachmentReference* depth_attachment{};
    if (key.depth_format != PixelFormat::Invalid) {
        depth_attachment = &ATTACHMENT_REFERENCES[num_colors];
        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
    }

    const VkSubpassDescription subpass{
        .flags = 0,
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = static_cast<u32>(num_colors),
        .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = depth_attachment,
        .preserveAttachmentCount = 0,
        .pPreserveAttachments = nullptr,
    };

    // The render pass is created while evaluating the arguments, i.e. before the map is touched.
    const auto entry = cache
                           .emplace(key, device->GetLogical().CreateRenderPass({
                                             .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
                                             .pNext = nullptr,
                                             .flags = 0,
                                             .attachmentCount =
                                                 static_cast<u32>(descriptions.size()),
                                             .pAttachments = descriptions.data(),
                                             .subpassCount = 1,
                                             .pSubpasses = &subpass,
                                             .dependencyCount = 0,
                                             .pDependencies = nullptr,
                                         }))
                           .first;
    return *entry->second;
}
|
||||
|
||||
} // namespace Vulkan
|
53
src/video_core/renderer_vulkan/vk_render_pass_cache.h
Normal file
53
src/video_core/renderer_vulkan/vk_render_pass_cache.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct RenderPassKey {
|
||||
auto operator<=>(const RenderPassKey&) const noexcept = default;
|
||||
|
||||
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
|
||||
VideoCore::Surface::PixelFormat depth_format;
|
||||
VkSampleCountFlagBits samples;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::RenderPassKey> {
|
||||
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
|
||||
size_t value = static_cast<size_t>(key.depth_format) << 48;
|
||||
value ^= static_cast<size_t>(key.samples) << 52;
|
||||
for (size_t i = 0; i < key.color_formats.size(); ++i) {
|
||||
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
|
||||
class RenderPassCache {
|
||||
public:
|
||||
explicit RenderPassCache(const Device& device_);
|
||||
|
||||
VkRenderPass Get(const RenderPassKey& key);
|
||||
|
||||
private:
|
||||
const Device* device{};
|
||||
std::unordered_map<RenderPassKey, vk::RenderPass> cache;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -18,6 +18,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
|
|||
using VideoCore::Surface::IsPixelFormatASTC;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array ATTACHMENT_REFERENCES{
|
||||
VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
|
||||
VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
|
||||
};
|
||||
|
||||
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
if (color == std::array<float, 4>{0, 0, 0, 0}) {
|
||||
return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
|
||||
|
@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
|||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
|
||||
const ImageView* image_view) {
|
||||
using MaxwellToVK::SurfaceFormat;
|
||||
const PixelFormat pixel_format = image_view->format;
|
||||
return VkAttachmentDescription{
|
||||
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
|
||||
.format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
|
||||
.samples = image_view->Samples(),
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
|
||||
switch (swizzle) {
|
||||
case SwizzleSource::Zero:
|
||||
|
@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
|
|||
|
||||
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
|
||||
std::vector<VkAttachmentDescription> descriptions;
|
||||
std::vector<VkImageView> attachments;
|
||||
RenderPassKey renderpass_key{};
|
||||
s32 num_layers = 1;
|
||||
|
@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||
renderpass_key.color_formats[index] = PixelFormat::Invalid;
|
||||
continue;
|
||||
}
|
||||
descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
|
||||
attachments.push_back(color_buffer->RenderTarget());
|
||||
renderpass_key.color_formats[index] = color_buffer->format;
|
||||
num_layers = std::max(num_layers, color_buffer->range.extent.layers);
|
||||
|
@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||
++num_images;
|
||||
}
|
||||
const size_t num_colors = attachments.size();
|
||||
const VkAttachmentReference* depth_attachment =
|
||||
depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
|
||||
if (depth_buffer) {
|
||||
descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
|
||||
attachments.push_back(depth_buffer->RenderTarget());
|
||||
renderpass_key.depth_format = depth_buffer->format;
|
||||
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
|
||||
|
@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
|
|||
}
|
||||
renderpass_key.samples = samples;
|
||||
|
||||
const auto& device = runtime.device.GetLogical();
|
||||
const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
|
||||
if (is_new) {
|
||||
const VkSubpassDescription subpass{
|
||||
.flags = 0,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
.colorAttachmentCount = static_cast<u32>(num_colors),
|
||||
.pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
|
||||
.pResolveAttachments = nullptr,
|
||||
.pDepthStencilAttachment = depth_attachment,
|
||||
.preserveAttachmentCount = 0,
|
||||
.pPreserveAttachments = nullptr,
|
||||
};
|
||||
cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.attachmentCount = static_cast<u32>(descriptions.size()),
|
||||
.pAttachments = descriptions.data(),
|
||||
.subpassCount = 1,
|
||||
.pSubpasses = &subpass,
|
||||
.dependencyCount = 0,
|
||||
.pDependencies = nullptr,
|
||||
});
|
||||
}
|
||||
renderpass = *cache_pair->second;
|
||||
renderpass = runtime.render_pass_cache.Get(renderpass_key);
|
||||
|
||||
render_area = VkExtent2D{
|
||||
.width = key.size.width,
|
||||
.height = key.size.height,
|
||||
};
|
||||
num_color_buffers = static_cast<u32>(num_colors);
|
||||
framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
|
||||
framebuffer = runtime.device.GetLogical().CreateFramebuffer({
|
||||
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
|
|
|
@ -26,35 +26,10 @@ class Device;
|
|||
class Image;
|
||||
class ImageView;
|
||||
class Framebuffer;
|
||||
class RenderPassCache;
|
||||
class StagingBufferPool;
|
||||
class VKScheduler;
|
||||
|
||||
struct RenderPassKey {
|
||||
constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
|
||||
|
||||
std::array<PixelFormat, NUM_RT> color_formats;
|
||||
PixelFormat depth_format;
|
||||
VkSampleCountFlagBits samples;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Vulkan::RenderPassKey> {
|
||||
[[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
|
||||
size_t value = static_cast<size_t>(key.depth_format) << 48;
|
||||
value ^= static_cast<size_t>(key.samples) << 52;
|
||||
for (size_t i = 0; i < key.color_formats.size(); ++i) {
|
||||
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct TextureCacheRuntime {
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
|
@ -62,7 +37,7 @@ struct TextureCacheRuntime {
|
|||
StagingBufferPool& staging_buffer_pool;
|
||||
BlitImageHelper& blit_image_helper;
|
||||
ASTCDecoderPass& astc_decoder_pass;
|
||||
std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
|
||||
RenderPassCache& render_pass_cache;
|
||||
|
||||
void Finish();
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
|
|||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
|
||||
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
|
||||
#ifdef _WIN32
|
||||
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
|
||||
#endif
|
||||
|
@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
};
|
||||
SetNext(next, host_query_reset);
|
||||
|
||||
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
|
||||
.pNext = nullptr,
|
||||
.shaderDemoteToHelperInvocation = true,
|
||||
};
|
||||
SetNext(next, demote);
|
||||
|
||||
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
||||
if (is_float16_supported) {
|
||||
float16_int8 = {
|
||||
|
@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const {
|
|||
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
|
||||
}
|
||||
}
|
||||
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
|
||||
demote.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
|
||||
demote.pNext = nullptr;
|
||||
|
||||
VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
|
||||
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
|
||||
robustness2.pNext = &demote;
|
||||
|
||||
VkPhysicalDeviceFeatures2KHR features2{};
|
||||
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||
|
@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
|
|||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||
"shaderStorageImageWriteWithoutFormat"),
|
||||
std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
|
||||
std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
|
||||
std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
|
||||
std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
|
||||
|
|
Loading…
Reference in a new issue