suyu/src/video_core/shader/decode/arithmetic.cpp
Nguyen Dac Nam db2f547434
shader: FMUL switch to using LUT (#3441)
* shader: add FmulPostFactor LUT table

* shader: FMUL apply LUT

* Update src/video_core/engines/shader_bytecode.h

Co-Authored-By: Mat M. <mathew1800@gmail.com>

* nit: mistype

* clang-format & add missing import

* shader: remove post factor LUT.

* shader: move post factor LUT to function and fix incorrect order.

* clang-format

* shader: FMUL: add static to post factor LUT

* nit: typo

Co-authored-by: Mat M. <mathew1800@gmail.com>
2020-02-27 11:14:25 -05:00

166 lines
5.9 KiB
C++

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::SubOp;
u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
Node op_b = [&] {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
switch (opcode->get().GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
// MOV does not have neither 'abs' nor 'neg' bits.
SetRegister(bb, instr.gpr0, op_b);
break;
}
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
static constexpr std::array FmulPostFactor = {
1.000f, // None
0.500f, // Divide 2
0.250f, // Divide 4
0.125f, // Divide 8
8.000f, // Mul 8
4.000f, // Mul 4
2.000f, // Mul 2
};
if (instr.fmul.postfactor != 0) {
op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
Immediate(FmulPostFactor[instr.fmul.postfactor]));
}
// TODO(Rodrigo): Should precise be used when there's a postfactor?
Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::MUFU: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
Node value = [&]() {
switch (instr.sub_op) {
case SubOp::Cos:
return Operation(OperationCode::FCos, PRECISE, op_a);
case SubOp::Sin:
return Operation(OperationCode::FSin, PRECISE, op_a);
case SubOp::Ex2:
return Operation(OperationCode::FExp2, PRECISE, op_a);
case SubOp::Lg2:
return Operation(OperationCode::FLog2, PRECISE, op_a);
case SubOp::Rcp:
return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
case SubOp::Rsq:
return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
case SubOp::Sqrt:
return Operation(OperationCode::FSqrt, PRECISE, op_a);
default:
UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
static_cast<unsigned>(instr.sub_op.Value()));
return Immediate(0);
}
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FCMP_R: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
SetRegister(
bb, instr.gpr0,
Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
break;
}
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
}
return pc;
}
} // namespace VideoCommon::Shader