mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	Merge pull request #933 from neobrain/shader_debugger
Shader debugger improvements
This commit is contained in:
		
						commit
						7312894a6a
					
				
					 11 changed files with 674 additions and 97 deletions
				
			
		|  | @ -6,9 +6,16 @@ | ||||||
| #include <sstream> | #include <sstream> | ||||||
| 
 | 
 | ||||||
| #include <QBoxLayout> | #include <QBoxLayout> | ||||||
|  | #include <QFileDialog> | ||||||
|  | #include <QGroupBox> | ||||||
|  | #include <QLabel> | ||||||
|  | #include <QLineEdit> | ||||||
|  | #include <QPushButton> | ||||||
|  | #include <QSignalMapper> | ||||||
|  | #include <QSpinBox> | ||||||
| #include <QTreeView> | #include <QTreeView> | ||||||
| 
 | 
 | ||||||
| #include "video_core/shader/shader_interpreter.h" | #include "video_core/shader/shader.h" | ||||||
| 
 | 
 | ||||||
| #include "graphics_vertex_shader.h" | #include "graphics_vertex_shader.h" | ||||||
| 
 | 
 | ||||||
|  | @ -17,7 +24,7 @@ using nihstro::Instruction; | ||||||
| using nihstro::SourceRegister; | using nihstro::SourceRegister; | ||||||
| using nihstro::SwizzlePattern; | using nihstro::SwizzlePattern; | ||||||
| 
 | 
 | ||||||
| GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) { | GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) { | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -34,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { | int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { | ||||||
|     return static_cast<int>(info.code.size()); |     return static_cast<int>(par->info.code.size()); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { | QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { | ||||||
|  | @ -62,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||||
|     { |     { | ||||||
|         switch (index.column()) { |         switch (index.column()) { | ||||||
|         case 0: |         case 0: | ||||||
|             if (info.HasLabel(index.row())) |             if (par->info.HasLabel(index.row())) | ||||||
|                 return QString::fromStdString(info.GetLabel(index.row())); |                 return QString::fromStdString(par->info.GetLabel(index.row())); | ||||||
| 
 | 
 | ||||||
|             return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); |             return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); | ||||||
| 
 | 
 | ||||||
|         case 1: |         case 1: | ||||||
|             return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0')); |             return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0')); | ||||||
| 
 | 
 | ||||||
|         case 2: |         case 2: | ||||||
|         { |         { | ||||||
|             std::stringstream output; |             std::stringstream output; | ||||||
|             output.flags(std::ios::hex); |             output.flags(std::ios::hex); | ||||||
| 
 | 
 | ||||||
|             Instruction instr = info.code[index.row()]; |             Instruction instr = par->info.code[index.row()]; | ||||||
|             const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; |             const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern; | ||||||
| 
 | 
 | ||||||
|             // longest known instruction name: "setemit "
 |             // longest known instruction name: "setemit "
 | ||||||
|             output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; |             output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; | ||||||
|  | @ -130,13 +137,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||||
| 
 | 
 | ||||||
|                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); |                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); | ||||||
|                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; |                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; | ||||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(0,1)); |                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(0,1)); | ||||||
| 
 | 
 | ||||||
|                     output << ", "; |                     output << ", "; | ||||||
| 
 | 
 | ||||||
|                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); |                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); | ||||||
|                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; |                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; | ||||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(1,1)); |                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(1,1)); | ||||||
| 
 | 
 | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
|  | @ -167,7 +174,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||||
|                     // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1
 |                     // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1
 | ||||||
|                     if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { |                     if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { | ||||||
|                         SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); |                         SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); | ||||||
|                         print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false)); |                         print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true)); | ||||||
|                     } |                     } | ||||||
|                     break; |                     break; | ||||||
|                 } |                 } | ||||||
|  | @ -240,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||||
|     case Qt::FontRole: |     case Qt::FontRole: | ||||||
|         return QFont("monospace"); |         return QFont("monospace"); | ||||||
| 
 | 
 | ||||||
|  |     case Qt::BackgroundRole: | ||||||
|  |         // Highlight instructions which have no debug data associated to them
 | ||||||
|  |         for (const auto& record : par->debug_data.records) | ||||||
|  |             if (index.row() == record.instruction_offset) | ||||||
|  |                 return QVariant(); | ||||||
|  | 
 | ||||||
|  |         return QBrush(QColor(255, 255, 127)); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |     // TODO: Draw arrows for each "reachable" instruction to visualize control flow
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | @ -247,53 +266,232 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||||
|     return QVariant(); |     return QVariant(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GraphicsVertexShaderModel::OnUpdate() | void GraphicsVertexShaderWidget::DumpShader() { | ||||||
| { |     QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin", | ||||||
|     beginResetModel(); |                                                     tr("Shader Binary (*.shbin)")); | ||||||
| 
 | 
 | ||||||
|     info.Clear(); |     if (filename.isEmpty()) { | ||||||
| 
 |         // If the user canceled the dialog, don't dump anything.
 | ||||||
|     for (auto instr : Pica::g_state.vs.program_code) |         return; | ||||||
|         info.code.push_back({instr}); |  | ||||||
| 
 |  | ||||||
|     for (auto pattern : Pica::g_state.vs.swizzle_data) |  | ||||||
|         info.swizzle_info.push_back({pattern}); |  | ||||||
| 
 |  | ||||||
|     info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); |  | ||||||
| 
 |  | ||||||
|     endResetModel(); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     auto& setup  = Pica::g_state.vs; | ||||||
|  |     auto& config = Pica::g_state.regs.vs; | ||||||
|  | 
 | ||||||
|  |     Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, | GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, | ||||||
|                                                        QWidget* parent) |                                                        QWidget* parent) | ||||||
|         : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { |         : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { | ||||||
|     setObjectName("PicaVertexShader"); |     setObjectName("PicaVertexShader"); | ||||||
| 
 | 
 | ||||||
|     auto binary_model = new GraphicsVertexShaderModel(this); |     auto input_data_mapper = new QSignalMapper(this); | ||||||
|     auto binary_list = new QTreeView; | 
 | ||||||
|     binary_list->setModel(binary_model); |     // TODO: Support inputting data in hexadecimal raw format
 | ||||||
|  |     for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||||||
|  |         input_data[i] = new QLineEdit; | ||||||
|  |         input_data[i]->setValidator(new QDoubleValidator(input_data[i])); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); | ||||||
|  | 
 | ||||||
|  |     // TODO: Add some button for jumping to the shader entry point
 | ||||||
|  | 
 | ||||||
|  |     model = new GraphicsVertexShaderModel(this); | ||||||
|  |     binary_list = new QTreeView; | ||||||
|  |     binary_list->setModel(model); | ||||||
|     binary_list->setRootIsDecorated(false); |     binary_list->setRootIsDecorated(false); | ||||||
|     binary_list->setAlternatingRowColors(true); |     binary_list->setAlternatingRowColors(true); | ||||||
| 
 | 
 | ||||||
|     connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate())); |     auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump")); | ||||||
|  | 
 | ||||||
|  |     instruction_description = new QLabel; | ||||||
|  | 
 | ||||||
|  |     cycle_index = new QSpinBox; | ||||||
|  | 
 | ||||||
|  |     connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)), | ||||||
|  |             binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags))); | ||||||
|  | 
 | ||||||
|  |     connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader())); | ||||||
|  | 
 | ||||||
|  |     connect(cycle_index, SIGNAL(valueChanged(int)), this, SLOT(OnCycleIndexChanged(int))); | ||||||
|  | 
 | ||||||
|  |     for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||||||
|  |         connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map())); | ||||||
|  |         input_data_mapper->setMapping(input_data[i], i); | ||||||
|  |     } | ||||||
|  |     connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int))); | ||||||
| 
 | 
 | ||||||
|     auto main_widget = new QWidget; |     auto main_widget = new QWidget; | ||||||
|     auto main_layout = new QVBoxLayout; |     auto main_layout = new QVBoxLayout; | ||||||
|  |     { | ||||||
|  |         auto input_data_group = new QGroupBox(tr("Input Data")); | ||||||
|  | 
 | ||||||
|  |         // For each vertex attribute, add a QHBoxLayout consisting of:
 | ||||||
|  |         // - A QLabel denoting the source attribute index
 | ||||||
|  |         // - Four QLineEdits for showing and manipulating attribute data
 | ||||||
|  |         // - A QLabel denoting the shader input attribute index
 | ||||||
|  |         auto sub_layout = new QVBoxLayout; | ||||||
|  |         for (unsigned i = 0; i < 16; ++i) { | ||||||
|  |             // Create an HBoxLayout to store the widgets used to specify a particular attribute
 | ||||||
|  |             // and store it in a QWidget to allow for easy hiding and unhiding.
 | ||||||
|  |             auto row_layout = new QHBoxLayout; | ||||||
|  |             row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2))); | ||||||
|  |             for (unsigned comp = 0; comp < 4; ++comp) | ||||||
|  |                 row_layout->addWidget(input_data[4 * i + comp]); | ||||||
|  | 
 | ||||||
|  |             row_layout->addWidget(input_data_mapping[i] = new QLabel); | ||||||
|  | 
 | ||||||
|  |             input_data_container[i] = new QWidget; | ||||||
|  |             input_data_container[i]->setLayout(row_layout); | ||||||
|  |             input_data_container[i]->hide(); | ||||||
|  | 
 | ||||||
|  |             sub_layout->addWidget(input_data_container[i]); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         sub_layout->addWidget(breakpoint_warning); | ||||||
|  |         breakpoint_warning->hide(); | ||||||
|  | 
 | ||||||
|  |         input_data_group->setLayout(sub_layout); | ||||||
|  |         main_layout->addWidget(input_data_group); | ||||||
|  |     } | ||||||
|     { |     { | ||||||
|         auto sub_layout = new QHBoxLayout; |         auto sub_layout = new QHBoxLayout; | ||||||
|         sub_layout->addWidget(binary_list); |         sub_layout->addWidget(binary_list); | ||||||
|         main_layout->addLayout(sub_layout); |         main_layout->addLayout(sub_layout); | ||||||
|     } |     } | ||||||
|  |     main_layout->addWidget(dump_shader); | ||||||
|  |     { | ||||||
|  |         auto sub_layout = new QHBoxLayout; | ||||||
|  |         sub_layout->addWidget(new QLabel(tr("Cycle Index:"))); | ||||||
|  |         sub_layout->addWidget(cycle_index); | ||||||
|  |         main_layout->addLayout(sub_layout); | ||||||
|  |     } | ||||||
|  |     main_layout->addWidget(instruction_description); | ||||||
|  |     main_layout->addStretch(); | ||||||
|     main_widget->setLayout(main_layout); |     main_widget->setLayout(main_layout); | ||||||
|     setWidget(main_widget); |     setWidget(main_widget); | ||||||
|  | 
 | ||||||
|  |     widget()->setEnabled(false); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { | void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { | ||||||
|     emit Update(); |     auto input = static_cast<Pica::Shader::InputVertex*>(data); | ||||||
|  |     if (event == Pica::DebugContext::Event::VertexLoaded) { | ||||||
|  |         Reload(true, data); | ||||||
|  |     } else { | ||||||
|  |         // No vertex data is retrievable => invalidate currently stored vertex data
 | ||||||
|  |         Reload(true, nullptr); | ||||||
|  |     } | ||||||
|     widget()->setEnabled(true); |     widget()->setEnabled(true); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) { | ||||||
|  |     model->beginResetModel(); | ||||||
|  | 
 | ||||||
|  |     if (replace_vertex_data) { | ||||||
|  |         if (vertex_data) { | ||||||
|  |             memcpy(&input_vertex, vertex_data, sizeof(input_vertex)); | ||||||
|  |             for (unsigned attr = 0; attr < 16; ++attr) { | ||||||
|  |                 for (unsigned comp = 0; comp < 4; ++comp) { | ||||||
|  |                     input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32())); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             breakpoint_warning->hide(); | ||||||
|  |         } else { | ||||||
|  |             for (unsigned attr = 0; attr < 16; ++attr) { | ||||||
|  |                 for (unsigned comp = 0; comp < 4; ++comp) { | ||||||
|  |                     input_data[4 * attr + comp]->setText(QString("???")); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             breakpoint_warning->show(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Reload shader code
 | ||||||
|  |     info.Clear(); | ||||||
|  | 
 | ||||||
|  |     auto& shader_setup = Pica::g_state.vs; | ||||||
|  |     auto& shader_config = Pica::g_state.regs.vs; | ||||||
|  |     for (auto instr : shader_setup.program_code) | ||||||
|  |         info.code.push_back({instr}); | ||||||
|  | 
 | ||||||
|  |     for (auto pattern : shader_setup.swizzle_data) | ||||||
|  |         info.swizzle_info.push_back({pattern}); | ||||||
|  | 
 | ||||||
|  |     u32 entry_point = Pica::g_state.regs.vs.main_offset; | ||||||
|  |     info.labels.insert({ entry_point, "main" }); | ||||||
|  | 
 | ||||||
|  |     // Generate debug information
 | ||||||
|  |     debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup); | ||||||
|  | 
 | ||||||
|  |     // Reload widget state
 | ||||||
|  | 
 | ||||||
|  |     // Only show input attributes which are used as input to the shader
 | ||||||
|  |     for (unsigned int attr = 0; attr < 16; ++attr) { | ||||||
|  |         input_data_container[attr]->setVisible(false); | ||||||
|  |     } | ||||||
|  |     for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) { | ||||||
|  |         unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); | ||||||
|  |         input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr)); | ||||||
|  |         input_data_container[source_attr]->setVisible(true); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Initialize debug info text for current cycle count
 | ||||||
|  |     cycle_index->setMaximum(debug_data.records.size() - 1); | ||||||
|  |     OnCycleIndexChanged(cycle_index->value()); | ||||||
|  | 
 | ||||||
|  |     model->endResetModel(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void GraphicsVertexShaderWidget::OnResumed() { | void GraphicsVertexShaderWidget::OnResumed() { | ||||||
|     widget()->setEnabled(false); |     widget()->setEnabled(false); | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | ||||||
|  |     float value = input_data[index]->text().toFloat(); | ||||||
|  |     Reload(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void GraphicsVertexShaderWidget::OnCycleIndexChanged(int index) { | ||||||
|  |     QString text; | ||||||
|  | 
 | ||||||
|  |     auto& record = debug_data.records[index]; | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::SRC1) | ||||||
|  |         text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32()); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::SRC2) | ||||||
|  |         text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32()); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::SRC3) | ||||||
|  |         text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32()); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN) | ||||||
|  |         text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32()); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT) | ||||||
|  |         text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT) | ||||||
|  |         text += tr("Addres Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT) | ||||||
|  |         text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false"); | ||||||
|  | 
 | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN) | ||||||
|  |         text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false"); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN) | ||||||
|  |         text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false"); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN) | ||||||
|  |         text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w); | ||||||
|  | 
 | ||||||
|  |     text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0')); | ||||||
|  |     if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) { | ||||||
|  |         text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0')); | ||||||
|  |     } else { | ||||||
|  |         text += tr(" (last instruction)"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     instruction_description->setText(text); | ||||||
|  | 
 | ||||||
|  |     // Scroll to current instruction
 | ||||||
|  |     const QModelIndex& instr_index = model->index(record.instruction_offset, 0); | ||||||
|  |     emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows); | ||||||
|  |     binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -10,11 +10,18 @@ | ||||||
| 
 | 
 | ||||||
| #include "nihstro/parser_shbin.h" | #include "nihstro/parser_shbin.h" | ||||||
| 
 | 
 | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  | 
 | ||||||
|  | class QLabel; | ||||||
|  | class QSpinBox; | ||||||
|  | 
 | ||||||
|  | class GraphicsVertexShaderWidget; | ||||||
|  | 
 | ||||||
| class GraphicsVertexShaderModel : public QAbstractItemModel { | class GraphicsVertexShaderModel : public QAbstractItemModel { | ||||||
|     Q_OBJECT |     Q_OBJECT | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     GraphicsVertexShaderModel(QObject* parent); |     GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent); | ||||||
| 
 | 
 | ||||||
|     QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; |     QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; | ||||||
|     QModelIndex parent(const QModelIndex& child) const override; |     QModelIndex parent(const QModelIndex& child) const override; | ||||||
|  | @ -23,11 +30,10 @@ public: | ||||||
|     QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; |     QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; | ||||||
|     QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; |     QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; | ||||||
| 
 | 
 | ||||||
| public slots: |  | ||||||
|     void OnUpdate(); |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     nihstro::ShaderInfo info; |     GraphicsVertexShaderWidget* par; | ||||||
|  | 
 | ||||||
|  |     friend class GraphicsVertexShaderWidget; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class GraphicsVertexShaderWidget : public BreakPointObserverDock { | class GraphicsVertexShaderWidget : public BreakPointObserverDock { | ||||||
|  | @ -43,9 +49,42 @@ private slots: | ||||||
|     void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; |     void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; | ||||||
|     void OnResumed() override; |     void OnResumed() override; | ||||||
| 
 | 
 | ||||||
|  |     void OnInputAttributeChanged(int index); | ||||||
|  | 
 | ||||||
|  |     void OnCycleIndexChanged(int index); | ||||||
|  | 
 | ||||||
|  |     void DumpShader(); | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Reload widget based on the current PICA200 state | ||||||
|  |      * @param replace_vertex_data If true, invalidate all current vertex data | ||||||
|  |      * @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true. | ||||||
|  |      */ | ||||||
|  |     void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr); | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| signals: | signals: | ||||||
|     void Update(); |     // Call this to change the current command selection in the disassembly view
 | ||||||
|  |     void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  |     QLabel* instruction_description; | ||||||
|  |     QTreeView* binary_list; | ||||||
|  |     GraphicsVertexShaderModel* model; | ||||||
| 
 | 
 | ||||||
|  |     /// TODO: Move these into a single struct
 | ||||||
|  |     std::array<QLineEdit*, 4*16> input_data;  // A text box for each of the 4 components of up to 16 vertex attributes
 | ||||||
|  |     std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute
 | ||||||
|  |     std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to
 | ||||||
|  | 
 | ||||||
|  |     // Text to be shown when input vertex data is not retrievable
 | ||||||
|  |     QLabel* breakpoint_warning; | ||||||
|  | 
 | ||||||
|  |     QSpinBox* cycle_index; | ||||||
|  | 
 | ||||||
|  |     nihstro::ShaderInfo info; | ||||||
|  |     Pica::Shader::DebugData<true> debug_data; | ||||||
|  |     Pica::Shader::InputVertex input_vertex; | ||||||
|  | 
 | ||||||
|  |     friend class GraphicsVertexShaderModel; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -215,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||||
|             unsigned int vertex_cache_pos = 0; |             unsigned int vertex_cache_pos = 0; | ||||||
|             vertex_cache_ids.fill(-1); |             vertex_cache_ids.fill(-1); | ||||||
| 
 | 
 | ||||||
|             Shader::UnitState shader_unit; |             Shader::UnitState<false> shader_unit; | ||||||
|             Shader::Setup(shader_unit); |             Shader::Setup(shader_unit); | ||||||
| 
 | 
 | ||||||
|             for (unsigned int index = 0; index < regs.num_vertices; ++index) |             for (unsigned int index = 0; index < regs.num_vertices; ++index) | ||||||
|  |  | ||||||
|  | @ -14,6 +14,7 @@ | ||||||
| #include <png.h> | #include <png.h> | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #include <nihstro/float24.h> | ||||||
| #include <nihstro/shader_binary.h> | #include <nihstro/shader_binary.h> | ||||||
| 
 | 
 | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
|  | @ -110,8 +111,7 @@ void GeometryDumper::Dump() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) | ||||||
|                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes) |  | ||||||
| { | { | ||||||
|     struct StuffToWrite { |     struct StuffToWrite { | ||||||
|         u8* pointer; |         u8* pointer; | ||||||
|  | @ -132,10 +132,13 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||||
|     union OutputRegisterInfo { |     union OutputRegisterInfo { | ||||||
|         enum Type : u64 { |         enum Type : u64 { | ||||||
|             POSITION   = 0, |             POSITION   = 0, | ||||||
|  |             QUATERNION = 1, | ||||||
|             COLOR      = 2, |             COLOR      = 2, | ||||||
|             TEXCOORD0  = 3, |             TEXCOORD0  = 3, | ||||||
|             TEXCOORD1  = 5, |             TEXCOORD1  = 5, | ||||||
|             TEXCOORD2  = 6, |             TEXCOORD2  = 6, | ||||||
|  | 
 | ||||||
|  |             VIEW       = 8, | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         BitField< 0, 64, u64> hex; |         BitField< 0, 64, u64> hex; | ||||||
|  | @ -157,6 +160,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||||
|                 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, |                 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | ||||||
|                 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, |                 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | ||||||
|                 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, |                 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | ||||||
|  |                 { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, | ||||||
|  |                 { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, | ||||||
|  |                 { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, | ||||||
|  |                 { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, | ||||||
|                 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, |                 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | ||||||
|                 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, |                 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | ||||||
|                 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, |                 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | ||||||
|  | @ -166,7 +173,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||||
|                 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, |                 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | ||||||
|                 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, |                 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | ||||||
|                 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, |                 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | ||||||
|                 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } |                 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, | ||||||
|  |                 { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, | ||||||
|  |                 { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, | ||||||
|  |                 { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } | ||||||
|             }; |             }; | ||||||
| 
 | 
 | ||||||
|             for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ |             for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | ||||||
|  | @ -221,28 +231,69 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||||
| 
 | 
 | ||||||
|     // TODO: Reduce the amount of binary code written to relevant portions
 |     // TODO: Reduce the amount of binary code written to relevant portions
 | ||||||
|     dvlp.binary_offset = write_offset - dvlp_offset; |     dvlp.binary_offset = write_offset - dvlp_offset; | ||||||
|     dvlp.binary_size_words = binary_size; |     dvlp.binary_size_words = setup.program_code.size(); | ||||||
|     QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); |     QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|     dvlp.swizzle_info_offset = write_offset - dvlp_offset; |     dvlp.swizzle_info_offset = write_offset - dvlp_offset; | ||||||
|     dvlp.swizzle_info_num_entries = swizzle_size; |     dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); | ||||||
|     u32 dummy = 0; |     u32 dummy = 0; | ||||||
|     for (unsigned int i = 0; i < swizzle_size; ++i) { |     for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { | ||||||
|         QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); |         QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i])); | ||||||
|         QueueForWriting((u8*)&dummy, sizeof(dummy)); |         QueueForWriting((u8*)&dummy, sizeof(dummy)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     dvle.main_offset_words = main_offset; |     dvle.main_offset_words = config.main_offset; | ||||||
|     dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; |     dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | ||||||
|     dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); |     dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); | ||||||
|     QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); |     QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); | ||||||
| 
 | 
 | ||||||
|     // TODO: Create a label table for "main"
 |     // TODO: Create a label table for "main"
 | ||||||
| 
 | 
 | ||||||
|  |     std::vector<nihstro::ConstantInfo> constant_table; | ||||||
|  |     for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) { | ||||||
|  |         nihstro::ConstantInfo constant; | ||||||
|  |         memset(&constant, 0, sizeof(constant)); | ||||||
|  |         constant.type = nihstro::ConstantInfo::Bool; | ||||||
|  |         constant.regid = i; | ||||||
|  |         constant.b = setup.uniforms.b[i]; | ||||||
|  |         constant_table.emplace_back(constant); | ||||||
|  |     } | ||||||
|  |     for (unsigned i = 0; i < setup.uniforms.i.size(); ++i) { | ||||||
|  |         nihstro::ConstantInfo constant; | ||||||
|  |         memset(&constant, 0, sizeof(constant)); | ||||||
|  |         constant.type = nihstro::ConstantInfo::Int; | ||||||
|  |         constant.regid = i; | ||||||
|  |         constant.i.x = setup.uniforms.i[i].x; | ||||||
|  |         constant.i.y = setup.uniforms.i[i].y; | ||||||
|  |         constant.i.z = setup.uniforms.i[i].z; | ||||||
|  |         constant.i.w = setup.uniforms.i[i].w; | ||||||
|  |         constant_table.emplace_back(constant); | ||||||
|  |     } | ||||||
|  |     for (unsigned i = 0; i < sizeof(setup.uniforms.f) / sizeof(setup.uniforms.f[0]); ++i) { | ||||||
|  |         nihstro::ConstantInfo constant; | ||||||
|  |         memset(&constant, 0, sizeof(constant)); | ||||||
|  |         constant.type = nihstro::ConstantInfo::Float; | ||||||
|  |         constant.regid = i; | ||||||
|  |         constant.f.x = nihstro::to_float24(setup.uniforms.f[i].x.ToFloat32()); | ||||||
|  |         constant.f.y = nihstro::to_float24(setup.uniforms.f[i].y.ToFloat32()); | ||||||
|  |         constant.f.z = nihstro::to_float24(setup.uniforms.f[i].z.ToFloat32()); | ||||||
|  |         constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); | ||||||
|  | 
 | ||||||
|  |         // Store constant if it's different from zero..
 | ||||||
|  |         if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || | ||||||
|  |             setup.uniforms.f[i].y.ToFloat32() != 0.0 || | ||||||
|  |             setup.uniforms.f[i].z.ToFloat32() != 0.0 || | ||||||
|  |             setup.uniforms.f[i].w.ToFloat32() != 0.0) | ||||||
|  |             constant_table.emplace_back(constant); | ||||||
|  |     } | ||||||
|  |     dvle.constant_table_offset = write_offset - dvlb.dvle_offset; | ||||||
|  |     dvle.constant_table_size = constant_table.size(); | ||||||
|  |     for (const auto& constant : constant_table) { | ||||||
|  |         QueueForWriting((uint8_t*)&constant, sizeof(constant)); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     // Write data to file
 |     // Write data to file
 | ||||||
|     static int dump_index = 0; |     static int dump_index = 0; | ||||||
|     std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); |  | ||||||
|     std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); |     std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | ||||||
| 
 | 
 | ||||||
|     for (auto& chunk : writing_queue) { |     for (auto& chunk : writing_queue) { | ||||||
|  |  | ||||||
|  | @ -158,7 +158,6 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g | ||||||
| namespace DebugUtils { | namespace DebugUtils { | ||||||
| 
 | 
 | ||||||
| #define PICA_DUMP_GEOMETRY 0 | #define PICA_DUMP_GEOMETRY 0 | ||||||
| #define PICA_DUMP_SHADERS 0 |  | ||||||
| #define PICA_DUMP_TEXTURES 0 | #define PICA_DUMP_TEXTURES 0 | ||||||
| #define PICA_LOG_TEV 0 | #define PICA_LOG_TEV 0 | ||||||
| 
 | 
 | ||||||
|  | @ -182,8 +181,8 @@ private: | ||||||
|     std::vector<Face> faces; |     std::vector<Face> faces; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, | ||||||
|                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes); |                 const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| // Utility class to log Pica commands.
 | // Utility class to log Pica commands.
 | ||||||
|  |  | ||||||
|  | @ -80,6 +80,11 @@ struct Regs { | ||||||
|             POSITION_Z   =  2, |             POSITION_Z   =  2, | ||||||
|             POSITION_W   =  3, |             POSITION_W   =  3, | ||||||
| 
 | 
 | ||||||
|  |             QUATERNION_X =  4, | ||||||
|  |             QUATERNION_Y =  5, | ||||||
|  |             QUATERNION_Z =  6, | ||||||
|  |             QUATERNION_W =  7, | ||||||
|  | 
 | ||||||
|             COLOR_R      =  8, |             COLOR_R      =  8, | ||||||
|             COLOR_G      =  9, |             COLOR_G      =  9, | ||||||
|             COLOR_B      = 10, |             COLOR_B      = 10, | ||||||
|  | @ -89,6 +94,12 @@ struct Regs { | ||||||
|             TEXCOORD0_V  = 13, |             TEXCOORD0_V  = 13, | ||||||
|             TEXCOORD1_U  = 14, |             TEXCOORD1_U  = 14, | ||||||
|             TEXCOORD1_V  = 15, |             TEXCOORD1_V  = 15, | ||||||
|  | 
 | ||||||
|  |             // TODO: Not verified
 | ||||||
|  |             VIEW_X       = 18, | ||||||
|  |             VIEW_Y       = 19, | ||||||
|  |             VIEW_Z       = 20, | ||||||
|  | 
 | ||||||
|             TEXCOORD2_U  = 22, |             TEXCOORD2_U  = 22, | ||||||
|             TEXCOORD2_V  = 23, |             TEXCOORD2_V  = 23, | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -5,6 +5,8 @@ | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
| 
 | 
 | ||||||
|  | #include <boost/range/algorithm/fill.hpp> | ||||||
|  | 
 | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
| #include "common/make_unique.h" | #include "common/make_unique.h" | ||||||
| #include "common/profiler.h" | #include "common/profiler.h" | ||||||
|  | @ -30,7 +32,7 @@ static JitCompiler jit; | ||||||
| static CompiledShader* jit_shader; | static CompiledShader* jit_shader; | ||||||
| #endif // ARCHITECTURE_x86_64
 | #endif // ARCHITECTURE_x86_64
 | ||||||
| 
 | 
 | ||||||
| void Setup(UnitState& state) { | void Setup(UnitState<false>& state) { | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
|     if (VideoCore::g_shader_jit_enabled) { |     if (VideoCore::g_shader_jit_enabled) { | ||||||
|         u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |         u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | ||||||
|  | @ -54,9 +56,8 @@ void Shutdown() { | ||||||
| 
 | 
 | ||||||
| static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | ||||||
| 
 | 
 | ||||||
| OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | ||||||
|     auto& config = g_state.regs.vs; |     auto& config = g_state.regs.vs; | ||||||
|     auto& setup = g_state.vs; |  | ||||||
| 
 | 
 | ||||||
|     Common::Profiling::ScopeTimer timer(shader_category); |     Common::Profiling::ScopeTimer timer(shader_category); | ||||||
| 
 | 
 | ||||||
|  | @ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||||
|     // Setup input register table
 |     // Setup input register table
 | ||||||
|     const auto& attribute_register_map = config.input_register_map; |     const auto& attribute_register_map = config.input_register_map; | ||||||
| 
 | 
 | ||||||
|  |     // TODO: Instead of this cumbersome logic, just load the input data directly like
 | ||||||
|  |     // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; }
 | ||||||
|     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; |     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | ||||||
|     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; |     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | ||||||
|     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; |     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | ||||||
|  | @ -96,12 +99,6 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||||
|     RunInterpreter(state); |     RunInterpreter(state); | ||||||
| #endif // ARCHITECTURE_x86_64
 | #endif // ARCHITECTURE_x86_64
 | ||||||
| 
 | 
 | ||||||
| #if PICA_DUMP_SHADERS |  | ||||||
|     DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), |  | ||||||
|         state.debug.max_opdesc_id, config.main_offset, |  | ||||||
|         g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
 |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
|     // Setup output data
 |     // Setup output data
 | ||||||
|     OutputVertex ret; |     OutputVertex ret; | ||||||
|     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
 |     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
 | ||||||
|  | @ -132,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||||
|             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |     LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||||
|         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||||||
|  |         ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||||||
|         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||||||
|         ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |         ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||||||
| 
 | 
 | ||||||
|     return ret; |     return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||||||
|  |     UnitState<true> state; | ||||||
|  | 
 | ||||||
|  |     const auto& shader_memory = setup.program_code; | ||||||
|  |     state.program_counter = config.main_offset; | ||||||
|  |     state.debug.max_offset = 0; | ||||||
|  |     state.debug.max_opdesc_id = 0; | ||||||
|  | 
 | ||||||
|  |     // Setup input register table
 | ||||||
|  |     const auto& attribute_register_map = config.input_register_map; | ||||||
|  |     float24 dummy_register; | ||||||
|  |     boost::fill(state.registers.input, &dummy_register); | ||||||
|  | 
 | ||||||
|  |     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||||||
|  |     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||||||
|  |     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||||||
|  |     if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||||||
|  |     if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||||||
|  |     if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||||||
|  |     if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||||||
|  |     if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||||||
|  |     if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||||||
|  |     if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||||||
|  |     if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||||||
|  |     if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||||||
|  |     if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||||||
|  |     if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||||||
|  |     if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||||||
|  |     if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||||||
|  | 
 | ||||||
|  |     state.conditional_code[0] = false; | ||||||
|  |     state.conditional_code[1] = false; | ||||||
|  | 
 | ||||||
|  |     RunInterpreter(state); | ||||||
|  |     return state.debug; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader
 | } // namespace Shader
 | ||||||
| 
 | 
 | ||||||
| } // namespace Pica
 | } // namespace Pica
 | ||||||
|  |  | ||||||
|  | @ -4,7 +4,10 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
| #include <boost/container/static_vector.hpp> | #include <boost/container/static_vector.hpp> | ||||||
|  | 
 | ||||||
| #include <nihstro/shader_binary.h> | #include <nihstro/shader_binary.h> | ||||||
| 
 | 
 | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
|  | @ -72,12 +75,185 @@ struct OutputVertex { | ||||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||||
| static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | // Helper structure used to keep track of data useful for inspection of shader emulation
 | ||||||
|  | template<bool full_debugging> | ||||||
|  | struct DebugData; | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | struct DebugData<false> { | ||||||
|  |     // TODO: Hide these behind and interface and move them to DebugData<true>
 | ||||||
|  |     u32 max_offset; // maximum program counter ever reached
 | ||||||
|  |     u32 max_opdesc_id; // maximum swizzle pattern index ever used
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | struct DebugData<true> { | ||||||
|  |     // Records store the input and output operands of a particular instruction.
 | ||||||
|  |     struct Record { | ||||||
|  |         enum Type { | ||||||
|  |             // Floating point arithmetic operands
 | ||||||
|  |             SRC1         = 0x1, | ||||||
|  |             SRC2         = 0x2, | ||||||
|  |             SRC3         = 0x4, | ||||||
|  | 
 | ||||||
|  |             // Initial and final output operand value
 | ||||||
|  |             DEST_IN      = 0x8, | ||||||
|  |             DEST_OUT     = 0x10, | ||||||
|  | 
 | ||||||
|  |             // Current and next instruction offset (in words)
 | ||||||
|  |             CUR_INSTR    = 0x20, | ||||||
|  |             NEXT_INSTR   = 0x40, | ||||||
|  | 
 | ||||||
|  |             // Output address register value
 | ||||||
|  |             ADDR_REG_OUT = 0x80, | ||||||
|  | 
 | ||||||
|  |             // Result of a comparison instruction
 | ||||||
|  |             CMP_RESULT   = 0x100, | ||||||
|  | 
 | ||||||
|  |             // Input values for conditional flow control instructions
 | ||||||
|  |             COND_BOOL_IN = 0x200, | ||||||
|  |             COND_CMP_IN  = 0x400, | ||||||
|  | 
 | ||||||
|  |             // Input values for a loop
 | ||||||
|  |             LOOP_INT_IN  = 0x800, | ||||||
|  |         }; | ||||||
|  | 
 | ||||||
|  |         Math::Vec4<float24> src1; | ||||||
|  |         Math::Vec4<float24> src2; | ||||||
|  |         Math::Vec4<float24> src3; | ||||||
|  | 
 | ||||||
|  |         Math::Vec4<float24> dest_in; | ||||||
|  |         Math::Vec4<float24> dest_out; | ||||||
|  | 
 | ||||||
|  |         s32 address_registers[2]; | ||||||
|  |         bool conditional_code[2]; | ||||||
|  |         bool cond_bool; | ||||||
|  |         bool cond_cmp[2]; | ||||||
|  |         Math::Vec4<u8> loop_int; | ||||||
|  | 
 | ||||||
|  |         u32 instruction_offset; | ||||||
|  |         u32 next_instruction; | ||||||
|  | 
 | ||||||
|  |         // set of enabled fields (as a combination of Type flags)
 | ||||||
|  |         unsigned mask = 0; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     u32 max_offset; // maximum program counter ever reached
 | ||||||
|  |     u32 max_opdesc_id; // maximum swizzle pattern index ever used
 | ||||||
|  | 
 | ||||||
|  |     // List of records for each executed shader instruction
 | ||||||
|  |     std::vector<DebugData<true>::Record> records; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | // Type alias for better readability
 | ||||||
|  | using DebugDataRecord = DebugData<true>::Record; | ||||||
|  | 
 | ||||||
|  | // Helper function to set a DebugData<true>::Record field based on the template enum parameter.
 | ||||||
|  | template<DebugDataRecord::Type type, typename ValueType> | ||||||
|  | inline void SetField(DebugDataRecord& record, ValueType value); | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||||||
|  |     record.src1.x = value[0]; | ||||||
|  |     record.src1.y = value[1]; | ||||||
|  |     record.src1.z = value[2]; | ||||||
|  |     record.src1.w = value[3]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||||||
|  |     record.src2.x = value[0]; | ||||||
|  |     record.src2.y = value[1]; | ||||||
|  |     record.src2.z = value[2]; | ||||||
|  |     record.src2.w = value[3]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||||||
|  |     record.src3.x = value[0]; | ||||||
|  |     record.src3.y = value[1]; | ||||||
|  |     record.src3.z = value[2]; | ||||||
|  |     record.src3.w = value[3]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||||||
|  |     record.dest_in.x = value[0]; | ||||||
|  |     record.dest_in.y = value[1]; | ||||||
|  |     record.dest_in.z = value[2]; | ||||||
|  |     record.dest_in.w = value[3]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||||||
|  |     record.dest_out.x = value[0]; | ||||||
|  |     record.dest_out.y = value[1]; | ||||||
|  |     record.dest_out.z = value[2]; | ||||||
|  |     record.dest_out.w = value[3]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||||||
|  |     record.address_registers[0] = value[0]; | ||||||
|  |     record.address_registers[1] = value[1]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||||||
|  |     record.conditional_code[0] = value[0]; | ||||||
|  |     record.conditional_code[1] = value[1]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||||||
|  |     record.cond_bool = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||||||
|  |     record.cond_cmp[0] = value[0]; | ||||||
|  |     record.cond_cmp[1] = value[1]; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||||||
|  |     record.loop_int = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||||||
|  |     record.instruction_offset = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<> | ||||||
|  | inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||||||
|  |     record.next_instruction = value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Helper function to set debug information on the current shader iteration.
 | ||||||
|  | template<DebugDataRecord::Type type, typename ValueType> | ||||||
|  | inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||||||
|  |     // Debugging disabled => nothing to do
 | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template<DebugDataRecord::Type type, typename ValueType> | ||||||
|  | inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||||||
|  |     if (offset >= debug_data.records.size()) | ||||||
|  |         debug_data.records.resize(offset + 1); | ||||||
|  | 
 | ||||||
|  |    SetField<type, ValueType>(debug_data.records[offset], value); | ||||||
|  |    debug_data.records[offset].mask |= type; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * This structure contains the state information that needs to be unique for a shader unit. The 3DS |  * This structure contains the state information that needs to be unique for a shader unit. The 3DS | ||||||
|  * has four shader units that process shaders in parallel. At the present, Citra only implements a |  * has four shader units that process shaders in parallel. At the present, Citra only implements a | ||||||
|  * single shader unit that processes all shaders serially. Putting the state information in a struct |  * single shader unit that processes all shaders serially. Putting the state information in a struct | ||||||
|  * here will make it easier for us to parallelize the shader processing later. |  * here will make it easier for us to parallelize the shader processing later. | ||||||
|  */ |  */ | ||||||
|  | template<bool Debug> | ||||||
| struct UnitState { | struct UnitState { | ||||||
|     struct Registers { |     struct Registers { | ||||||
|         // The registers are accessed by the shader JIT using SSE instructions, and are therefore
 |         // The registers are accessed by the shader JIT using SSE instructions, and are therefore
 | ||||||
|  | @ -111,10 +287,7 @@ struct UnitState { | ||||||
|     // TODO: Is there a maximal size for this?
 |     // TODO: Is there a maximal size for this?
 | ||||||
|     boost::container::static_vector<CallStackElement, 16> call_stack; |     boost::container::static_vector<CallStackElement, 16> call_stack; | ||||||
| 
 | 
 | ||||||
|     struct { |     DebugData<Debug> debug; | ||||||
|         u32 max_offset; // maximum program counter ever reached
 |  | ||||||
|         u32 max_opdesc_id; // maximum swizzle pattern index ever used
 |  | ||||||
|     } debug; |  | ||||||
| 
 | 
 | ||||||
|     static int InputOffset(const SourceRegister& reg) { |     static int InputOffset(const SourceRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|  | @ -150,7 +323,7 @@ struct UnitState { | ||||||
|  * vertex, which would happen within the `Run` function). |  * vertex, which would happen within the `Run` function). | ||||||
|  * @param state Shader unit state, must be setup per shader and per shader unit |  * @param state Shader unit state, must be setup per shader and per shader unit | ||||||
|  */ |  */ | ||||||
| void Setup(UnitState& state); | void Setup(UnitState<false>& state); | ||||||
| 
 | 
 | ||||||
| /// Performs any cleanup when the emulator is shutdown
 | /// Performs any cleanup when the emulator is shutdown
 | ||||||
| void Shutdown(); | void Shutdown(); | ||||||
|  | @ -162,7 +335,17 @@ void Shutdown(); | ||||||
|  * @param num_attributes The number of vertex shader attributes |  * @param num_attributes The number of vertex shader attributes | ||||||
|  * @return The output vertex, after having been processed by the vertex shader |  * @return The output vertex, after having been processed by the vertex shader | ||||||
|  */ |  */ | ||||||
| OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); | OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * Produce debug information based on the given shader and input vertex | ||||||
|  |  * @param input Input vertex into the shader | ||||||
|  |  * @param num_attributes The number of vertex shader attributes | ||||||
|  |  * @param config Configuration object for the shader pipeline | ||||||
|  |  * @param setup Setup object for the shader pipeline | ||||||
|  |  * @return Debug information for this shader with regards to the given vertex | ||||||
|  |  */ | ||||||
|  | DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); | ||||||
| 
 | 
 | ||||||
| } // namespace Shader
 | } // namespace Shader
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -21,7 +21,8 @@ namespace Pica { | ||||||
| 
 | 
 | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
| void RunInterpreter(UnitState& state) { | template<bool Debug> | ||||||
|  | void RunInterpreter(UnitState<Debug>& state) { | ||||||
|     const auto& uniforms = g_state.vs.uniforms; |     const auto& uniforms = g_state.vs.uniforms; | ||||||
|     const auto& swizzle_data = g_state.vs.swizzle_data; |     const auto& swizzle_data = g_state.vs.swizzle_data; | ||||||
|     const auto& program_code = g_state.vs.program_code; |     const auto& program_code = g_state.vs.program_code; | ||||||
|  | @ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) { | ||||||
|     // Placeholder for invalid inputs
 |     // Placeholder for invalid inputs
 | ||||||
|     static float24 dummy_vec4_float24[4]; |     static float24 dummy_vec4_float24[4]; | ||||||
| 
 | 
 | ||||||
|     while (true) { |     unsigned iteration = 0; | ||||||
|  |     bool exit_loop = false; | ||||||
|  |     while (!exit_loop) { | ||||||
|         if (!state.call_stack.empty()) { |         if (!state.call_stack.empty()) { | ||||||
|             auto& top = state.call_stack.back(); |             auto& top = state.call_stack.back(); | ||||||
|             if (state.program_counter == top.final_address) { |             if (state.program_counter == top.final_address) { | ||||||
|  | @ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) { | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         bool exit_loop = false; |  | ||||||
|         const Instruction instr = { program_code[state.program_counter] }; |         const Instruction instr = { program_code[state.program_counter] }; | ||||||
|         const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; |         const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | ||||||
| 
 | 
 | ||||||
|         static auto call = [](UnitState& state, u32 offset, u32 num_instructions, |         static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, | ||||||
|                               u32 return_offset, u8 repeat_count, u8 loop_increment) { |                               u32 return_offset, u8 repeat_count, u8 loop_increment) { | ||||||
|             state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
 |             state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
 | ||||||
|             ASSERT(state.call_stack.size() < state.call_stack.capacity()); |             ASSERT(state.call_stack.size() < state.call_stack.capacity()); | ||||||
|             state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); |             state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | ||||||
|         }; |         }; | ||||||
|  |         Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | ||||||
|  |         if (iteration > 0) | ||||||
|  |             Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | ||||||
|  | 
 | ||||||
|         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); |         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | ||||||
| 
 | 
 | ||||||
|         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { |         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | ||||||
|  | @ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) { | ||||||
|             switch (instr.opcode.Value().EffectiveOpCode()) { |             switch (instr.opcode.Value().EffectiveOpCode()) { | ||||||
|             case OpCode::Id::ADD: |             case OpCode::Id::ADD: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = src1[i] + src2[i]; |                     dest[i] = src1[i] + src2[i]; | ||||||
|                 } |                 } | ||||||
| 
 |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::MUL: |             case OpCode::Id::MUL: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = src1[i] * src2[i]; |                     dest[i] = src1[i] * src2[i]; | ||||||
|                 } |                 } | ||||||
| 
 |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::FLR: |             case OpCode::Id::FLR: | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); |                     dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::MAX: |             case OpCode::Id::MAX: | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = std::max(src1[i], src2[i]); |                     dest[i] = std::max(src1[i], src2[i]); | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::MIN: |             case OpCode::Id::MIN: | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = std::min(src1[i], src2[i]); |                     dest[i] = std::min(src1[i], src2[i]); | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::DP3: |             case OpCode::Id::DP3: | ||||||
|             case OpCode::Id::DP4: |             case OpCode::Id::DP4: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 float24 dot = float24::FromFloat32(0.f); |                 float24 dot = float24::FromFloat32(0.f); | ||||||
|                 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; |                 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | ||||||
|                 for (int i = 0; i < num_components; ++i) |                 for (int i = 0; i < num_components; ++i) | ||||||
|  | @ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) { | ||||||
| 
 | 
 | ||||||
|                     dest[i] = dot; |                     dest[i] = dot; | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Reciprocal
 |             // Reciprocal
 | ||||||
|             case OpCode::Id::RCP: |             case OpCode::Id::RCP: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) { | ||||||
|                     // TODO: I think this might be wrong... we should only use one component here
 |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|                     dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); |                     dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | ||||||
|                 } |                 } | ||||||
| 
 |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Reciprocal Square Root
 |             // Reciprocal Square Root
 | ||||||
|             case OpCode::Id::RSQ: |             case OpCode::Id::RSQ: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) { | ||||||
|                     // TODO: I think this might be wrong... we should only use one component here
 |                     // TODO: I think this might be wrong... we should only use one component here
 | ||||||
|                     dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); |                     dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | ||||||
|                 } |                 } | ||||||
| 
 |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::MOVA: |             case OpCode::Id::MOVA: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|                 for (int i = 0; i < 2; ++i) { |                 for (int i = 0; i < 2; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
|  | @ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) { | ||||||
|                     // TODO: Figure out how the rounding is done on hardware
 |                     // TODO: Figure out how the rounding is done on hardware
 | ||||||
|                     state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |                     state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | ||||||
|                 } |                 } | ||||||
| 
 |                 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::MOV: |             case OpCode::Id::MOV: | ||||||
|             { |             { | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = src1[i]; |                     dest[i] = src1[i]; | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::SLT: |             case OpCode::Id::SLT: | ||||||
|             case OpCode::Id::SLTI: |             case OpCode::Id::SLTI: | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); |                     dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::CMP: |             case OpCode::Id::CMP: | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|                 for (int i = 0; i < 2; ++i) { |                 for (int i = 0; i < 2; ++i) { | ||||||
|                     // TODO: Can you restrict to one compare via dest masking?
 |                     // TODO: Can you restrict to one compare via dest masking?
 | ||||||
| 
 | 
 | ||||||
|  | @ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) { | ||||||
|                     auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |                     auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | ||||||
| 
 | 
 | ||||||
|                     switch (op) { |                     switch (op) { | ||||||
|                         case compare_op.Equal: |                         case Instruction::Common::CompareOpType::Equal: | ||||||
|                             state.conditional_code[i] = (src1[i] == src2[i]); |                             state.conditional_code[i] = (src1[i] == src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|                         case compare_op.NotEqual: |                         case Instruction::Common::CompareOpType::NotEqual: | ||||||
|                             state.conditional_code[i] = (src1[i] != src2[i]); |                             state.conditional_code[i] = (src1[i] != src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|                         case compare_op.LessThan: |                         case Instruction::Common::CompareOpType::LessThan: | ||||||
|                             state.conditional_code[i] = (src1[i] <  src2[i]); |                             state.conditional_code[i] = (src1[i] <  src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|                         case compare_op.LessEqual: |                         case Instruction::Common::CompareOpType::LessEqual: | ||||||
|                             state.conditional_code[i] = (src1[i] <= src2[i]); |                             state.conditional_code[i] = (src1[i] <= src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|                         case compare_op.GreaterThan: |                         case Instruction::Common::CompareOpType::GreaterThan: | ||||||
|                             state.conditional_code[i] = (src1[i] >  src2[i]); |                             state.conditional_code[i] = (src1[i] >  src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|                         case compare_op.GreaterEqual: |                         case Instruction::Common::CompareOpType::GreaterEqual: | ||||||
|                             state.conditional_code[i] = (src1[i] >= src2[i]); |                             state.conditional_code[i] = (src1[i] >= src2[i]); | ||||||
|                             break; |                             break; | ||||||
| 
 | 
 | ||||||
|  | @ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) { | ||||||
|                             break; |                             break; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             default: |             default: | ||||||
|  | @ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) { | ||||||
|                             : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] |                             : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||||||
|                             : dummy_vec4_float24; |                             : dummy_vec4_float24; | ||||||
| 
 | 
 | ||||||
|  |                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||||
|  |                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||||
|  |                 Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | ||||||
|  |                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||||
|                 for (int i = 0; i < 4; ++i) { |                 for (int i = 0; i < 4; ++i) { | ||||||
|                     if (!swizzle.DestComponentEnabled(i)) |                     if (!swizzle.DestComponentEnabled(i)) | ||||||
|                         continue; |                         continue; | ||||||
| 
 | 
 | ||||||
|                     dest[i] = src1[i] * src2[i] + src3[i]; |                     dest[i] = src1[i] * src2[i] + src3[i]; | ||||||
|                 } |                 } | ||||||
|  |                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||||
|             } else { |             } else { | ||||||
|                 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", |                 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | ||||||
|                           (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); |                           (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | ||||||
|  | @ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) { | ||||||
| 
 | 
 | ||||||
|         default: |         default: | ||||||
|         { |         { | ||||||
|             static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { |             static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | ||||||
|                 bool results[2] = { refx == state.conditional_code[0], |                 bool results[2] = { refx == state.conditional_code[0], | ||||||
|                                     refy == state.conditional_code[1] }; |                                     refy == state.conditional_code[1] }; | ||||||
| 
 | 
 | ||||||
|  | @ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) { | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::JMPC: |             case OpCode::Id::JMPC: | ||||||
|  |                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||||
|                     state.program_counter = instr.flow_control.dest_offset - 1; |                     state.program_counter = instr.flow_control.dest_offset - 1; | ||||||
|                 } |                 } | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::JMPU: |             case OpCode::Id::JMPU: | ||||||
|  |                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     state.program_counter = instr.flow_control.dest_offset - 1; |                     state.program_counter = instr.flow_control.dest_offset - 1; | ||||||
|                 } |                 } | ||||||
|  | @ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) { | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::CALLU: |             case OpCode::Id::CALLU: | ||||||
|  |                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                         instr.flow_control.dest_offset, |                         instr.flow_control.dest_offset, | ||||||
|  | @ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) { | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::CALLC: |             case OpCode::Id::CALLC: | ||||||
|  |                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                         instr.flow_control.dest_offset, |                         instr.flow_control.dest_offset, | ||||||
|  | @ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) { | ||||||
|                 break; |                 break; | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::IFU: |             case OpCode::Id::IFU: | ||||||
|  |                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { |                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                          state.program_counter + 1, |                          state.program_counter + 1, | ||||||
|  | @ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) { | ||||||
|             { |             { | ||||||
|                 // TODO: Do we need to consider swizzlers here?
 |                 // TODO: Do we need to consider swizzlers here?
 | ||||||
| 
 | 
 | ||||||
|  |                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { |                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||||
|                     call(state, |                     call(state, | ||||||
|                          state.program_counter + 1, |                          state.program_counter + 1, | ||||||
|  | @ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) { | ||||||
| 
 | 
 | ||||||
|             case OpCode::Id::LOOP: |             case OpCode::Id::LOOP: | ||||||
|             { |             { | ||||||
|                 state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; |                 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, | ||||||
|  |                                           uniforms.i[instr.flow_control.int_uniform_id].y, | ||||||
|  |                                           uniforms.i[instr.flow_control.int_uniform_id].z, | ||||||
|  |                                           uniforms.i[instr.flow_control.int_uniform_id].w); | ||||||
|  |                 state.address_registers[2] = loop_param.y; | ||||||
| 
 | 
 | ||||||
|  |                 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | ||||||
|                 call(state, |                 call(state, | ||||||
|                      state.program_counter + 1, |                      state.program_counter + 1, | ||||||
|                      instr.flow_control.dest_offset - state.program_counter + 1, |                      instr.flow_control.dest_offset - state.program_counter + 1, | ||||||
|                      instr.flow_control.dest_offset + 1, |                      instr.flow_control.dest_offset + 1, | ||||||
|                      uniforms.i[instr.flow_control.int_uniform_id].x, |                      loop_param.x, | ||||||
|                      uniforms.i[instr.flow_control.int_uniform_id].z); |                      loop_param.z); | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  | @ -497,11 +555,13 @@ void RunInterpreter(UnitState& state) { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         ++state.program_counter; |         ++state.program_counter; | ||||||
|  |         ++iteration; | ||||||
|  |     } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|         if (exit_loop) | // Explicit instantiation
 | ||||||
|             break; | template void RunInterpreter(UnitState<false>& state); | ||||||
|     } | template void RunInterpreter(UnitState<true>& state); | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -12,7 +12,8 @@ namespace Pica { | ||||||
| 
 | 
 | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
| void RunInterpreter(UnitState& state); | template<bool Debug> | ||||||
|  | void RunInterpreter(UnitState<Debug>& state); | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | ||||||
|         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; |         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | ||||||
|     } else { |     } else { | ||||||
|         src_ptr = REGISTERS; |         src_ptr = REGISTERS; | ||||||
|         src_offset = UnitState::InputOffset(src_reg); |         src_offset = UnitState<false>::InputOffset(src_reg); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     unsigned operand_desc_id; |     unsigned operand_desc_id; | ||||||
|  | @ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|     // If all components are enabled, write the result to the destination register
 |     // If all components are enabled, write the result to the destination register
 | ||||||
|     if (swiz.dest_mask == NO_DEST_REG_MASK) { |     if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||||||
|         // Store dest back to memory
 |         // Store dest back to memory
 | ||||||
|         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); |         MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src); | ||||||
| 
 | 
 | ||||||
|     } else { |     } else { | ||||||
|         // Not all components are enabled, so mask the result when storing to the destination register...
 |         // Not all components are enabled, so mask the result when storing to the destination register...
 | ||||||
|         MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); |         MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest))); | ||||||
| 
 | 
 | ||||||
|         if (Common::GetCPUCaps().sse4_1) { |         if (Common::GetCPUCaps().sse4_1) { | ||||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||||
|  | @ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Store dest back to memory
 |         // Store dest back to memory
 | ||||||
|         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); |         MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue