mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	OpenGL Cache: Optimize Morton Copy to copy in tiles
Compiles two lookup arrays of functions for the different configurations of Morton Copy.
This commit is contained in:
		
							parent
							
								
									160ac25527
								
							
						
					
					
						commit
						e9e2d444ef
					
				
					 3 changed files with 202 additions and 50 deletions
				
			
		|  | @ -46,6 +46,82 @@ static const std::array<FormatTuple, 4> depth_format_tuples = {{ | |||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
 | ||||
| }}; | ||||
| 
 | ||||
| static bool FillSurface(const Surface& surface, const u8* fill_data, | ||||
|                         const MathUtil::Rectangle<u32>& fill_rect) { | ||||
|     OpenGLState state = OpenGLState::GetCurState(); | ||||
| 
 | ||||
|     OpenGLState prev_state = state; | ||||
|     SCOPE_EXIT({ prev_state.Apply(); }); | ||||
| 
 | ||||
|     OpenGLState::ResetTexture(surface->texture.handle); | ||||
| 
 | ||||
|     state.scissor.enabled = true; | ||||
|     state.scissor.x = static_cast<GLint>(fill_rect.left); | ||||
|     state.scissor.y = static_cast<GLint>(std::min(fill_rect.top, fill_rect.bottom)); | ||||
|     state.scissor.width = static_cast<GLsizei>(fill_rect.GetWidth()); | ||||
|     state.scissor.height = static_cast<GLsizei>(fill_rect.GetHeight()); | ||||
| 
 | ||||
|     state.draw.draw_framebuffer = transfer_framebuffers[1].handle; | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||||
|                                surface->texture.handle, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||||
|                                0); | ||||
| 
 | ||||
|         Pica::Texture::TextureInfo tex_info{}; | ||||
|         tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(surface->pixel_format); | ||||
|         Math::Vec4<u8> color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); | ||||
| 
 | ||||
|         std::array<GLfloat, 4> color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, | ||||
|                                                color.w / 255.f}; | ||||
| 
 | ||||
|         state.color_mask.red_enabled = GL_TRUE; | ||||
|         state.color_mask.green_enabled = GL_TRUE; | ||||
|         state.color_mask.blue_enabled = GL_TRUE; | ||||
|         state.color_mask.alpha_enabled = GL_TRUE; | ||||
|         state.Apply(); | ||||
|         glClearBufferfv(GL_COLOR, 0, &color_values[0]); | ||||
|     } else if (surface->type == SurfaceType::Depth) { | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                surface->texture.handle, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||
| 
 | ||||
|         u32 value_32bit = 0; | ||||
|         GLfloat value_float; | ||||
| 
 | ||||
|         if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { | ||||
|             std::memcpy(&value_32bit, fill_data, 2); | ||||
|             value_float = value_32bit / 65535.0f; // 2^16 - 1
 | ||||
|         } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { | ||||
|             std::memcpy(&value_32bit, fill_data, 3); | ||||
|             value_float = value_32bit / 16777215.0f; // 2^24 - 1
 | ||||
|         } | ||||
| 
 | ||||
|         state.depth.write_mask = GL_TRUE; | ||||
|         state.Apply(); | ||||
|         glClearBufferfv(GL_DEPTH, 0, &value_float); | ||||
|     } else if (surface->type == SurfaceType::DepthStencil) { | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                surface->texture.handle, 0); | ||||
| 
 | ||||
|         u32 value_32bit; | ||||
|         std::memcpy(&value_32bit, fill_data, 4); | ||||
| 
 | ||||
|         GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
 | ||||
|         GLint value_int = (value_32bit >> 24); | ||||
| 
 | ||||
|         state.depth.write_mask = GL_TRUE; | ||||
|         state.stencil.write_mask = -1; | ||||
|         state.Apply(); | ||||
|         glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | ||||
|     transfer_framebuffers[0].Create(); | ||||
|     transfer_framebuffers[1].Create(); | ||||
|  | @ -55,55 +131,131 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | |||
|     FlushAll(); | ||||
| } | ||||
| 
 | ||||
| static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, | ||||
|                              u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, | ||||
|                              u8* gl_data, bool morton_to_gl) { | ||||
|     using PixelFormat = CachedSurface::PixelFormat; | ||||
| 
 | ||||
|     u8* data_ptrs[2]; | ||||
|     u32 depth_stencil_shifts[2] = {24, 8}; | ||||
| 
 | ||||
| template <bool morton_to_gl, PixelFormat format> | ||||
| static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { | ||||
|     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||||
|     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||
|     for (u32 y = 0; y < 8; ++y) { | ||||
|         for (u32 x = 0; x < 8; ++x) { | ||||
|             u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; | ||||
|             u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; | ||||
|             if (morton_to_gl) { | ||||
|         std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); | ||||
|     } | ||||
| 
 | ||||
|     if (pixel_format == PixelFormat::D24S8) { | ||||
|         for (unsigned y = 0; y < height; ++y) { | ||||
|             for (unsigned x = 0; x < width; ++x) { | ||||
|                 const u32 coarse_y = y & ~7; | ||||
|                 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                                     coarse_y * width * bytes_per_pixel; | ||||
|                 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; | ||||
| 
 | ||||
|                 data_ptrs[morton_to_gl] = morton_data + morton_offset; | ||||
|                 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | ||||
| 
 | ||||
|                 // Swap depth and stencil value ordering since 3DS does not match OpenGL
 | ||||
|                 u32 depth_stencil; | ||||
|                 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); | ||||
|                 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | | ||||
|                                 (depth_stencil >> depth_stencil_shifts[1]); | ||||
| 
 | ||||
|                 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); | ||||
|             } | ||||
|                 if (format == PixelFormat::D24S8) { | ||||
|                     gl_ptr[0] = tile_ptr[3]; | ||||
|                     std::memcpy(gl_ptr + 1, tile_ptr, 3); | ||||
|                 } else { | ||||
|                     std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); | ||||
|                 } | ||||
|             } else { | ||||
|         for (unsigned y = 0; y < height; ++y) { | ||||
|             for (unsigned x = 0; x < width; ++x) { | ||||
|                 const u32 coarse_y = y & ~7; | ||||
|                 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||||
|                                     coarse_y * width * bytes_per_pixel; | ||||
|                 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; | ||||
|                 if (format == PixelFormat::D24S8) { | ||||
|                     std::memcpy(tile_ptr, gl_ptr + 1, 3); | ||||
|                     tile_ptr[3] = gl_ptr[0]; | ||||
|                 } else { | ||||
|                     std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|                 data_ptrs[morton_to_gl] = morton_data + morton_offset; | ||||
|                 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | ||||
| template <bool morton_to_gl, PixelFormat format> | ||||
| static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { | ||||
|     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||||
|     constexpr u32 tile_size = bytes_per_pixel * 64; | ||||
| 
 | ||||
|                 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||||
|             } | ||||
|         } | ||||
|     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||
|     static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); | ||||
|     gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; | ||||
| 
 | ||||
|     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||||
|     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||||
|     const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); | ||||
| 
 | ||||
|     ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); | ||||
| 
 | ||||
|     const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; | ||||
|     u32 x = (begin_pixel_index % (stride * 8)) / 8; | ||||
|     u32 y = (begin_pixel_index / (stride * 8)) * 8; | ||||
| 
 | ||||
|     gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; | ||||
| 
 | ||||
|     auto glbuf_next_tile = [&] { | ||||
|         x = (x + 8) % stride; | ||||
|         gl_buffer += 8 * gl_bytes_per_pixel; | ||||
|         if (!x) { | ||||
|             y += 8; | ||||
|             gl_buffer -= stride * 9 * gl_bytes_per_pixel; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     u8* tile_buffer = Memory::GetPhysicalPointer(start); | ||||
| 
 | ||||
|     if (start < aligned_start && !morton_to_gl) { | ||||
|         std::array<u8, tile_size> tmp_buf; | ||||
|         MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||||
|         std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], | ||||
|                     std::min(aligned_start, end) - start); | ||||
| 
 | ||||
|         tile_buffer += aligned_start - start; | ||||
|         glbuf_next_tile(); | ||||
|     } | ||||
| 
 | ||||
|     u8* const buffer_end = tile_buffer + aligned_end - aligned_start; | ||||
|     while (tile_buffer < buffer_end) { | ||||
|         MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); | ||||
|         tile_buffer += tile_size; | ||||
|         glbuf_next_tile(); | ||||
|     } | ||||
| 
 | ||||
|     if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { | ||||
|         std::array<u8, tile_size> tmp_buf; | ||||
|         MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||||
|         std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { | ||||
|     MortonCopy<true, PixelFormat::RGBA8>,  // 0
 | ||||
|     MortonCopy<true, PixelFormat::RGB8>,   // 1
 | ||||
|     MortonCopy<true, PixelFormat::RGB5A1>, // 2
 | ||||
|     MortonCopy<true, PixelFormat::RGB565>, // 3
 | ||||
|     MortonCopy<true, PixelFormat::RGBA4>,  // 4
 | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr,                             // 5 - 13
 | ||||
|     MortonCopy<true, PixelFormat::D16>,  // 14
 | ||||
|     nullptr,                             // 15
 | ||||
|     MortonCopy<true, PixelFormat::D24>,  // 16
 | ||||
|     MortonCopy<true, PixelFormat::D24S8> // 17
 | ||||
| }; | ||||
| 
 | ||||
| static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { | ||||
|     MortonCopy<false, PixelFormat::RGBA8>,  // 0
 | ||||
|     MortonCopy<false, PixelFormat::RGB8>,   // 1
 | ||||
|     MortonCopy<false, PixelFormat::RGB5A1>, // 2
 | ||||
|     MortonCopy<false, PixelFormat::RGB565>, // 3
 | ||||
|     MortonCopy<false, PixelFormat::RGBA4>,  // 4
 | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr,                              // 5 - 13
 | ||||
|     MortonCopy<false, PixelFormat::D16>,  // 14
 | ||||
|     nullptr,                              // 15
 | ||||
|     MortonCopy<false, PixelFormat::D24>,  // 16
 | ||||
|     MortonCopy<false, PixelFormat::D24S8> // 17
 | ||||
| }; | ||||
| 
 | ||||
| void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, | ||||
|                                          CachedSurface::SurfaceType type, | ||||
|                                          const MathUtil::Rectangle<int>& src_rect, | ||||
|  |  | |||
|  | @ -71,8 +71,8 @@ struct CachedSurface { | |||
|         Invalid = 4, | ||||
|     }; | ||||
| 
 | ||||
|     static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { | ||||
|         static const std::array<unsigned int, 18> bpp_table = { | ||||
|     static constexpr unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { | ||||
|         constexpr std::array<unsigned int, 18> bpp_table = { | ||||
|             32, // RGBA8
 | ||||
|             24, // RGB8
 | ||||
|             16, // RGB5A1
 | ||||
|  | @ -142,7 +142,7 @@ struct CachedSurface { | |||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     static SurfaceType GetFormatType(PixelFormat pixel_format) { | ||||
|     static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { | ||||
|         if ((unsigned int)pixel_format < 5) { | ||||
|             return SurfaceType::Color; | ||||
|         } | ||||
|  |  | |||
|  | @ -9,9 +9,9 @@ | |||
| namespace VideoCore { | ||||
| 
 | ||||
| // 8x8 Z-Order coordinate from 2D coordinates
 | ||||
| static inline u32 MortonInterleave(u32 x, u32 y) { | ||||
|     static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||||
|     static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||||
| static constexpr u32 MortonInterleave(u32 x, u32 y) { | ||||
|     constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||||
|     constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||||
|     return xlut[x % 8] + ylut[y % 8]; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue