mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 05:40:04 +00:00 
			
		
		
		
	OpenGL Cache: Optimize Morton Copy to copy in tiles
Compiles two lookup arrays of functions for the different configurations of Morton Copy.
This commit is contained in:
		
							parent
							
								
									160ac25527
								
							
						
					
					
						commit
						e9e2d444ef
					
				
					 3 changed files with 202 additions and 50 deletions
				
			
		|  | @ -46,6 +46,82 @@ static const std::array<FormatTuple, 4> depth_format_tuples = {{ | ||||||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
 |     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
 | ||||||
| }}; | }}; | ||||||
| 
 | 
 | ||||||
|  | static bool FillSurface(const Surface& surface, const u8* fill_data, | ||||||
|  |                         const MathUtil::Rectangle<u32>& fill_rect) { | ||||||
|  |     OpenGLState state = OpenGLState::GetCurState(); | ||||||
|  | 
 | ||||||
|  |     OpenGLState prev_state = state; | ||||||
|  |     SCOPE_EXIT({ prev_state.Apply(); }); | ||||||
|  | 
 | ||||||
|  |     OpenGLState::ResetTexture(surface->texture.handle); | ||||||
|  | 
 | ||||||
|  |     state.scissor.enabled = true; | ||||||
|  |     state.scissor.x = static_cast<GLint>(fill_rect.left); | ||||||
|  |     state.scissor.y = static_cast<GLint>(std::min(fill_rect.top, fill_rect.bottom)); | ||||||
|  |     state.scissor.width = static_cast<GLsizei>(fill_rect.GetWidth()); | ||||||
|  |     state.scissor.height = static_cast<GLsizei>(fill_rect.GetHeight()); | ||||||
|  | 
 | ||||||
|  |     state.draw.draw_framebuffer = transfer_framebuffers[1].handle; | ||||||
|  |     state.Apply(); | ||||||
|  | 
 | ||||||
|  |     if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||||||
|  |                                surface->texture.handle, 0); | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||||||
|  |                                0); | ||||||
|  | 
 | ||||||
|  |         Pica::Texture::TextureInfo tex_info{}; | ||||||
|  |         tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(surface->pixel_format); | ||||||
|  |         Math::Vec4<u8> color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); | ||||||
|  | 
 | ||||||
|  |         std::array<GLfloat, 4> color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, | ||||||
|  |                                                color.w / 255.f}; | ||||||
|  | 
 | ||||||
|  |         state.color_mask.red_enabled = GL_TRUE; | ||||||
|  |         state.color_mask.green_enabled = GL_TRUE; | ||||||
|  |         state.color_mask.blue_enabled = GL_TRUE; | ||||||
|  |         state.color_mask.alpha_enabled = GL_TRUE; | ||||||
|  |         state.Apply(); | ||||||
|  |         glClearBufferfv(GL_COLOR, 0, &color_values[0]); | ||||||
|  |     } else if (surface->type == SurfaceType::Depth) { | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||||
|  |                                surface->texture.handle, 0); | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||||
|  | 
 | ||||||
|  |         u32 value_32bit = 0; | ||||||
|  |         GLfloat value_float; | ||||||
|  | 
 | ||||||
|  |         if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { | ||||||
|  |             std::memcpy(&value_32bit, fill_data, 2); | ||||||
|  |             value_float = value_32bit / 65535.0f; // 2^16 - 1
 | ||||||
|  |         } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { | ||||||
|  |             std::memcpy(&value_32bit, fill_data, 3); | ||||||
|  |             value_float = value_32bit / 16777215.0f; // 2^24 - 1
 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         state.depth.write_mask = GL_TRUE; | ||||||
|  |         state.Apply(); | ||||||
|  |         glClearBufferfv(GL_DEPTH, 0, &value_float); | ||||||
|  |     } else if (surface->type == SurfaceType::DepthStencil) { | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||||
|  |         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||||
|  |                                surface->texture.handle, 0); | ||||||
|  | 
 | ||||||
|  |         u32 value_32bit; | ||||||
|  |         std::memcpy(&value_32bit, fill_data, 4); | ||||||
|  | 
 | ||||||
|  |         GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
 | ||||||
|  |         GLint value_int = (value_32bit >> 24); | ||||||
|  | 
 | ||||||
|  |         state.depth.write_mask = GL_TRUE; | ||||||
|  |         state.stencil.write_mask = -1; | ||||||
|  |         state.Apply(); | ||||||
|  |         glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | ||||||
|     transfer_framebuffers[0].Create(); |     transfer_framebuffers[0].Create(); | ||||||
|     transfer_framebuffers[1].Create(); |     transfer_framebuffers[1].Create(); | ||||||
|  | @ -55,55 +131,131 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||||||
|     FlushAll(); |     FlushAll(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, | template <bool morton_to_gl, PixelFormat format> | ||||||
|                              u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, | static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { | ||||||
|                              u8* gl_data, bool morton_to_gl) { |     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||||||
|     using PixelFormat = CachedSurface::PixelFormat; |     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||||
| 
 |     for (u32 y = 0; y < 8; ++y) { | ||||||
|     u8* data_ptrs[2]; |         for (u32 x = 0; x < 8; ++x) { | ||||||
|     u32 depth_stencil_shifts[2] = {24, 8}; |             u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; | ||||||
| 
 |             u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; | ||||||
|     if (morton_to_gl) { |             if (morton_to_gl) { | ||||||
|         std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]); |                 if (format == PixelFormat::D24S8) { | ||||||
|     } |                     gl_ptr[0] = tile_ptr[3]; | ||||||
| 
 |                     std::memcpy(gl_ptr + 1, tile_ptr, 3); | ||||||
|     if (pixel_format == PixelFormat::D24S8) { |                 } else { | ||||||
|         for (unsigned y = 0; y < height; ++y) { |                     std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); | ||||||
|             for (unsigned x = 0; x < width; ++x) { |                 } | ||||||
|                 const u32 coarse_y = y & ~7; |             } else { | ||||||
|                 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |                 if (format == PixelFormat::D24S8) { | ||||||
|                                     coarse_y * width * bytes_per_pixel; |                     std::memcpy(tile_ptr, gl_ptr + 1, 3); | ||||||
|                 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; |                     tile_ptr[3] = gl_ptr[0]; | ||||||
| 
 |                 } else { | ||||||
|                 data_ptrs[morton_to_gl] = morton_data + morton_offset; |                     std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); | ||||||
|                 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; |                 } | ||||||
| 
 |  | ||||||
|                 // Swap depth and stencil value ordering since 3DS does not match OpenGL
 |  | ||||||
|                 u32 depth_stencil; |  | ||||||
|                 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32)); |  | ||||||
|                 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | |  | ||||||
|                                 (depth_stencil >> depth_stencil_shifts[1]); |  | ||||||
| 
 |  | ||||||
|                 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32)); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         for (unsigned y = 0; y < height; ++y) { |  | ||||||
|             for (unsigned x = 0; x < width; ++x) { |  | ||||||
|                 const u32 coarse_y = y & ~7; |  | ||||||
|                 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + |  | ||||||
|                                     coarse_y * width * bytes_per_pixel; |  | ||||||
|                 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; |  | ||||||
| 
 |  | ||||||
|                 data_ptrs[morton_to_gl] = morton_data + morton_offset; |  | ||||||
|                 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; |  | ||||||
| 
 |  | ||||||
|                 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <bool morton_to_gl, PixelFormat format> | ||||||
|  | static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { | ||||||
|  |     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||||||
|  |     constexpr u32 tile_size = bytes_per_pixel * 64; | ||||||
|  | 
 | ||||||
|  |     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||||
|  |     static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); | ||||||
|  |     gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; | ||||||
|  | 
 | ||||||
|  |     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||||||
|  |     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||||||
|  |     const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); | ||||||
|  | 
 | ||||||
|  |     ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); | ||||||
|  | 
 | ||||||
|  |     const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; | ||||||
|  |     u32 x = (begin_pixel_index % (stride * 8)) / 8; | ||||||
|  |     u32 y = (begin_pixel_index / (stride * 8)) * 8; | ||||||
|  | 
 | ||||||
|  |     gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; | ||||||
|  | 
 | ||||||
|  |     auto glbuf_next_tile = [&] { | ||||||
|  |         x = (x + 8) % stride; | ||||||
|  |         gl_buffer += 8 * gl_bytes_per_pixel; | ||||||
|  |         if (!x) { | ||||||
|  |             y += 8; | ||||||
|  |             gl_buffer -= stride * 9 * gl_bytes_per_pixel; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     u8* tile_buffer = Memory::GetPhysicalPointer(start); | ||||||
|  | 
 | ||||||
|  |     if (start < aligned_start && !morton_to_gl) { | ||||||
|  |         std::array<u8, tile_size> tmp_buf; | ||||||
|  |         MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||||||
|  |         std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], | ||||||
|  |                     std::min(aligned_start, end) - start); | ||||||
|  | 
 | ||||||
|  |         tile_buffer += aligned_start - start; | ||||||
|  |         glbuf_next_tile(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u8* const buffer_end = tile_buffer + aligned_end - aligned_start; | ||||||
|  |     while (tile_buffer < buffer_end) { | ||||||
|  |         MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); | ||||||
|  |         tile_buffer += tile_size; | ||||||
|  |         glbuf_next_tile(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { | ||||||
|  |         std::array<u8, tile_size> tmp_buf; | ||||||
|  |         MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||||||
|  |         std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { | ||||||
|  |     MortonCopy<true, PixelFormat::RGBA8>,  // 0
 | ||||||
|  |     MortonCopy<true, PixelFormat::RGB8>,   // 1
 | ||||||
|  |     MortonCopy<true, PixelFormat::RGB5A1>, // 2
 | ||||||
|  |     MortonCopy<true, PixelFormat::RGB565>, // 3
 | ||||||
|  |     MortonCopy<true, PixelFormat::RGBA4>,  // 4
 | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr,                             // 5 - 13
 | ||||||
|  |     MortonCopy<true, PixelFormat::D16>,  // 14
 | ||||||
|  |     nullptr,                             // 15
 | ||||||
|  |     MortonCopy<true, PixelFormat::D24>,  // 16
 | ||||||
|  |     MortonCopy<true, PixelFormat::D24S8> // 17
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { | ||||||
|  |     MortonCopy<false, PixelFormat::RGBA8>,  // 0
 | ||||||
|  |     MortonCopy<false, PixelFormat::RGB8>,   // 1
 | ||||||
|  |     MortonCopy<false, PixelFormat::RGB5A1>, // 2
 | ||||||
|  |     MortonCopy<false, PixelFormat::RGB565>, // 3
 | ||||||
|  |     MortonCopy<false, PixelFormat::RGBA4>,  // 4
 | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr, | ||||||
|  |     nullptr,                              // 5 - 13
 | ||||||
|  |     MortonCopy<false, PixelFormat::D16>,  // 14
 | ||||||
|  |     nullptr,                              // 15
 | ||||||
|  |     MortonCopy<false, PixelFormat::D24>,  // 16
 | ||||||
|  |     MortonCopy<false, PixelFormat::D24S8> // 17
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, | void RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, | ||||||
|                                          CachedSurface::SurfaceType type, |                                          CachedSurface::SurfaceType type, | ||||||
|                                          const MathUtil::Rectangle<int>& src_rect, |                                          const MathUtil::Rectangle<int>& src_rect, | ||||||
|  |  | ||||||
|  | @ -71,8 +71,8 @@ struct CachedSurface { | ||||||
|         Invalid = 4, |         Invalid = 4, | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { |     static constexpr unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { | ||||||
|         static const std::array<unsigned int, 18> bpp_table = { |         constexpr std::array<unsigned int, 18> bpp_table = { | ||||||
|             32, // RGBA8
 |             32, // RGBA8
 | ||||||
|             24, // RGB8
 |             24, // RGB8
 | ||||||
|             16, // RGB5A1
 |             16, // RGB5A1
 | ||||||
|  | @ -142,7 +142,7 @@ struct CachedSurface { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static SurfaceType GetFormatType(PixelFormat pixel_format) { |     static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { | ||||||
|         if ((unsigned int)pixel_format < 5) { |         if ((unsigned int)pixel_format < 5) { | ||||||
|             return SurfaceType::Color; |             return SurfaceType::Color; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -9,9 +9,9 @@ | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| 
 | 
 | ||||||
| // 8x8 Z-Order coordinate from 2D coordinates
 | // 8x8 Z-Order coordinate from 2D coordinates
 | ||||||
| static inline u32 MortonInterleave(u32 x, u32 y) { | static constexpr u32 MortonInterleave(u32 x, u32 y) { | ||||||
|     static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; |     constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||||||
|     static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; |     constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||||||
|     return xlut[x % 8] + ylut[y % 8]; |     return xlut[x % 8] + ylut[y % 8]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue