mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-30 21:30:04 +00:00 
			
		
		
		
	OpenGL Cache: Split CachedSurface
Breaks CachedSurface into two classes: SurfaceParams, the parameters used to create or find a cached surface, and CachedSurface, the actual cached surface. This also adds a few helper methods for getting surfaces from the cache.
This commit is contained in:
		
							parent
							
								
									0b98b768f5
								
							
						
					
					
						commit
						3e1cbb7d14
					
				
					 3 changed files with 649 additions and 290 deletions
				
			
		|  | @ -342,6 +342,231 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec | |||
|     return true; | ||||
| } | ||||
| 
 | ||||
| SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { | ||||
|     SurfaceParams params = *this; | ||||
| 
 | ||||
|     const u32 stride_tiled_bytes = BytesInPixels(stride * (is_tiled ? 8 : 1)); | ||||
|     PAddr aligned_start = | ||||
|         addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); | ||||
|     PAddr aligned_end = | ||||
|         addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); | ||||
| 
 | ||||
|     if (aligned_end - aligned_start > stride_tiled_bytes) { | ||||
|         params.addr = aligned_start; | ||||
|         params.height = (aligned_end - aligned_start) / BytesInPixels(stride); | ||||
|     } else { | ||||
|         // 1 row
 | ||||
|         ASSERT(aligned_end - aligned_start == stride_tiled_bytes); | ||||
|         const u32 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); | ||||
|         aligned_start = | ||||
|             addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); | ||||
|         aligned_end = | ||||
|             addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); | ||||
|         params.addr = aligned_start; | ||||
|         params.width = PixelsInBytes(aligned_end - aligned_start) / (is_tiled ? 8 : 1); | ||||
|         params.height = is_tiled ? 8 : 1; | ||||
|     } | ||||
|     params.UpdateParams(); | ||||
| 
 | ||||
|     return params; | ||||
| } | ||||
| 
 | ||||
| SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { | ||||
|     if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     if (unscaled_rect.bottom > unscaled_rect.top) { | ||||
|         std::swap(unscaled_rect.top, unscaled_rect.bottom); | ||||
|     } | ||||
| 
 | ||||
|     if (is_tiled) { | ||||
|         unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; | ||||
|         unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; | ||||
|         unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; | ||||
|         unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; | ||||
|     } | ||||
| 
 | ||||
|     const u32 stride_tiled = (!is_tiled ? stride : stride * 8); | ||||
| 
 | ||||
|     const u32 pixel_offset = | ||||
|         stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + | ||||
|         unscaled_rect.left; | ||||
| 
 | ||||
|     const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); | ||||
| 
 | ||||
|     return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; | ||||
| } | ||||
| 
 | ||||
| MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { | ||||
|     const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); | ||||
| 
 | ||||
|     if (is_tiled) { | ||||
|         const int x0 = (begin_pixel_index % (stride * 8)) / 8; | ||||
|         const int y0 = (begin_pixel_index / (stride * 8)) * 8; | ||||
|         return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, | ||||
|                                         height - (y0 + sub_surface.height)); // Top to bottom
 | ||||
|     } | ||||
| 
 | ||||
|     const int x0 = begin_pixel_index % stride; | ||||
|     const int y0 = begin_pixel_index / stride; | ||||
|     return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, | ||||
|                                     y0); // Bottom to top
 | ||||
| } | ||||
| 
 | ||||
| MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { | ||||
|     auto rect = GetSubRect(sub_surface); | ||||
|     rect.left = rect.left * res_scale; | ||||
|     rect.right = rect.right * res_scale; | ||||
|     rect.top = rect.top * res_scale; | ||||
|     rect.bottom = rect.bottom * res_scale; | ||||
|     return rect; | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { | ||||
|     return (other_surface.addr == addr && other_surface.width == width && | ||||
|             other_surface.height == height && other_surface.stride == stride && | ||||
|             other_surface.pixel_format == pixel_format && other_surface.is_tiled == is_tiled); | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { | ||||
|     if (sub_surface.addr < addr || sub_surface.end > end || sub_surface.stride != stride || | ||||
|         sub_surface.pixel_format != pixel_format || sub_surface.is_tiled != is_tiled || | ||||
|         (sub_surface.addr - addr) * 8 % GetFormatBpp() != 0) | ||||
|         return false; | ||||
| 
 | ||||
|     auto rect = GetSubRect(sub_surface); | ||||
| 
 | ||||
|     if (rect.left + sub_surface.width > stride) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     if (is_tiled) { | ||||
|         return PixelsInBytes(sub_surface.addr - addr) % 64 == 0 && sub_surface.height % 8 == 0 && | ||||
|                sub_surface.width % 8 == 0; | ||||
|     } | ||||
| 
 | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { | ||||
|     if (pixel_format == PixelFormat::Invalid || pixel_format != expanded_surface.pixel_format || | ||||
|         is_tiled != expanded_surface.is_tiled || addr > expanded_surface.end || | ||||
|         expanded_surface.addr > end || stride != expanded_surface.stride) | ||||
|         return false; | ||||
| 
 | ||||
|     const u32 byte_offset = | ||||
|         std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr); | ||||
| 
 | ||||
|     const int x0 = byte_offset % BytesInPixels(stride); | ||||
|     const int y0 = byte_offset / BytesInPixels(stride); | ||||
| 
 | ||||
|     return x0 == 0 && (!is_tiled || y0 % 8 == 0); | ||||
| } | ||||
| 
 | ||||
| bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { | ||||
|     if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || | ||||
|         end < texcopy_params.end || ((texcopy_params.addr - addr) * 8) % GetFormatBpp() != 0 || | ||||
|         (texcopy_params.width * 8) % GetFormatBpp() != 0 || | ||||
|         (texcopy_params.stride * 8) % GetFormatBpp() != 0) | ||||
|         return false; | ||||
| 
 | ||||
|     const u32 begin_pixel_index = PixelsInBytes(texcopy_params.addr - addr); | ||||
|     const int x0 = begin_pixel_index % stride; | ||||
|     const int y0 = begin_pixel_index / stride; | ||||
| 
 | ||||
|     if (!is_tiled) | ||||
|         return ((texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride) && | ||||
|                 x0 + PixelsInBytes(texcopy_params.width) <= stride); | ||||
| 
 | ||||
|     return (PixelsInBytes(texcopy_params.addr - addr) % 64 == 0 && | ||||
|             PixelsInBytes(texcopy_params.width) % 64 == 0 && | ||||
|             (texcopy_params.height == 1 || PixelsInBytes(texcopy_params.stride) == stride * 8) && | ||||
|             x0 + PixelsInBytes(texcopy_params.width / 8) <= stride); | ||||
| } | ||||
| 
 | ||||
| bool CachedSurface::CanFill(const SurfaceParams& dest_surface, | ||||
|                             SurfaceInterval fill_interval) const { | ||||
|     if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && | ||||
|         boost::icl::first(fill_interval) >= addr && | ||||
|         boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
 | ||||
|         dest_surface.FromInterval(fill_interval).GetInterval() == | ||||
|             fill_interval) { // make sure interval is a rectangle in dest surface
 | ||||
|         if (fill_size * 8 != dest_surface.GetFormatBpp()) { | ||||
|             // Check if bits repeat for our fill_size
 | ||||
|             const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); | ||||
|             std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); | ||||
| 
 | ||||
|             for (u32 i = 0; i < dest_bytes_per_pixel; ++i) | ||||
|                 std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); | ||||
| 
 | ||||
|             for (u32 i = 0; i < fill_size; ++i) | ||||
|                 if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], | ||||
|                                 dest_bytes_per_pixel) != 0) | ||||
|                     return false; | ||||
| 
 | ||||
|             if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) | ||||
|                 return false; | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, | ||||
|                             SurfaceInterval copy_interval) const { | ||||
|     SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); | ||||
|     ASSERT(subrect_params.GetInterval() == copy_interval); | ||||
|     if (CanSubRect(subrect_params)) | ||||
|         return true; | ||||
| 
 | ||||
|     if (CanFill(dest_surface, copy_interval)) | ||||
|         return true; | ||||
| 
 | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { | ||||
|     SurfaceInterval result{}; | ||||
|     const auto valid_regions = | ||||
|         SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; | ||||
|     for (auto& valid_interval : valid_regions) { | ||||
|         const SurfaceInterval aligned_interval{ | ||||
|             addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, | ||||
|                                    BytesInPixels(is_tiled ? 8 * 8 : 1)), | ||||
|             addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, | ||||
|                                      BytesInPixels(is_tiled ? 8 * 8 : 1))}; | ||||
| 
 | ||||
|         if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || | ||||
|             boost::icl::length(aligned_interval) == 0) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         // Get the rectangle within aligned_interval
 | ||||
|         const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); | ||||
|         SurfaceInterval rect_interval{ | ||||
|             addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), | ||||
|             addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), | ||||
|         }; | ||||
|         if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { | ||||
|             // 1 row
 | ||||
|             rect_interval = aligned_interval; | ||||
|         } else if (boost::icl::length(rect_interval) == 0) { | ||||
|             // 2 rows that do not make a rectangle, return the larger one
 | ||||
|             const SurfaceInterval row1{boost::icl::first(aligned_interval), | ||||
|                                        boost::icl::first(rect_interval)}; | ||||
|             const SurfaceInterval row2{boost::icl::first(rect_interval), | ||||
|                                        boost::icl::last_next(aligned_interval)}; | ||||
|             rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; | ||||
|         } | ||||
| 
 | ||||
|         if (boost::icl::length(rect_interval) > boost::icl::length(result)) { | ||||
|             result = rect_interval; | ||||
|         } | ||||
|     } | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, | ||||
|                                             const MathUtil::Rectangle<int>& src_rect, | ||||
|                                             CachedSurface* dst_surface, | ||||
|  | @ -381,201 +606,61 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup | |||
|     cur_state.Apply(); | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192)); | ||||
| CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, | ||||
|                                                  bool load_if_create) { | ||||
|     using PixelFormat = CachedSurface::PixelFormat; | ||||
|     using SurfaceType = CachedSurface::SurfaceType; | ||||
| MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); | ||||
| void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||||
|     ASSERT(type != SurfaceType::Fill); | ||||
| 
 | ||||
|     if (params.addr == 0) { | ||||
|         return nullptr; | ||||
|     const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); | ||||
|     if (texture_src_data == nullptr) | ||||
|         return; | ||||
| 
 | ||||
|     if (gl_buffer == nullptr) { | ||||
|         gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | ||||
|         gl_buffer.reset(new u8[gl_buffer_size]); | ||||
|     } | ||||
| 
 | ||||
|     u32 params_size = | ||||
|         params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8; | ||||
|     // TODO: Should probably be done in ::Memory:: and check for other regions too
 | ||||
|     if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) | ||||
|         load_end = Memory::VRAM_VADDR_END; | ||||
| 
 | ||||
|     // Check for an exact match in existing surfaces
 | ||||
|     CachedSurface* best_exact_surface = nullptr; | ||||
|     float exact_surface_goodness = -1.f; | ||||
|     if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) | ||||
|         load_start = Memory::VRAM_VADDR; | ||||
| 
 | ||||
|     auto surface_interval = | ||||
|         boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size); | ||||
|     auto range = surface_cache.equal_range(surface_interval); | ||||
|     for (auto it = range.first; it != range.second; ++it) { | ||||
|         for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) { | ||||
|             CachedSurface* surface = it2->get(); | ||||
|     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | ||||
| 
 | ||||
|             // Check if the request matches the surface exactly
 | ||||
|             if (params.addr == surface->addr && params.width == surface->width && | ||||
|                 params.height == surface->height && params.pixel_format == surface->pixel_format) { | ||||
|                 // Make sure optional param-matching criteria are fulfilled
 | ||||
|                 bool tiling_match = (params.is_tiled == surface->is_tiled); | ||||
|                 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && | ||||
|                                         params.res_scale_height == surface->res_scale_height); | ||||
|                 if (!match_res_scale || res_scale_match) { | ||||
|                     // Prioritize same-tiling and highest resolution surfaces
 | ||||
|                     float match_goodness = | ||||
|                         (float)tiling_match + surface->res_scale_width * surface->res_scale_height; | ||||
|                     if (match_goodness > exact_surface_goodness || surface->dirty) { | ||||
|                         exact_surface_goodness = match_goodness; | ||||
|                         best_exact_surface = surface; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     ASSERT(load_start >= addr && load_end <= end); | ||||
|     const u32 start_offset = load_start - addr; | ||||
| 
 | ||||
|     // Return the best exact surface if found
 | ||||
|     if (best_exact_surface != nullptr) { | ||||
|         return best_exact_surface; | ||||
|     } | ||||
| 
 | ||||
|     // No matching surfaces found, so create a new one
 | ||||
|     u8* texture_src_data = Memory::GetPhysicalPointer(params.addr); | ||||
|     if (texture_src_data == nullptr) { | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_SurfaceUpload); | ||||
| 
 | ||||
|     // Stride only applies to linear images.
 | ||||
|     ASSERT(params.pixel_stride == 0 || !params.is_tiled); | ||||
| 
 | ||||
|     std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>(); | ||||
| 
 | ||||
|     new_surface->addr = params.addr; | ||||
|     new_surface->size = params_size; | ||||
| 
 | ||||
|     new_surface->texture.Create(); | ||||
|     new_surface->width = params.width; | ||||
|     new_surface->height = params.height; | ||||
|     new_surface->pixel_stride = params.pixel_stride; | ||||
|     new_surface->res_scale_width = params.res_scale_width; | ||||
|     new_surface->res_scale_height = params.res_scale_height; | ||||
| 
 | ||||
|     new_surface->is_tiled = params.is_tiled; | ||||
|     new_surface->pixel_format = params.pixel_format; | ||||
|     new_surface->dirty = false; | ||||
| 
 | ||||
|     if (!load_if_create) { | ||||
|         // Don't load any data; just allocate the surface's texture
 | ||||
|         AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, | ||||
|                                new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); | ||||
|     if (!is_tiled) { | ||||
|         ASSERT(type == SurfaceType::Color); | ||||
|         std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, | ||||
|                     load_end - load_start); | ||||
|     } else { | ||||
|         // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead
 | ||||
|         // of memory upload below if that's a common scenario in some game
 | ||||
| 
 | ||||
|         Memory::RasterizerFlushRegion(params.addr, params_size); | ||||
| 
 | ||||
|         // Load data from memory to the new surface
 | ||||
|         OpenGLState cur_state = OpenGLState::GetCurState(); | ||||
| 
 | ||||
|         GLuint old_tex = cur_state.texture_units[0].texture_2d; | ||||
|         cur_state.texture_units[0].texture_2d = new_surface->texture.handle; | ||||
|         cur_state.Apply(); | ||||
|         glActiveTexture(GL_TEXTURE0); | ||||
| 
 | ||||
|         if (!new_surface->is_tiled) { | ||||
|             // TODO: Ensure this will always be a color format, not a depth or other format
 | ||||
|             ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size()); | ||||
|             const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format]; | ||||
| 
 | ||||
|             glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->pixel_stride); | ||||
|             glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, | ||||
|                          tuple.format, tuple.type, texture_src_data); | ||||
|             glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | ||||
|         } else { | ||||
|             SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format); | ||||
|             if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { | ||||
|                 FormatTuple tuple; | ||||
|                 if ((size_t)params.pixel_format < fb_format_tuples.size()) { | ||||
|                     tuple = fb_format_tuples[(unsigned int)params.pixel_format]; | ||||
|                 } else { | ||||
|                     // Texture
 | ||||
|                     tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; | ||||
|                 } | ||||
| 
 | ||||
|                 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); | ||||
| 
 | ||||
|                 Pica::Texture::TextureInfo tex_info; | ||||
|                 tex_info.width = params.width; | ||||
|                 tex_info.height = params.height; | ||||
|                 tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format; | ||||
|         if (type == SurfaceType::Texture) { | ||||
|             Pica::Texture::TextureInfo tex_info{}; | ||||
|             tex_info.width = width; | ||||
|             tex_info.height = height; | ||||
|             tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(pixel_format); | ||||
|             tex_info.SetDefaultStride(); | ||||
|                 tex_info.physical_address = params.addr; | ||||
|             tex_info.physical_address = addr; | ||||
| 
 | ||||
|                 for (unsigned y = 0; y < params.height; ++y) { | ||||
|                     for (unsigned x = 0; x < params.width; ++x) { | ||||
|                         tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture( | ||||
|                             texture_src_data, x, params.height - 1 - y, tex_info); | ||||
|             const auto load_interval = SurfaceInterval(load_start, load_end); | ||||
|             const auto rect = GetSubRect(FromInterval(load_interval)); | ||||
|             ASSERT(FromInterval(load_interval).GetInterval() == load_interval); | ||||
| 
 | ||||
|             for (unsigned y = rect.bottom; y < rect.top; ++y) { | ||||
|                 for (unsigned x = rect.left; x < rect.right; ++x) { | ||||
|                     auto vec4 = | ||||
|                         Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); | ||||
|                     const size_t offset = (x + (width * y)) * 4; | ||||
|                     std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|                 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, | ||||
|                              0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data()); | ||||
|         } else { | ||||
|                 // Depth/Stencil formats need special treatment since they aren't sampleable using
 | ||||
|                 // LookupTexture and can't use RGBA format
 | ||||
|                 size_t tuple_idx = (size_t)params.pixel_format - 14; | ||||
|                 ASSERT(tuple_idx < depth_format_tuples.size()); | ||||
|                 const FormatTuple& tuple = depth_format_tuples[tuple_idx]; | ||||
| 
 | ||||
|                 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8; | ||||
| 
 | ||||
|                 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
 | ||||
|                 bool use_4bpp = (params.pixel_format == PixelFormat::D24); | ||||
| 
 | ||||
|                 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; | ||||
| 
 | ||||
|                 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * | ||||
|                                                      gl_bytes_per_pixel); | ||||
| 
 | ||||
|                 u8* temp_fb_depth_buffer_ptr = | ||||
|                     use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data(); | ||||
| 
 | ||||
|                 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, | ||||
|                                  gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, | ||||
|                                  true); | ||||
| 
 | ||||
|                 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, | ||||
|                              0, tuple.format, tuple.type, temp_fb_depth_buffer.data()); | ||||
|             morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, | ||||
|                                                                 load_start, load_end); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|         // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
 | ||||
|         if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) { | ||||
|             OGLTexture scaled_texture; | ||||
|             scaled_texture.Create(); | ||||
| 
 | ||||
|             AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, | ||||
|                                    new_surface->GetScaledWidth(), new_surface->GetScaledHeight()); | ||||
|             BlitTextures(new_surface->texture.handle, scaled_texture.handle, | ||||
|                          CachedSurface::GetFormatType(new_surface->pixel_format), | ||||
|                          MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height), | ||||
|                          MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), | ||||
|                                                   new_surface->GetScaledHeight())); | ||||
| 
 | ||||
|             new_surface->texture.Release(); | ||||
|             new_surface->texture.handle = scaled_texture.handle; | ||||
|             scaled_texture.handle = 0; | ||||
|             cur_state.texture_units[0].texture_2d = new_surface->texture.handle; | ||||
|             cur_state.Apply(); | ||||
|         } | ||||
| 
 | ||||
|         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); | ||||
|         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||||
|         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||||
|         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||||
| 
 | ||||
|         cur_state.texture_units[0].texture_2d = old_tex; | ||||
|         cur_state.Apply(); | ||||
|     } | ||||
| 
 | ||||
|     Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1); | ||||
|     surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open( | ||||
|                                          new_surface->addr, new_surface->addr + new_surface->size), | ||||
|                                      std::set<std::shared_ptr<CachedSurface>>({new_surface}))); | ||||
|     return new_surface.get(); | ||||
| } | ||||
| 
 | ||||
| CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, | ||||
|  | @ -826,102 +911,272 @@ CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryF | |||
|     return nullptr; | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64)); | ||||
| void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) { | ||||
|     using PixelFormat = CachedSurface::PixelFormat; | ||||
|     using SurfaceType = CachedSurface::SurfaceType; | ||||
| 
 | ||||
|     if (!surface->dirty) { | ||||
| MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | ||||
| void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||||
|     u8* const dst_buffer = Memory::GetPhysicalPointer(addr); | ||||
|     if (dst_buffer == nullptr) | ||||
|         return; | ||||
| 
 | ||||
|     ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | ||||
| 
 | ||||
|     // TODO: Should probably be done in ::Memory:: and check for other regions too
 | ||||
|     // same as loadglbuffer()
 | ||||
|     if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) | ||||
|         flush_end = Memory::VRAM_VADDR_END; | ||||
| 
 | ||||
|     if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) | ||||
|         flush_start = Memory::VRAM_VADDR; | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); | ||||
| 
 | ||||
|     ASSERT(flush_start >= addr && flush_end <= end); | ||||
|     const u32 start_offset = flush_start - addr; | ||||
|     const u32 end_offset = flush_end - addr; | ||||
| 
 | ||||
|     if (type == SurfaceType::Fill) { | ||||
|         const u32 coarse_start_offset = start_offset - (start_offset % fill_size); | ||||
|         const u32 backup_bytes = start_offset % fill_size; | ||||
|         std::array<u8, 4> backup_data; | ||||
|         if (backup_bytes) | ||||
|             std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); | ||||
| 
 | ||||
|         for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) | ||||
|             std::memcpy(&dst_buffer[offset], &fill_data[0], | ||||
|                         std::min(fill_size, end_offset - offset)); | ||||
| 
 | ||||
|         if (backup_bytes) | ||||
|             std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); | ||||
|     } else if (!is_tiled) { | ||||
|         ASSERT(type == SurfaceType::Color); | ||||
|         std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); | ||||
|     } else { | ||||
|         gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, | ||||
|                                                             flush_start, flush_end); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_SurfaceDownload); | ||||
| 
 | ||||
|     u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr); | ||||
|     if (dst_buffer == nullptr) { | ||||
| void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect) { | ||||
|     if (type == SurfaceType::Fill) | ||||
|         return; | ||||
| 
 | ||||
|     ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | ||||
| 
 | ||||
|     // Load data from memory to the surface
 | ||||
|     GLint x0 = static_cast<GLint>(rect.left); | ||||
|     GLint y0 = static_cast<GLint>(rect.bottom); | ||||
|     size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); | ||||
| 
 | ||||
|     const FormatTuple& tuple = GetFormatTuple(pixel_format); | ||||
|     GLuint target_tex = texture.handle; | ||||
| 
 | ||||
|     // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
 | ||||
|     // surface
 | ||||
|     OGLTexture unscaled_tex; | ||||
|     if (res_scale != 1) { | ||||
|         x0 = 0; | ||||
|         y0 = 0; | ||||
| 
 | ||||
|         unscaled_tex.Create(); | ||||
|         AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||||
|         target_tex = unscaled_tex.handle; | ||||
|     } | ||||
| 
 | ||||
|     OpenGLState cur_state = OpenGLState::GetCurState(); | ||||
| 
 | ||||
|     GLuint old_tex = cur_state.texture_units[0].texture_2d; | ||||
| 
 | ||||
|     OGLTexture unscaled_tex; | ||||
|     GLuint texture_to_flush = surface->texture.handle; | ||||
| 
 | ||||
|     // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
 | ||||
|     if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) { | ||||
|         unscaled_tex.Create(); | ||||
| 
 | ||||
|         AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, | ||||
|                                surface->height); | ||||
|         BlitTextures( | ||||
|             surface->texture.handle, unscaled_tex.handle, | ||||
|             CachedSurface::GetFormatType(surface->pixel_format), | ||||
|             MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()), | ||||
|             MathUtil::Rectangle<int>(0, 0, surface->width, surface->height)); | ||||
| 
 | ||||
|         texture_to_flush = unscaled_tex.handle; | ||||
|     } | ||||
| 
 | ||||
|     cur_state.texture_units[0].texture_2d = texture_to_flush; | ||||
|     cur_state.texture_units[0].texture_2d = target_tex; | ||||
|     cur_state.Apply(); | ||||
| 
 | ||||
|     // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
 | ||||
|     ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | ||||
|     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); | ||||
| 
 | ||||
|     glActiveTexture(GL_TEXTURE0); | ||||
|     glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), | ||||
|                     static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, | ||||
|                     &gl_buffer[buffer_offset]); | ||||
| 
 | ||||
|     if (!surface->is_tiled) { | ||||
|         // TODO: Ensure this will always be a color format, not a depth or other format
 | ||||
|         ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); | ||||
|         const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; | ||||
| 
 | ||||
|         glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->pixel_stride); | ||||
|         glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer); | ||||
|         glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||||
|     } else { | ||||
|         SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format); | ||||
|         if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) { | ||||
|             ASSERT((size_t)surface->pixel_format < fb_format_tuples.size()); | ||||
|             const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format]; | ||||
| 
 | ||||
|             u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; | ||||
| 
 | ||||
|             std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel); | ||||
| 
 | ||||
|             glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); | ||||
| 
 | ||||
|             // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion
 | ||||
|             // is necessary.
 | ||||
|             MortonCopyPixels(surface->pixel_format, surface->width, surface->height, | ||||
|                              bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), | ||||
|                              false); | ||||
|         } else { | ||||
|             // Depth/Stencil formats need special treatment since they aren't sampleable using
 | ||||
|             // LookupTexture and can't use RGBA format
 | ||||
|             size_t tuple_idx = (size_t)surface->pixel_format - 14; | ||||
|             ASSERT(tuple_idx < depth_format_tuples.size()); | ||||
|             const FormatTuple& tuple = depth_format_tuples[tuple_idx]; | ||||
| 
 | ||||
|             u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8; | ||||
| 
 | ||||
|             // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
 | ||||
|             bool use_4bpp = (surface->pixel_format == PixelFormat::D24); | ||||
| 
 | ||||
|             u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel; | ||||
| 
 | ||||
|             std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel); | ||||
| 
 | ||||
|             glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data()); | ||||
| 
 | ||||
|             u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data(); | ||||
| 
 | ||||
|             MortonCopyPixels(surface->pixel_format, surface->width, surface->height, | ||||
|                              bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, | ||||
|                              false); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     surface->dirty = false; | ||||
|     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | ||||
| 
 | ||||
|     cur_state.texture_units[0].texture_2d = old_tex; | ||||
|     cur_state.Apply(); | ||||
| 
 | ||||
|     if (res_scale != 1) { | ||||
|         auto scaled_rect = rect; | ||||
|         scaled_rect.left *= res_scale; | ||||
|         scaled_rect.top *= res_scale; | ||||
|         scaled_rect.right *= res_scale; | ||||
|         scaled_rect.bottom *= res_scale; | ||||
| 
 | ||||
|         BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, | ||||
|                      scaled_rect, type); | ||||
|     } | ||||
| } | ||||
| void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect) { | ||||
|     if (type == SurfaceType::Fill) | ||||
|         return; | ||||
|     // Copies `rect` of this surface's texture into gl_buffer; the buffer is allocated on first use
 | ||||
|     if (gl_buffer == nullptr) { | ||||
|         gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | ||||
|         gl_buffer.reset(new u8[gl_buffer_size]); | ||||
|     } | ||||
|     // Work on a copy of the GL state; NOTE(review): SCOPE_EXIT presumably re-applies prev_state on exit
 | ||||
|     OpenGLState state = OpenGLState::GetCurState(); | ||||
|     OpenGLState prev_state = state; | ||||
|     SCOPE_EXIT({ prev_state.Apply(); }); | ||||
| 
 | ||||
|     const FormatTuple& tuple = GetFormatTuple(pixel_format); | ||||
| 
 | ||||
|     // Ensure no bad interactions with GL_PACK_ALIGNMENT
 | ||||
|     ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | ||||
|     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); | ||||
|     size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); | ||||
|     // buffer_offset is the byte offset of pixel (rect.left, rect.bottom) in rows of `stride` pixels
 | ||||
|     // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
 | ||||
|     OGLTexture unscaled_tex; | ||||
|     if (res_scale != 1) { | ||||
|         auto scaled_rect = rect; | ||||
|         scaled_rect.left *= res_scale; | ||||
|         scaled_rect.top *= res_scale; | ||||
|         scaled_rect.right *= res_scale; | ||||
|         scaled_rect.bottom *= res_scale; | ||||
|         // Blit the scaled region into a temporary texture created with rect's unscaled width/height
 | ||||
|         unscaled_tex.Create(); | ||||
|         AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||||
|         BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, rect, type); | ||||
|         // Bind the temporary texture to unit 0 so glGetTexImage below reads from it
 | ||||
|         state.texture_units[0].texture_2d = unscaled_tex.handle; | ||||
|         state.Apply(); | ||||
| 
 | ||||
|         glActiveTexture(GL_TEXTURE0); | ||||
|         glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||||
|     } else { | ||||
|         state.ResetTexture(texture.handle); | ||||
|         state.draw.read_framebuffer = transfer_framebuffers[0].handle; | ||||
|         state.Apply(); | ||||
|         // Attach the texture at the attachment point matching its type and detach the other points
 | ||||
|         if (type == SurfaceType::Color || type == SurfaceType::Texture) { | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||||
|                                    texture.handle, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|         } else if (type == SurfaceType::Depth) { | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    texture.handle, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||
|         } else { | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    texture.handle, 0); | ||||
|         } | ||||
|         glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), | ||||
|                      static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), | ||||
|                      tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||||
|     } | ||||
|     // Set GL_PACK_ROW_LENGTH back to 0 now that the read-back is done
 | ||||
|     glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||||
| } | ||||
| 
 | ||||
| enum MatchFlags { | ||||
|     Invalid = 1,      // Modifier flag combinable with the other match types: also accept surfaces
 | ||||
|                       // whose region is not valid; such matches require validation before use
 | ||||
|     Exact = 1 << 1,   // Surfaces perfectly match (tested with ExactMatch)
 | ||||
|     SubRect = 1 << 2, // Surface encompasses params (tested with CanSubRect)
 | ||||
|     Copy = 1 << 3,    // Surface we can copy from (tested with CanCopy)
 | ||||
|     Expand = 1 << 4,  // Surface that can expand params (tested with CanExpand)
 | ||||
|     TexCopy = 1 << 5  // Surface that will match a display transfer "texture copy" parameters
 | ||||
| }; | ||||
| // Bitwise OR of two MatchFlags values; the result keeps the MatchFlags type
 | ||||
| constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { | ||||
|     return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); | ||||
| } | ||||
| 
 | ||||
| /// Get the best cached surface overlapping params' interval for the match kinds in find_flags.
 | ||||
| /// Surfaces whose region is not valid are skipped unless MatchFlags::Invalid is set. Among the
 | ||||
| /// matches, a higher res_scale wins, then a valid surface over an invalid one, then the longer
 | ||||
| /// matched interval. Returns nullptr when nothing matches.
 | ||||
| /// validate_interval must be set when MatchFlags::Copy is requested; otherwise it defaults to
 | ||||
| /// params' own interval for the validity check.
 | ||||
| template <MatchFlags find_flags> | ||||
| Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, | ||||
|                   ScaleMatch match_scale_type, | ||||
|                   boost::optional<SurfaceInterval> validate_interval = boost::none) { | ||||
|     // Best candidate so far; match_scale starts at 0 so the first match is always taken
 | ||||
|     Surface match_surface = nullptr; | ||||
|     bool match_valid = false; | ||||
|     u32 match_scale = 0; | ||||
|     SurfaceInterval match_interval{}; | ||||
| 
 | ||||
|     for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { | ||||
|         for (auto& surface : pair.second) { | ||||
|             const bool res_scale_matched = match_scale_type == ScaleMatch::Exact | ||||
|                                                ? (params.res_scale == surface->res_scale) | ||||
|                                                : (params.res_scale <= surface->res_scale); | ||||
|             bool is_valid = | ||||
|                 find_flags & MatchFlags::Copy ? true | ||||
|                                               : // validity will be checked in GetCopyableInterval
 | ||||
|                     surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); | ||||
| 
 | ||||
|             if (!(find_flags & MatchFlags::Invalid) && !is_valid) | ||||
|                 continue; | ||||
| 
 | ||||
|             // Runs match_fn only when check_type was requested in find_flags, then ranks the result
 | ||||
|             auto IsMatch_Helper = [&](auto check_type, auto match_fn) { | ||||
|                 if (!(find_flags & check_type)) | ||||
|                     return; | ||||
| 
 | ||||
|                 bool matched; | ||||
|                 SurfaceInterval surface_interval; | ||||
|                 std::tie(matched, surface_interval) = match_fn(); | ||||
|                 if (!matched) | ||||
|                     return; | ||||
| 
 | ||||
|                 // Fill surfaces and ScaleMatch::Ignore bypass the res_scale requirement
 | ||||
|                 if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && | ||||
|                     surface->type != SurfaceType::Fill) | ||||
|                     return; | ||||
| 
 | ||||
|                 // Found a match, update only if this is better than the previous one
 | ||||
|                 auto UpdateMatch = [&] { | ||||
|                     match_surface = surface; | ||||
|                     match_valid = is_valid; | ||||
|                     match_scale = surface->res_scale; | ||||
|                     match_interval = surface_interval; | ||||
|                 }; | ||||
| 
 | ||||
|                 if (surface->res_scale > match_scale) { | ||||
|                     UpdateMatch(); | ||||
|                     return; | ||||
|                 } else if (surface->res_scale < match_scale) { | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (is_valid && !match_valid) { | ||||
|                     UpdateMatch(); | ||||
|                     return; | ||||
|                 } else if (is_valid != match_valid) { | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { | ||||
|                     UpdateMatch(); | ||||
|                 } | ||||
|             }; | ||||
|             IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { | ||||
|                 return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); | ||||
|             }); | ||||
|             IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { | ||||
|                 return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); | ||||
|             }); | ||||
|             IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { | ||||
|                 // Copy matching dereferences validate_interval below; fail loudly on an empty
 | ||||
|                 // optional instead of invoking undefined behaviour
 | ||||
|                 ASSERT(validate_interval); | ||||
|                 auto copy_interval = | ||||
|                     params.FromInterval(*validate_interval).GetCopyableInterval(surface); | ||||
|                 bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && | ||||
|                                surface->CanCopy(params, copy_interval); | ||||
|                 return std::make_pair(matched, copy_interval); | ||||
|             }); | ||||
|             IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { | ||||
|                 return std::make_pair(surface->CanExpand(params), surface->GetInterval()); | ||||
|             }); | ||||
|             IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { | ||||
|                 return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); | ||||
|             }); | ||||
|         } | ||||
|     } | ||||
|     return match_surface; | ||||
| } | ||||
| 
 | ||||
| void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ | |||
| #pragma GCC diagnostic ignored "-Wunused-local-typedefs" | ||||
| #endif | ||||
| #include <boost/icl/interval_map.hpp> | ||||
| #include <boost/icl/interval_set.hpp> | ||||
| #ifdef __GNUC__ | ||||
| #pragma GCC diagnostic pop | ||||
| #endif | ||||
|  | @ -20,21 +21,37 @@ | |||
| #include "common/assert.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/math_util.h" | ||||
| #include "core/hw/gpu.h" | ||||
| #include "video_core/regs_framebuffer.h" | ||||
| #include "video_core/regs_texturing.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| 
 | ||||
| namespace MathUtil { | ||||
| template <class T> | ||||
| struct Rectangle; | ||||
| } | ||||
| 
 | ||||
| struct CachedSurface; | ||||
| using Surface = std::shared_ptr<CachedSurface>; | ||||
| using SurfaceSet = std::set<Surface>; | ||||
| 
 | ||||
| using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>; | ||||
| using SurfaceRegions = boost::icl::interval_set<PAddr>; | ||||
| using SurfaceMap = boost::icl::interval_map<PAddr, Surface>; | ||||
| using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>; | ||||
| 
 | ||||
| struct CachedSurface { | ||||
| using SurfaceInterval = SurfaceCache::interval_type; | ||||
| static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && | ||||
|                   std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), | ||||
|               "incorrect interval types"); | ||||
| 
 | ||||
| using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; | ||||
| using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | ||||
| 
 | ||||
| using PageMap = boost::icl::interval_map<u32, int>; | ||||
| 
 | ||||
| enum class ScaleMatch { | ||||
|     Exact,   // only accept surfaces with the same res scale as params
 | ||||
|     Upscale, // only accept surfaces with the same or a higher res scale than params
 | ||||
|     Ignore   // accept every scaled res
 | ||||
| }; | ||||
| 
 | ||||
| struct SurfaceParams { | ||||
|     enum class PixelFormat { | ||||
|         // First 5 formats are shared between textures and color buffers
 | ||||
|         RGBA8 = 0, | ||||
|  | @ -68,10 +85,11 @@ struct CachedSurface { | |||
|         Texture = 1, | ||||
|         Depth = 2, | ||||
|         DepthStencil = 3, | ||||
|         Invalid = 4, | ||||
|         Fill = 4, | ||||
|         Invalid = 5 | ||||
|     }; | ||||
| 
 | ||||
|     static constexpr unsigned int GetFormatBpp(CachedSurface::PixelFormat format) { | ||||
|     static constexpr unsigned int GetFormatBpp(PixelFormat format) { | ||||
|         constexpr std::array<unsigned int, 18> bpp_table = { | ||||
|             32, // RGBA8
 | ||||
|             24, // RGB8
 | ||||
|  | @ -93,8 +111,11 @@ struct CachedSurface { | |||
|             32, // D24S8
 | ||||
|         }; | ||||
| 
 | ||||
|         ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table)); | ||||
|         return bpp_table[(unsigned int)format]; | ||||
|         ASSERT(static_cast<size_t>(format) < bpp_table.size()); | ||||
|         return bpp_table[static_cast<size_t>(format)]; | ||||
|     } | ||||
|     unsigned int GetFormatBpp() const { | ||||
|         return GetFormatBpp(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { | ||||
|  | @ -162,31 +183,114 @@ struct CachedSurface { | |||
|         return SurfaceType::Invalid; | ||||
|     } | ||||
| 
 | ||||
|     /// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
 | ||||
|     /// and "pixel_format"
 | ||||
|     void UpdateParams() { | ||||
|         if (stride == 0) { | ||||
|             stride = width; | ||||
|         } | ||||
|         type = GetFormatType(pixel_format); | ||||
|         size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) | ||||
|                          : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); | ||||
|         end = addr + size; | ||||
|     } | ||||
| 
 | ||||
|     SurfaceInterval GetInterval() const { | ||||
|         return SurfaceInterval::right_open(addr, end); | ||||
|     } | ||||
| 
 | ||||
|     // Returns the outer rectangle containing "interval"
 | ||||
|     SurfaceParams FromInterval(SurfaceInterval interval) const; | ||||
| 
 | ||||
|     SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; | ||||
| 
 | ||||
|     // Returns the region of the biggest valid rectangle within interval
 | ||||
|     SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; | ||||
| 
 | ||||
|     u32 GetScaledWidth() const { | ||||
|         return (u32)(width * res_scale_width); | ||||
|         return width * res_scale; | ||||
|     } | ||||
| 
 | ||||
|     u32 GetScaledHeight() const { | ||||
|         return (u32)(height * res_scale_height); | ||||
|         return height * res_scale; | ||||
|     } | ||||
| 
 | ||||
|     PAddr addr; | ||||
|     u32 size; | ||||
|     MathUtil::Rectangle<u32> GetRect() const { | ||||
|         return {0, height, width, 0}; | ||||
|     } | ||||
| 
 | ||||
|     PAddr min_valid; | ||||
|     PAddr max_valid; | ||||
|     MathUtil::Rectangle<u32> GetScaledRect() const { | ||||
|         return {0, GetScaledHeight(), GetScaledWidth(), 0}; | ||||
|     } | ||||
| 
 | ||||
|     u32 PixelsInBytes(u32 size) const { | ||||
|         return size * 8 / GetFormatBpp(pixel_format); | ||||
|     } | ||||
| 
 | ||||
|     u32 BytesInPixels(u32 pixels) const { | ||||
|         return pixels * GetFormatBpp(pixel_format) / 8; | ||||
|     } | ||||
| 
 | ||||
|     bool ExactMatch(const SurfaceParams& other_surface) const; | ||||
|     bool CanSubRect(const SurfaceParams& sub_surface) const; | ||||
|     bool CanExpand(const SurfaceParams& expanded_surface) const; | ||||
|     bool CanTexCopy(const SurfaceParams& texcopy_params) const; | ||||
| 
 | ||||
|     MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; | ||||
|     MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; | ||||
| 
 | ||||
|     PAddr addr = 0; | ||||
|     PAddr end = 0; | ||||
|     u32 size = 0; | ||||
| 
 | ||||
|     u32 width = 0; | ||||
|     u32 height = 0; | ||||
|     u32 stride = 0; | ||||
|     u16 res_scale = 1; | ||||
| 
 | ||||
|     bool is_tiled = false; | ||||
|     PixelFormat pixel_format = PixelFormat::Invalid; | ||||
|     SurfaceType type = SurfaceType::Invalid; | ||||
| }; | ||||
| 
 | ||||
| struct CachedSurface : SurfaceParams { | ||||
|     bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; | ||||
|     bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; | ||||
| 
 | ||||
|     bool IsRegionValid(SurfaceInterval interval) const { | ||||
|         return (invalid_regions.find(interval) == invalid_regions.end()); | ||||
|     } | ||||
| 
 | ||||
|     bool IsSurfaceFullyInvalid() const { | ||||
|         return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); | ||||
|     } | ||||
| 
 | ||||
|     SurfaceRegions invalid_regions; | ||||
| 
 | ||||
|     u32 fill_size = 0; /// Number of bytes to read from fill_data
 | ||||
|     std::array<u8, 4> fill_data; | ||||
| 
 | ||||
|     OGLTexture texture; | ||||
|     u32 width; | ||||
|     u32 height; | ||||
|     /// Stride between lines, in pixels. Only valid for images in linear format.
 | ||||
|     u32 pixel_stride = 0; | ||||
|     float res_scale_width = 1.f; | ||||
|     float res_scale_height = 1.f; | ||||
| 
 | ||||
|     bool is_tiled; | ||||
|     PixelFormat pixel_format; | ||||
|     bool dirty; | ||||
|     static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { | ||||
|         // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
 | ||||
|         return format == PixelFormat::Invalid | ||||
|                    ? 0 | ||||
|                    : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) | ||||
|                          ? 4 | ||||
|                          : SurfaceParams::GetFormatBpp(format) / 8; | ||||
|     } | ||||
| 
 | ||||
|     std::unique_ptr<u8[]> gl_buffer; | ||||
|     size_t gl_buffer_size = 0; | ||||
| 
 | ||||
|     // Read/Write data in 3DS memory to/from gl_buffer
 | ||||
|     void LoadGLBuffer(PAddr load_start, PAddr load_end); | ||||
|     void FlushGLBuffer(PAddr flush_start, PAddr flush_end); | ||||
| 
 | ||||
|     // Upload/Download data in gl_buffer in/to this surface's texture
 | ||||
|     void UploadGLTexture(const MathUtil::Rectangle<u32>& rect); | ||||
|     void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect); | ||||
| }; | ||||
| 
 | ||||
| class RasterizerCacheOpenGL : NonCopyable { | ||||
|  |  | |||
|  | @ -144,7 +144,7 @@ public: | |||
|     OpenGLState(); | ||||
| 
 | ||||
|     /// Get the currently active OpenGL state
 | ||||
|     static OpenGLState& GetCurState() { | ||||
|     static OpenGLState GetCurState() { | ||||
|         return cur_state; | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue