From 31b125ef578dd5df4e289d1057154dd34f73cb19 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sat, 19 Jun 2021 00:55:13 -0400
Subject: [PATCH 1/3] astc: Various robustness enhancements for the gpu decoder

These changes should help reduce crashes and driver panics that may
occur due to synchronization issues between shader completion and
later accesses to the decoded texture.
---
 src/video_core/host_shaders/astc_decoder.comp | 15 +++----
 .../renderer_opengl/util_shaders.cpp          |  5 +--
 .../renderer_vulkan/vk_compute_pass.cpp       | 39 ++++---------------
 src/video_core/textures/astc.cpp              |  2 +
 src/video_core/textures/astc.h                |  2 -
 5 files changed, 16 insertions(+), 47 deletions(-)

diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index eaba1b103..71327e233 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -14,9 +14,8 @@
 #define BINDING_6_TO_8_BUFFER 2
 #define BINDING_7_TO_8_BUFFER 3
 #define BINDING_8_TO_8_BUFFER 4
-#define BINDING_BYTE_TO_16_BUFFER 5
-#define BINDING_SWIZZLE_BUFFER 6
-#define BINDING_OUTPUT_IMAGE 7
+#define BINDING_SWIZZLE_BUFFER 5
+#define BINDING_OUTPUT_IMAGE 6
 
 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
 
@@ -29,7 +28,6 @@
 #define BINDING_6_TO_8_BUFFER 3
 #define BINDING_7_TO_8_BUFFER 4
 #define BINDING_8_TO_8_BUFFER 5
-#define BINDING_BYTE_TO_16_BUFFER 6
 #define BINDING_OUTPUT_IMAGE 0
 
 #endif
@@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_
 layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
     uint REPLICATE_8_BIT_TO_8_TABLE[];
 };
-layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
-    uint REPLICATE_BYTE_TO_16_TABLE[];
-};
 
 layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
 
@@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
 }
 
 uvec4 ReplicateByteTo16(uvec4 value) {
-    return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y],
-                 REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
+    return value * 0x101;
 }
 
 uint ReplicateBitTo7(uint value) {
@@ -1327,6 +1321,9 @@ void main() {
     offset += swizzle;
 
     const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
+    if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
+        return;
+    }
     uint block_index =
         pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
 
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 47fddcb6e..d57998cdc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
     static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
     static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
     static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
-    static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
 
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
 
@@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
                       offsetof(AstcBufferData, replicate_8_to_8),
                       sizeof(AstcBufferData::replicate_8_to_8));
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
-                      offsetof(AstcBufferData, replicate_byte_to_16),
-                      sizeof(AstcBufferData::replicate_byte_to_16));
 
     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glUniform2ui(1, tile_size.width, tile_size.height);
@@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
 
         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
     }
+    glMemoryBarrier(GL_ALL_BARRIER_BITS);
     program_manager.RestoreGuestCompute();
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index e11406e58..123bed794 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
 constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
 constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
 constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
-constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
-constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
-constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
+constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5;
+constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6;
+constexpr size_t ASTC_NUM_BINDINGS = 7;
 
 VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
     return {
@@ -71,7 +71,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
     }};
 }
 
-std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
+std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
     return {{
         {
             .binding = ASTC_BINDING_INPUT_BUFFER,
@@ -108,13 +108,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
             .pImmutableSamplers = nullptr,
         },
-        {
-            .binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        },
         {
             .binding = ASTC_BINDING_SWIZZLE_BUFFER,
             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
     };
 }
 
-std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
+std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
+BuildASTCPassDescriptorUpdateTemplateEntry() {
     return {{
         {
             .dstBinding = ASTC_BINDING_INPUT_BUFFER,
@@ -185,14 +179,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
             .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
             .stride = sizeof(DescriptorUpdateEntry),
         },
-        {
-            .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
-            .stride = sizeof(DescriptorUpdateEntry),
-        },
         {
             .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
             .dstArrayElement = 0,
@@ -222,15 +208,6 @@ struct AstcPushConstants {
     u32 block_height_mask;
 };
 
-struct AstcBufferData {
-    decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
-    decltype(EncodingsValues) encoding_values = EncodingsValues;
-    decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
-    decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
-    decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
-    decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
 } // Anonymous namespace
 
 VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
@@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
                                           sizeof(AstcBufferData::replicate_7_to_8));
         update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
                                           sizeof(AstcBufferData::replicate_8_to_8));
-        update_descriptor_queue.AddBuffer(*data_buffer,
-                                          offsetof(AstcBufferData, replicate_byte_to_16),
-                                          sizeof(AstcBufferData::replicate_byte_to_16));
         update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
                                           sizeof(SWIZZLE_TABLE));
         update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
     });
+    scheduler.Finish();
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 9b2177ebd..b6e2022f2 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
         }
     }
 }
+
+static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
 static constexpr u32 ReplicateByteTo16(std::size_t value) {
     return REPLICATE_BYTE_TO_16_TABLE[value];
 }
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index c1c37dfe7..441e8eb04 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() {
     return table;
 }
 
-constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
 constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
 constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
 constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
@@ -126,7 +125,6 @@ struct AstcBufferData {
     decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
     decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
     decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
-    decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
 } constexpr ASTC_BUFFER_DATA;
 
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,

From ace20ba4a4774ae3c42f2ef5566c7113f3b980b3 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sat, 19 Jun 2021 10:56:13 -0400
Subject: [PATCH 2/3] astc_decoder.comp: Remove unnecessary LUT SSBOs

The LUTs can instead be compile-time constants within the shader.
---
 src/video_core/host_shaders/astc_decoder.comp | 35 ++++-----
 .../renderer_opengl/util_shaders.cpp          | 21 +-----
 .../renderer_vulkan/vk_compute_pass.cpp       | 74 +++----------------
 src/video_core/textures/astc.cpp              |  8 +-
 src/video_core/textures/astc.h                |  9 +--
 5 files changed, 34 insertions(+), 113 deletions(-)

diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 71327e233..c37f15bfd 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -11,11 +11,8 @@
 #define UNIFORM(n)
 #define BINDING_INPUT_BUFFER 0
 #define BINDING_ENC_BUFFER 1
-#define BINDING_6_TO_8_BUFFER 2
-#define BINDING_7_TO_8_BUFFER 3
-#define BINDING_8_TO_8_BUFFER 4
-#define BINDING_SWIZZLE_BUFFER 5
-#define BINDING_OUTPUT_IMAGE 6
+#define BINDING_SWIZZLE_BUFFER 2
+#define BINDING_OUTPUT_IMAGE 3
 
 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
 
@@ -25,9 +22,6 @@
 #define BINDING_SWIZZLE_BUFFER 0
 #define BINDING_INPUT_BUFFER 1
 #define BINDING_ENC_BUFFER 2
-#define BINDING_6_TO_8_BUFFER 3
-#define BINDING_7_TO_8_BUFFER 4
-#define BINDING_8_TO_8_BUFFER 5
 #define BINDING_OUTPUT_IMAGE 0
 
 #endif
@@ -74,16 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
 layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
     EncodingData encoding_values[];
 };
-// ASTC Precompiled tables
-layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 {
-    uint REPLICATE_6_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 {
-    uint REPLICATE_7_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
-    uint REPLICATE_8_BIT_TO_8_TABLE[];
-};
 
 layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
 
@@ -134,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
 const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
     uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
            47, 49, 51, 53, 55, 57, 59, 61, 63);
+const uint REPLICATE_6_BIT_TO_8_TABLE[64] =
+    uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89,
+           93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162,
+           166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235,
+           239, 243, 247, 251, 255);
+const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
+    uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
+           46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88,
+           90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
+           129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163,
+           165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199,
+           201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235,
+           237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
 
 // Input ASTC texture globals
 uint current_index = 0;
@@ -230,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
     case 7:
         return REPLICATE_7_BIT_TO_8_TABLE[value];
     case 8:
-        return REPLICATE_8_BIT_TO_8_TABLE[value];
+        return value;
     }
     return Replicate(value, num_bits, 8);
 }
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index d57998cdc..7e32f49ca 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
     swizzle_table_buffer.Create();
     astc_buffer.Create();
     glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
-    glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0);
+    glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
+                         0);
 }
 
 UtilShaders::~UtilShaders() = default;
@@ -79,11 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
     static constexpr GLuint BINDING_INPUT_BUFFER = 1;
     static constexpr GLuint BINDING_ENC_BUFFER = 2;
-
-    static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
-    static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
-    static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
-
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
 
     const Extent2D tile_size{
@@ -92,18 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
     };
     program_manager.BindHostCompute(astc_decoder_program.handle);
     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle,
-                      offsetof(AstcBufferData, encoding_values),
-                      sizeof(AstcBufferData::encoding_values));
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
-                      offsetof(AstcBufferData, replicate_6_to_8),
-                      sizeof(AstcBufferData::replicate_6_to_8));
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
-                      offsetof(AstcBufferData, replicate_7_to_8),
-                      sizeof(AstcBufferData::replicate_7_to_8));
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
-                      offsetof(AstcBufferData, replicate_8_to_8),
-                      sizeof(AstcBufferData::replicate_8_to_8));
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
 
     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glUniform2ui(1, tile_size.width, tile_size.height);
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 123bed794..205cd3b05 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -30,19 +30,16 @@
 namespace Vulkan {
 
 using Tegra::Texture::SWIZZLE_TABLE;
-using Tegra::Texture::ASTC::EncodingsValues;
+using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
 using namespace Tegra::Texture::ASTC;
 
 namespace {
 
 constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
 constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
-constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
-constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
-constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
-constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5;
-constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6;
-constexpr size_t ASTC_NUM_BINDINGS = 7;
+constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
+constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
+constexpr size_t ASTC_NUM_BINDINGS = 4;
 
 VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
     return {
@@ -87,27 +84,6 @@ std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorS
             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
             .pImmutableSamplers = nullptr,
         },
-        {
-            .binding = ASTC_BINDING_6_TO_8_BUFFER,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        },
-        {
-            .binding = ASTC_BINDING_7_TO_8_BUFFER,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        },
-        {
-            .binding = ASTC_BINDING_8_TO_8_BUFFER,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .descriptorCount = 1,
-            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-            .pImmutableSamplers = nullptr,
-        },
         {
             .binding = ASTC_BINDING_SWIZZLE_BUFFER,
             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@@ -155,30 +131,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
             .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
             .stride = sizeof(DescriptorUpdateEntry),
         },
-        {
-            .dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
-            .stride = sizeof(DescriptorUpdateEntry),
-        },
-        {
-            .dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
-            .stride = sizeof(DescriptorUpdateEntry),
-        },
-        {
-            .dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
-            .stride = sizeof(DescriptorUpdateEntry),
-        },
         {
             .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
             .dstArrayElement = 0,
@@ -400,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
 ASTCDecoderPass::~ASTCDecoderPass() = default;
 
 void ASTCDecoderPass::MakeDataBuffer() {
-    constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE);
+    constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
     data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
@@ -414,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() {
     data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
 
     const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
-    std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA));
+    std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
+                sizeof(ASTC_ENCODINGS_VALUES));
     // Tack on the swizzle table at the end of the buffer
-    std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE,
+    std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
                 sizeof(SWIZZLE_TABLE));
 
     scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
@@ -486,15 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
         update_descriptor_queue.Acquire();
         update_descriptor_queue.AddBuffer(map.buffer, input_offset,
                                           image.guest_size_bytes - swizzle.buffer_offset);
-        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
-                                          sizeof(AstcBufferData::encoding_values));
-        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
-                                          sizeof(AstcBufferData::replicate_6_to_8));
-        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
-                                          sizeof(AstcBufferData::replicate_7_to_8));
-        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
-                                          sizeof(AstcBufferData::replicate_8_to_8));
-        update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
+        update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
+        update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
                                           sizeof(SWIZZLE_TABLE));
         update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
 
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b6e2022f2..7b756ba41 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
 static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
                                   u32 nValues) {
     // Determine encoding parameters
-    IntegerEncodedValue val = EncodingsValues[maxRange];
+    IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange];
 
     // Start decoding
     u32 nValsDecoded = 0;
@@ -310,7 +310,7 @@ struct TexelWeightParams {
             nIdxs *= 2;
         }
 
-        return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
+        return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs);
     }
 
     u32 GetNumWeightValues() const {
@@ -755,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
     // figure out the max value for each of them...
     u32 range = 256;
     while (--range > 0) {
-        IntegerEncodedValue val = EncodingsValues[range];
+        IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range];
         u32 bitLength = val.GetBitLength(nValues);
         if (bitLength <= nBitsForColorData) {
             // Find the smallest possible range that matches the given encoding
             while (--range > 0) {
-                IntegerEncodedValue newval = EncodingsValues[range];
+                IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range];
                 if (!newval.MatchesEncoding(val)) {
                     break;
                 }
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 441e8eb04..0229ae122 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
     return encodings;
 }
 
-constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues();
+constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
 
 // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
 // is the same as [(num_bits - 1):0] and repeats all the way down.
@@ -120,13 +120,6 @@ constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
 constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
 constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
 
-struct AstcBufferData {
-    decltype(EncodingsValues) encoding_values = EncodingsValues;
-    decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
-    decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
-    decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
 

From 851c76233db1d6fab507b0ab3423284a79829ede Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sat, 19 Jun 2021 11:16:25 -0400
Subject: [PATCH 3/3] util_shaders: Specify ASTC decoder memory barrier bits

---
 src/video_core/renderer_opengl/util_shaders.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 7e32f49ca..abaf1ee6a 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -118,7 +118,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
 
         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
     }
-    glMemoryBarrier(GL_ALL_BARRIER_BITS);
+    // Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
+    // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are omitted because
+    // the texture cache runtime issues them in a separate glMemoryBarrier call.
+    glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
+                    GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
+                    GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
     program_manager.RestoreGuestCompute();
 }