diff --git a/src/core/core.cpp b/src/core/core.cpp
index 121092868..fa059a394 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,6 +51,7 @@
 #include "core/telemetry_session.h"
 #include "core/tools/freezer.h"
 #include "network/network.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
@@ -215,6 +216,7 @@ struct System::Impl {
 
         telemetry_session = std::make_unique<Core::TelemetrySession>();
 
+        host1x_core = std::make_unique<Tegra::Host1x::Host1x>();
         gpu_core = VideoCore::CreateGPU(emu_window, system);
         if (!gpu_core) {
             return SystemResultStatus::ErrorVideoCore;
@@ -373,6 +375,7 @@ struct System::Impl {
         app_loader.reset();
         audio_core.reset();
         gpu_core.reset();
+        host1x_core.reset();
         perf_stats.reset();
         kernel.Shutdown();
         memory.Reset();
@@ -450,6 +453,7 @@ struct System::Impl {
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
     std::unique_ptr<Tegra::GPU> gpu_core;
+    std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
     std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
     std::unique_ptr<Core::DeviceMemory> device_memory;
     std::unique_ptr<AudioCore::AudioCore> audio_core;
@@ -668,6 +672,14 @@ const Tegra::GPU& System::GPU() const {
     return *impl->gpu_core;
 }
 
+Tegra::Host1x::Host1x& System::Host1x() {
+    return *impl->host1x_core;
+}
+
+const Tegra::Host1x::Host1x& System::Host1x() const {
+    return *impl->host1x_core;
+}
+
 Core::Hardware::InterruptManager& System::InterruptManager() {
     return *impl->interrupt_manager;
 }
diff --git a/src/core/core.h b/src/core/core.h
index 0ce3b1d60..e4168a921 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -74,6 +74,9 @@ class TimeManager;
 namespace Tegra {
 class DebugContext;
 class GPU;
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
 } // namespace Tegra
 
 namespace VideoCore {
@@ -260,6 +263,12 @@ public:
     /// Gets an immutable reference to the GPU interface.
     [[nodiscard]] const Tegra::GPU& GPU() const;
 
+    /// Gets a mutable reference to the Host1x interface.
+    [[nodiscard]] Tegra::Host1x::Host1x& Host1x();
+
+    /// Gets an immutable reference to the Host1x interface.
+    [[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const;
+
     /// Gets a mutable reference to the renderer.
     [[nodiscard]] VideoCore::RendererBase& Renderer();
 
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index b1c0e9eb2..e6a976714 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -50,7 +50,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
                                                stride, format, transform, crop_rect};
 
     system.GetPerfStats().EndSystemFrame();
-    system.GPU().SwapBuffers(&framebuffer);
+    system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
     system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
     system.GetPerfStats().BeginSystemFrame();
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 54074af75..ffe42d423 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -18,6 +18,7 @@
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
 
 namespace Service::Nvidia::Devices {
 
@@ -129,7 +130,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
         return NvResult::Success;
     }
 
-    auto& gpu = system.GPU();
+    auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
     const u32 target_value = params.fence.value;
 
     auto lock = NvEventsLock();
@@ -149,7 +150,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
         if (events[slot].fails > 2) {
             {
                 auto lk = system.StallProcesses();
-                gpu.WaitFence(fence_id, target_value);
+                host1x_syncpoint_manager.WaitHost(fence_id, target_value);
                 system.UnstallProcesses();
             }
             params.value.raw = target_value;
@@ -198,7 +199,15 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
     }
     params.value.raw |= slot;
 
-    gpu.RegisterSyncptInterrupt(fence_id, target_value);
+    event.wait_handle =
+        host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
+            auto& nv_event = events[slot]; // named apart from the enclosing `event` local
+            if (nv_event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
+                EventState::Waiting) { // signal only when the slot was still Waiting
+                nv_event.kevent->GetWritableEvent().Signal();
+            }
+            nv_event.status.store(EventState::Signalled, std::memory_order_release);
+        });
     return NvResult::Timeout;
 }
 
@@ -288,8 +297,10 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::v
     auto& event = events[event_id];
     if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
         EventState::Waiting) {
-        system.GPU().CancelSyncptInterrupt(event.assigned_syncpt, event.assigned_value);
+        auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
+        host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle);
         syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt);
+        event.wait_handle = {};
     }
     event.fails++;
     event.status.store(EventState::Cancelled, std::memory_order_release);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index d56aea405..136a1e925 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/host1x/syncpoint_manager.h"
 
 namespace Service::Nvidia::NvCore {
 class Container;
@@ -78,6 +79,9 @@ private:
         // Tells if an NVEvent is registered or not
         bool registered{};
 
+        // Used for waiting on a syncpoint & canceling it.
+        Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};
+
         bool IsBeingUsed() {
             const auto current_status = status.load(std::memory_order_acquire);
             return current_status == EventState::Waiting ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 38d45cb79..db3e266ad 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -210,10 +210,10 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve
 
 static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
     return {
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
                                   Tegra::SubmissionMode::Increasing),
         {fence.value},
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
                                   Tegra::SubmissionMode::Increasing),
         BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
     };
@@ -222,12 +222,12 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
 static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
                                                                    u32 add_increment) {
     std::vector<Tegra::CommandHeader> result{
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
                                   Tegra::SubmissionMode::Increasing),
         {}};
 
     for (u32 count = 0; count < add_increment; ++count) {
-        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
                                                       Tegra::SubmissionMode::Increasing));
         result.emplace_back(
             BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
@@ -239,7 +239,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
 static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
                                                                           u32 add_increment) {
     std::vector<Tegra::CommandHeader> result{
-        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
                                   Tegra::SubmissionMode::Increasing),
         {}};
     const std::vector<Tegra::CommandHeader> increment{
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 8c3013f83..aa112021d 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -24,6 +24,8 @@
 #include "core/hle/service/vi/layer/vi_layer.h"
 #include "core/hle/service/vi/vi_results.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 
 namespace Service::NVFlinger {
 
@@ -267,12 +269,12 @@ void NVFlinger::Compose() {
             return; // We are likely shutting down
         }
 
-        auto& gpu = system.GPU();
+        auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
         const auto& multi_fence = buffer.fence;
         guard->unlock();
         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
             const auto& fence = multi_fence.fences[fence_id];
-            gpu.WaitFence(fence.id, fence.value);
+            syncpoint_manager.WaitGuest(fence.id, fence.value);
         }
         guard->lock();
 
@@ -284,6 +286,7 @@ void NVFlinger::Compose() {
         auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
         ASSERT(nvdisp);
 
+        guard->unlock();
         Common::Rectangle<int> crop_rect{
             static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
             static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
@@ -292,6 +295,8 @@ void NVFlinger::Compose() {
                      igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
                      static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
 
+        guard->lock();
+
         swap_interval = buffer.swap_interval;
 
         auto fence = android::Fence::NoFence();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 35faa70a0..723f9b67c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -4,7 +4,7 @@
 add_subdirectory(host_shaders)
 
 if(LIBVA_FOUND)
-    set_source_files_properties(command_classes/codecs/codec.cpp
+    set_source_files_properties(host1x/codecs/codec.cpp
         PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
     list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
 endif()
@@ -15,24 +15,6 @@ add_library(video_core STATIC
     buffer_cache/buffer_cache.h
     cdma_pusher.cpp
     cdma_pusher.h
-    command_classes/codecs/codec.cpp
-    command_classes/codecs/codec.h
-    command_classes/codecs/h264.cpp
-    command_classes/codecs/h264.h
-    command_classes/codecs/vp8.cpp
-    command_classes/codecs/vp8.h
-    command_classes/codecs/vp9.cpp
-    command_classes/codecs/vp9.h
-    command_classes/codecs/vp9_types.h
-    command_classes/host1x.cpp
-    command_classes/host1x.h
-    command_classes/nvdec.cpp
-    command_classes/nvdec.h
-    command_classes/nvdec_common.h
-    command_classes/sync_manager.cpp
-    command_classes/sync_manager.h
-    command_classes/vic.cpp
-    command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
     control/channel_state.cpp
@@ -63,6 +45,26 @@ add_library(video_core STATIC
     engines/puller.cpp
     engines/puller.h
     framebuffer_config.h
+    host1x/codecs/codec.cpp
+    host1x/codecs/codec.h
+    host1x/codecs/h264.cpp
+    host1x/codecs/h264.h
+    host1x/codecs/vp8.cpp
+    host1x/codecs/vp8.h
+    host1x/codecs/vp9.cpp
+    host1x/codecs/vp9.h
+    host1x/codecs/vp9_types.h
+    host1x/control.cpp
+    host1x/control.h
+    host1x/nvdec.cpp
+    host1x/nvdec.h
+    host1x/nvdec_common.h
+    host1x/sync_manager.cpp
+    host1x/sync_manager.h
+    host1x/syncpoint_manager.cpp
+    host1x/syncpoint_manager.h
+    host1x/vic.cpp
+    host1x/vic.h
     macro/macro.cpp
     macro/macro.h
     macro/macro_hle.cpp
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 8e890a85e..148126347 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -2,20 +2,22 @@
 // SPDX-License-Identifier: MIT
 
 #include <bit>
-#include "command_classes/host1x.h"
-#include "command_classes/nvdec.h"
-#include "command_classes/vic.h"
 #include "video_core/cdma_pusher.h"
-#include "video_core/command_classes/sync_manager.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/control.h"
+#include "video_core/host1x/nvdec.h"
+#include "video_core/host1x/nvdec_common.h"
+#include "video_core/host1x/sync_manager.h"
+#include "video_core/host1x/vic.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 CDmaPusher::CDmaPusher(GPU& gpu_)
-    : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
-      vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
-      host1x_processor(std::make_unique<Host1x>(gpu)),
-      sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
+    : gpu{gpu_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(gpu)),
+      vic_processor(std::make_unique<Host1x::Vic>(gpu, nvdec_processor)),
+      host1x_processor(std::make_unique<Host1x::Control>(gpu)),
+      sync_manager(std::make_unique<Host1x::SyncptIncrManager>(gpu)) {}
 
 CDmaPusher::~CDmaPusher() = default;
 
@@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
         case ThiMethod::SetMethod1:
             LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
                       static_cast<u32>(vic_thi_state.method_0), data);
-            vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
+            vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
+                                         data);
             break;
         default:
             break;
         }
         break;
-    case ChClassId::Host1x:
+    case ChClassId::Control:
         // This device is mainly for syncpoint synchronization
         LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
-        host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
+        host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
         break;
     default:
         UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index d6ffef95f..de17c2082 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -13,10 +13,13 @@
 namespace Tegra {
 
 class GPU;
-class Host1x;
+
+namespace Host1x {
+class Control;
 class Nvdec;
 class SyncptIncrManager;
 class Vic;
+} // namespace Host1x
 
 enum class ChSubmissionMode : u32 {
     SetClass = 0,
@@ -30,7 +33,7 @@ enum class ChSubmissionMode : u32 {
 
 enum class ChClassId : u32 {
     NoClass = 0x0,
-    Host1x = 0x1,
+    Control = 0x1,
     VideoEncodeMpeg = 0x20,
     VideoEncodeNvEnc = 0x21,
     VideoStreamingVi = 0x30,
@@ -102,10 +105,10 @@ private:
     void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
 
     GPU& gpu;
-    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
-    std::unique_ptr<Tegra::Vic> vic_processor;
-    std::unique_ptr<Tegra::Host1x> host1x_processor;
-    std::unique_ptr<SyncptIncrManager> sync_manager;
+    std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; // shared: also passed to Vic's ctor
+    std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
+    std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
+    std::unique_ptr<Tegra::Host1x::SyncptIncrManager> sync_manager;
     ChClassId current_class{};
     ThiRegisters vic_thi_state{};
     ThiRegisters nvdec_thi_state{};
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index 67803fe94..3613c4992 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -1,5 +1,5 @@
 // Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
+// Licensed under GPLv3 or any later version
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index 82808a6b8..08a7591e1 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -1,5 +1,5 @@
 // Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
+// Licensed under GPLv3 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 31d80e8b7..dbf833de7 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -1,3 +1,7 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
 #pragma once
 
 #include <deque>
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
index e1abcb188..a9bb00aa7 100644
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -1,5 +1,5 @@
 // Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
+// Licensed under GPLv3 or any later version
 // Refer to the license.txt file included.
 
 #include <memory>
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
index 802e9caff..c1a773946 100644
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -1,5 +1,5 @@
 // Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
+// Licensed under GPLv3 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index fd7c936c4..938f0f11c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -37,24 +37,32 @@ enum class SubmissionMode : u32 {
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
 // So the values you see in docs might be multiplied by 4.
+// Register documentation:
+// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h
+//
+// Register Description (approx):
+// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
 enum class BufferMethods : u32 {
     BindObject = 0x0,
+    Illegal = 0x1,
     Nop = 0x2,
     SemaphoreAddressHigh = 0x4,
     SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
+    SemaphoreSequencePayload = 0x6,
+    SemaphoreOperation = 0x7,
+    NonStallInterrupt = 0x8,
     WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
+    MemOpA = 0xA,
+    MemOpB = 0xB,
+    MemOpC = 0xC,
+    MemOpD = 0xD,
     RefCnt = 0x14,
     SemaphoreAcquire = 0x1A,
     SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    WaitForInterrupt = 0x1E,
-    Unk7c = 0x1F,
+    SyncpointPayload = 0x1C,
+    SyncpointOperation = 0x1D,
+    WaitForIdle = 0x1E,
+    CRCCheck = 0x1F,
     Yield = 0x20,
     NonPullerMethods = 0x40,
 };
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 3866c8746..8c17639e4 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -68,11 +68,6 @@ void Puller::ProcessFenceActionMethod() {
     }
 }
 
-void Puller::ProcessWaitForInterruptMethod() {
-    // TODO(bunnei) ImplementMe
-    LOG_WARNING(HW_GPU, "(STUBBED) called");
-}
-
 void Puller::ProcessSemaphoreTriggerMethod() {
     const auto semaphoreOperationMask = 0xF;
     const auto op =
@@ -91,29 +86,33 @@ void Puller::ProcessSemaphoreTriggerMethod() {
         block.timestamp = gpu.GetTicks();
         memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
     } else {
-        const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-            (op == GpuSemaphoreOperation::AcquireGequal &&
-             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-            // Nothing to do in this case
-        } else {
+        do {
+            const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
             regs.acquire_source = true;
             regs.acquire_value = regs.semaphore_sequence;
             if (op == GpuSemaphoreOperation::AcquireEqual) {
                 regs.acquire_active = true;
                 regs.acquire_mode = false;
+                if (word != regs.acquire_value) {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+                    continue;
+                }
             } else if (op == GpuSemaphoreOperation::AcquireGequal) {
                 regs.acquire_active = true;
                 regs.acquire_mode = true;
+                if (word < regs.acquire_value) {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+                    continue;
+                }
             } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                // semaphore_sequence, gives a non-0 result
-                LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+                if ((word & regs.semaphore_sequence) == 0) { // == binds tighter than &
+                    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+                    continue;
+                }
             } else {
                 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
             }
-        }
+        } while (false); // NOTE(review): `continue` lands here and exits; no retry occurs
     }
 }
 
@@ -124,6 +123,7 @@ void Puller::ProcessSemaphoreRelease() {
 void Puller::ProcessSemaphoreAcquire() {
     const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
     const auto value = regs.semaphore_acquire;
+    std::this_thread::sleep_for(std::chrono::milliseconds(5));
     if (word != value) {
         regs.acquire_active = true;
         regs.acquire_value = value;
@@ -146,32 +146,39 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::Nop:
     case BufferMethods::SemaphoreAddressHigh:
     case BufferMethods::SemaphoreAddressLow:
-    case BufferMethods::SemaphoreSequence:
-    case BufferMethods::UnkCacheFlush:
+    case BufferMethods::SemaphoreSequencePayload:
     case BufferMethods::WrcacheFlush:
-    case BufferMethods::FenceValue:
+    case BufferMethods::SyncpointPayload:
         break;
     case BufferMethods::RefCnt:
         rasterizer->SignalReference();
         break;
-    case BufferMethods::FenceAction:
+    case BufferMethods::SyncpointOperation:
         ProcessFenceActionMethod();
         break;
-    case BufferMethods::WaitForInterrupt:
-        ProcessWaitForInterruptMethod();
+    case BufferMethods::WaitForIdle:
+        rasterizer->WaitForIdle();
         break;
-    case BufferMethods::SemaphoreTrigger: {
+    case BufferMethods::SemaphoreOperation: {
         ProcessSemaphoreTriggerMethod();
         break;
     }
-    case BufferMethods::NotifyIntr: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+    case BufferMethods::NonStallInterrupt: {
+        LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
         break;
     }
-    case BufferMethods::Unk28: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+    case BufferMethods::MemOpA: {
+        LOG_ERROR(HW_GPU, "Memory Operation A");
+        break;
+    }
+    case BufferMethods::MemOpB: {
+        // TODO: Implement MemOpB properly; for now it only calls SyncGuestHost().
+        rasterizer->SyncGuestHost();
+        break;
+    }
+    case BufferMethods::MemOpC:
+    case BufferMethods::MemOpD: {
+        LOG_ERROR(HW_GPU, "Memory Operation C,D");
         break;
     }
     case BufferMethods::SemaphoreAcquire: {
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
index d948ec790..b4619e9a8 100644
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -141,7 +141,6 @@ private:
     void ProcessSemaphoreAcquire();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreTriggerMethod();
-    void ProcessWaitForInterruptMethod();
     [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
 
     /// Mapping of command subchannels to their bound engine ids
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index d658e038d..03a70e5e0 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -11,6 +11,8 @@
 #include "common/common_types.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace VideoCommon {
@@ -72,6 +74,7 @@ public:
     }
 
     void SignalSyncPoint(u32 value) {
+        syncpoint_manager.IncrementGuest(value);
         TryReleasePendingFences();
         const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
@@ -96,7 +99,7 @@ public:
                 auto payload = current_fence->GetPayload();
                 std::memcpy(address, &payload, sizeof(payload));
             } else {
-                gpu.IncrementSyncPoint(current_fence->GetPayload());
+                syncpoint_manager.IncrementHost(current_fence->GetPayload());
             }
             PopFence();
         }
@@ -106,8 +109,8 @@ protected:
     explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                           TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
                           TQueryCache& query_cache_)
-        : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
-          buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+        : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
+          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
 
     virtual ~FenceManager() = default;
 
@@ -125,6 +128,7 @@ protected:
 
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::GPU& gpu;
+    Tegra::Host1x::SyncpointManager& syncpoint_manager;
     TTextureCache& texture_cache;
     TTBufferCache& buffer_cache;
     TQueryCache& query_cache;
@@ -142,7 +146,7 @@ private:
                 const auto payload = current_fence->GetPayload();
                 std::memcpy(address, &payload, sizeof(payload));
             } else {
-                gpu.IncrementSyncPoint(current_fence->GetPayload());
+                syncpoint_manager.IncrementHost(current_fence->GetPayload());
             }
             PopFence();
         }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index eebd7f3ff..1097db08a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -28,6 +28,8 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
@@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
+        : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
           gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
@@ -115,31 +117,35 @@ struct GPU::Impl {
     }
 
     /// Request a host GPU memory flush from the CPU.
-    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
-        std::unique_lock lck{flush_request_mutex};
-        const u64 fence = ++last_flush_fence;
-        flush_requests.emplace_back(fence, addr, size);
+    template <typename Func>
+    [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+        std::unique_lock lck{sync_request_mutex};
+        const u64 fence = ++last_sync_fence; // fence id the caller can wait on
+        sync_requests.emplace_back(std::forward<Func>(action)); // keep rvalue callables movable
         return fence;
     }
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
+    [[nodiscard]] u64 CurrentSyncRequestFence() const {
+        return current_sync_fence.load(std::memory_order_relaxed);
+    }
+
+    void WaitForSyncOperation(const u64 fence) {
+        std::unique_lock lck{sync_request_mutex};
+        sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
     }
 
     /// Tick pending requests within the GPU.
     void TickWork() {
-        std::unique_lock lck{flush_request_mutex};
-        while (!flush_requests.empty()) {
-            auto& request = flush_requests.front();
-            const u64 fence = request.fence;
-            const VAddr addr = request.addr;
-            const std::size_t size = request.size;
-            flush_requests.pop_front();
-            flush_request_mutex.unlock();
-            rasterizer->FlushRegion(addr, size);
-            current_flush_fence.store(fence);
-            flush_request_mutex.lock();
+        std::unique_lock lck{sync_request_mutex};
+        while (!sync_requests.empty()) {
+            auto request = std::move(sync_requests.front());
+            sync_requests.pop_front();
+            lck.unlock(); // release via the guard so its ownership state stays accurate
+            request();
+            current_sync_fence.fetch_add(1, std::memory_order_release);
+            lck.lock(); // re-acquire before touching the queue and notifying waiters
+            sync_request_cv.notify_all();
         }
     }
 
@@ -207,78 +213,26 @@ struct GPU::Impl {
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value) {
-        // Synced GPU, is always in sync
-        if (!is_async) {
-            return;
-        }
         if (syncpoint_id == UINT32_MAX) {
-            // TODO: Research what this does.
-            LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
             return;
         }
         MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{sync_mutex};
-        sync_cv.wait(lock, [=, this] {
-            if (shutting_down.load(std::memory_order_relaxed)) {
-                // We're shutting down, ensure no threads continue to wait for the next syncpoint
-                return true;
-            }
-            return syncpoints.at(syncpoint_id).load() >= value;
-        });
+        host1x.GetSyncpointManager().WaitHost(syncpoint_id, value);
     }
 
     void IncrementSyncPoint(u32 syncpoint_id) {
-        auto& syncpoint = syncpoints.at(syncpoint_id);
-        syncpoint++;
-        std::scoped_lock lock{sync_mutex};
-        sync_cv.notify_all();
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        if (!interrupt.empty()) {
-            u32 value = syncpoint.load();
-            auto it = interrupt.begin();
-            while (it != interrupt.end()) {
-                if (value >= *it) {
-                    TriggerCpuInterrupt(syncpoint_id, *it);
-                    it = interrupt.erase(it);
-                    continue;
-                }
-                it++;
-            }
-        }
+        host1x.GetSyncpointManager().IncrementHost(syncpoint_id);
     }
 
     [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).load();
+        return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id);
     }
 
     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        u32 current_value = syncpoints.at(syncpoint_id).load();
-        if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) {
+        auto& syncpoint_manager = host1x.GetSyncpointManager();
+        syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() {
             TriggerCpuInterrupt(syncpoint_id, value);
-            return;
-        }
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        bool contains = std::any_of(interrupt.begin(), interrupt.end(),
-                                    [value](u32 in_value) { return in_value == value; });
-        if (contains) {
-            return;
-        }
-        interrupt.emplace_back(value);
-    }
-
-    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        const auto iter =
-            std::find_if(interrupt.begin(), interrupt.end(),
-                         [value](u32 interrupt_value) { return value == interrupt_value; });
-
-        if (iter == interrupt.end()) {
-            return false;
-        }
-        interrupt.erase(iter);
-        return true;
+        });
     }
 
     [[nodiscard]] u64 GetTicks() const {
@@ -387,8 +341,48 @@ struct GPU::Impl {
         interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
     }
 
+    void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                            Service::Nvidia::NvFence* fences, size_t num_fences) {
+        size_t current_request_counter{};
+        {
+            std::unique_lock<std::mutex> lk(request_swap_mutex);
+            if (free_swap_counters.empty()) {
+                current_request_counter = request_swap_counters.size();
+                request_swap_counters.emplace_back(num_fences != 0 ? num_fences : 1);
+            } else {
+                current_request_counter = free_swap_counters.front();
+                request_swap_counters[current_request_counter] = num_fences != 0 ? num_fences : 1;
+                free_swap_counters.pop_front();
+            }
+        }
+        const auto wait_fence =
+            RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
+                auto& syncpoint_manager = host1x.GetSyncpointManager();
+                const auto executer = [this, current_request_counter,
+                                       framebuffer_copy = *framebuffer]() {
+                    {
+                        std::unique_lock<std::mutex> lk(request_swap_mutex);
+                        if (--request_swap_counters[current_request_counter] != 0) {
+                            return; // Other fences of this request are still pending.
+                        }
+                        free_swap_counters.push_back(current_request_counter); // Recycle slot.
+                    }
+                    renderer->SwapBuffers(&framebuffer_copy);
+                };
+                if (num_fences == 0) {
+                    executer(); // No fences: present now and release the slot (count was 1).
+                }
+                for (size_t i = 0; i < num_fences; i++) {
+                    syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
+                }
+            });
+        gpu_thread.TickGPU();
+        WaitForSyncOperation(wait_fence); // framebuffer/fences must stay valid until here.
+    }
+
     GPU& gpu;
     Core::System& system;
+    Host1x::Host1x& host1x;
 
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -411,18 +405,11 @@ struct GPU::Impl {
 
     std::condition_variable sync_cv;
 
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
+    std::list<std::function<void(void)>> sync_requests;
+    std::atomic<u64> current_sync_fence{};
+    u64 last_sync_fence{};
+    std::mutex sync_request_mutex;
+    std::condition_variable sync_request_cv;
 
     const bool is_async;
 
@@ -433,6 +420,10 @@ struct GPU::Impl {
     std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
     Tegra::Control::ChannelState* current_channel;
     s32 bound_channel{-1};
+
+    std::deque<size_t> free_swap_counters;
+    std::deque<size_t> request_swap_counters;
+    std::mutex request_swap_mutex;
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() {
 }
 
 u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
-    return impl->RequestFlush(addr, size);
+    return impl->RequestSyncOperation(
+        [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
 }
 
-u64 GPU::CurrentFlushRequestFence() const {
-    return impl->CurrentFlushRequestFence();
+u64 GPU::CurrentSyncRequestFence() const {
+    return impl->CurrentSyncRequestFence();
+}
+
+void GPU::WaitForSyncOperation(u64 fence) { // Forwards to Impl; blocks until fence is reached.
+    impl->WaitForSyncOperation(fence);
 }
 
 void GPU::TickWork() {
     impl->TickWork();
 }
 
+/// Gets a mutable reference to the Host1x interface
+Host1x::Host1x& GPU::Host1x() {
+    return impl->host1x;
+}
+
+/// Gets an immutable reference to the Host1x interface.
+const Host1x::Host1x& GPU::Host1x() const {
+    return impl->host1x;
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return impl->Maxwell3D();
 }
@@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
     return impl->ShaderNotify();
 }
 
+void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                             Service::Nvidia::NvFence* fences, size_t num_fences) {
+    impl->RequestSwapBuffers(framebuffer, fences, num_fences);
+}
+
 void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     impl->WaitFence(syncpoint_id, value);
 }
@@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
     impl->RegisterSyncptInterrupt(syncpoint_id, value);
 }
 
-bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-    return impl->CancelSyncptInterrupt(syncpoint_id, value);
-}
-
 u64 GPU::GetTicks() const {
     return impl->GetTicks();
 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 7e84b0d2f..c1a538257 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -93,6 +93,10 @@ namespace Control {
 struct ChannelState;
 }
 
+namespace Host1x {
+class Host1x;
+} // namespace Host1x
+
 class MemoryManager;
 
 class GPU final {
@@ -124,11 +128,19 @@ public:
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const;
+    [[nodiscard]] u64 CurrentSyncRequestFence() const;
+
+    void WaitForSyncOperation(u64 fence);
 
     /// Tick pending requests within the GPU.
     void TickWork();
 
+    /// Gets a mutable reference to the Host1x interface
+    [[nodiscard]] Host1x::Host1x& Host1x();
+
+    /// Gets an immutable reference to the Host1x interface.
+    [[nodiscard]] const Host1x::Host1x& Host1x() const;
+
     /// Returns a reference to the Maxwell3D GPU engine.
     [[nodiscard]] Engines::Maxwell3D& Maxwell3D();
 
@@ -174,8 +186,6 @@ public:
 
     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
 
-    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
-
     [[nodiscard]] u64 GetTicks() const;
 
     [[nodiscard]] bool IsAsync() const;
@@ -184,6 +194,9 @@ public:
 
     void RendererFrameEndNotify();
 
+    void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                            Service::Nvidia::NvFence* fences, size_t num_fences);
+
     /// Performs any additional setup necessary in order to begin GPU emulation.
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9844cde43..2c03545bf 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -93,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     }
     auto& gpu = system.GPU();
     u64 fence = gpu.RequestFlush(addr, size);
+    TickGPU();
+    gpu.WaitForSyncOperation(fence);
+}
+
+void ThreadManager::TickGPU() {
     PushCommand(GPUTickCommand(), true);
-    ASSERT(fence <= gpu.CurrentFlushRequestFence());
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index c5078a2b3..64628d3e3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -135,6 +135,8 @@ public:
 
     void OnCommandListEnd();
 
+    void TickGPU();
+
 private:
     /// Pushes a command to be executed by the GPU thread
     u64 PushCommand(CommandData&& command_data, bool block = false);
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
similarity index 91%
rename from src/video_core/command_classes/codecs/codec.cpp
rename to src/video_core/host1x/codecs/codec.cpp
index a5eb97b7f..70c47ae03 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -6,11 +6,11 @@
 #include <vector>
 #include "common/assert.h"
 #include "common/settings.h"
-#include "video_core/command_classes/codecs/codec.h"
-#include "video_core/command_classes/codecs/h264.h"
-#include "video_core/command_classes/codecs/vp8.h"
-#include "video_core/command_classes/codecs/vp9.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/codecs/codec.h"
+#include "video_core/host1x/codecs/h264.h"
+#include "video_core/host1x/codecs/vp8.h"
+#include "video_core/host1x/codecs/vp9.h"
 #include "video_core/memory_manager.h"
 
 extern "C" {
@@ -73,7 +73,7 @@ void AVFrameDeleter(AVFrame* ptr) {
     av_frame_free(&ptr);
 }
 
-Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs)
     : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
       vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
       vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
@@ -168,11 +168,11 @@ void Codec::InitializeGpuDecoder() {
 void Codec::Initialize() {
     const AVCodecID codec = [&] {
         switch (current_codec) {
-        case NvdecCommon::VideoCodec::H264:
+        case Host1x::NvdecCommon::VideoCodec::H264:
             return AV_CODEC_ID_H264;
-        case NvdecCommon::VideoCodec::VP8:
+        case Host1x::NvdecCommon::VideoCodec::VP8:
             return AV_CODEC_ID_VP8;
-        case NvdecCommon::VideoCodec::VP9:
+        case Host1x::NvdecCommon::VideoCodec::VP9:
             return AV_CODEC_ID_VP9;
         default:
             UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
@@ -197,7 +197,7 @@ void Codec::Initialize() {
     initialized = true;
 }
 
-void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
+void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
     if (current_codec != codec) {
         current_codec = codec;
         LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
@@ -215,11 +215,11 @@ void Codec::Decode() {
     bool vp9_hidden_frame = false;
     const auto& frame_data = [&]() {
         switch (current_codec) {
-        case Tegra::NvdecCommon::VideoCodec::H264:
+        case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
             return h264_decoder->ComposeFrame(state, is_first_frame);
-        case Tegra::NvdecCommon::VideoCodec::VP8:
+        case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
             return vp8_decoder->ComposeFrame(state);
-        case Tegra::NvdecCommon::VideoCodec::VP9:
+        case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
             vp9_decoder->ComposeFrame(state);
             vp9_hidden_frame = vp9_decoder->WasFrameHidden();
             return vp9_decoder->GetFrameBytes();
@@ -287,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() {
     return frame;
 }
 
-NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
+Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
     return current_codec;
 }
 
 std::string_view Codec::GetCurrentCodecName() const {
     switch (current_codec) {
-    case NvdecCommon::VideoCodec::None:
+    case Host1x::NvdecCommon::VideoCodec::None:
         return "None";
-    case NvdecCommon::VideoCodec::H264:
+    case Host1x::NvdecCommon::VideoCodec::H264:
         return "H264";
-    case NvdecCommon::VideoCodec::VP8:
+    case Host1x::NvdecCommon::VideoCodec::VP8:
         return "VP8";
-    case NvdecCommon::VideoCodec::H265:
+    case Host1x::NvdecCommon::VideoCodec::H265:
         return "H265";
-    case NvdecCommon::VideoCodec::VP9:
+    case Host1x::NvdecCommon::VideoCodec::VP9:
         return "VP9";
     default:
         return "Unknown";
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
similarity index 78%
rename from src/video_core/command_classes/codecs/codec.h
rename to src/video_core/host1x/codecs/codec.h
index 0c2405465..117cb3ccd 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/host1x/codecs/codec.h
@@ -6,8 +6,8 @@
 #include <memory>
 #include <string_view>
 #include <queue>
-
-#include "video_core/command_classes/nvdec_common.h"
+#include "common/common_types.h"
+#include "video_core/host1x/nvdec_common.h"
 
 extern "C" {
 #if defined(__GNUC__) || defined(__clang__)
@@ -34,14 +34,14 @@ class VP9;
 
 class Codec {
 public:
-    explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
+    explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs);
     ~Codec();
 
     /// Initialize the codec, returning success or failure
     void Initialize();
 
     /// Sets NVDEC video stream codec
-    void SetTargetCodec(NvdecCommon::VideoCodec codec);
+    void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
 
     /// Call decoders to construct headers, decode AVFrame with ffmpeg
     void Decode();
@@ -50,7 +50,7 @@ public:
     [[nodiscard]] AVFramePtr GetCurrentFrame();
 
     /// Returns the value of current_codec
-    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+    [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
 
     /// Return name of the current codec
     [[nodiscard]] std::string_view GetCurrentCodecName() const;
@@ -63,14 +63,14 @@ private:
     bool CreateGpuAvDevice();
 
     bool initialized{};
-    NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
+    Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
 
     const AVCodec* av_codec{nullptr};
     AVCodecContext* av_codec_ctx{nullptr};
     AVBufferRef* av_gpu_decoder{nullptr};
 
     GPU& gpu;
-    const NvdecCommon::NvdecRegisters& state;
+    const Host1x::NvdecCommon::NvdecRegisters& state;
     std::unique_ptr<Decoder::H264> h264_decoder;
     std::unique_ptr<Decoder::VP8> vp8_decoder;
     std::unique_ptr<Decoder::VP9> vp9_decoder;
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
similarity index 98%
rename from src/video_core/command_classes/codecs/h264.cpp
rename to src/video_core/host1x/codecs/h264.cpp
index e2acd54d4..95534bc85 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -5,8 +5,8 @@
 #include <bit>
 
 #include "common/settings.h"
-#include "video_core/command_classes/codecs/h264.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/codecs/h264.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
@@ -28,7 +28,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
 
 H264::~H264() = default;
 
-const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state,
+const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
                                           bool is_first_frame) {
     H264DecoderContext context;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
similarity index 96%
rename from src/video_core/command_classes/codecs/h264.h
rename to src/video_core/host1x/codecs/h264.h
index 261574364..a98730474 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -8,7 +8,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
 class GPU;
@@ -59,8 +59,8 @@ public:
     ~H264();
 
     /// Compose the H264 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state,
-                                                      bool is_first_frame = false);
+    [[nodiscard]] const std::vector<u8>& ComposeFrame(
+        const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
 
 private:
     std::vector<u8> frame;
diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
similarity index 93%
rename from src/video_core/command_classes/codecs/vp8.cpp
rename to src/video_core/host1x/codecs/vp8.cpp
index c83b9bbc2..aac026e17 100644
--- a/src/video_core/command_classes/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -3,8 +3,8 @@
 
 #include <vector>
 
-#include "video_core/command_classes/codecs/vp8.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/codecs/vp8.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
@@ -12,7 +12,7 @@ VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
 
 VP8::~VP8() = default;
 
-const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
+const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     VP8PictureInfo info;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
 
diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
similarity index 93%
rename from src/video_core/command_classes/codecs/vp8.h
rename to src/video_core/host1x/codecs/vp8.h
index 3357667b0..a1dfa5f03 100644
--- a/src/video_core/command_classes/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -8,7 +8,7 @@
 
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
 class GPU;
@@ -20,7 +20,8 @@ public:
     ~VP8();
 
     /// Compose the VP8 frame for FFmpeg decoding
-    [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] const std::vector<u8>& ComposeFrame(
+        const Host1x::NvdecCommon::NvdecRegisters& state);
 
 private:
     std::vector<u8> frame;
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
similarity index 99%
rename from src/video_core/command_classes/codecs/vp9.cpp
rename to src/video_core/host1x/codecs/vp9.cpp
index c01431441..bc50c6ba4 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -4,8 +4,8 @@
 #include <algorithm> // for std::copy
 #include <numeric>
 #include "common/assert.h"
-#include "video_core/command_classes/codecs/vp9.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/codecs/vp9.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra::Decoder {
@@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
     }
 }
 
-Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
+Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
     PictureInfo picture_info;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
     Vp9PictureInfo vp9_info = picture_info.Convert();
@@ -376,7 +376,7 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
     entropy.Convert(dst);
 }
 
-Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
+Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     Vp9FrameContainer current_frame{};
     {
         gpu.SyncGuestHost();
@@ -769,7 +769,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
     return uncomp_writer;
 }
 
-void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) {
+void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
     std::vector<u8> bitstream;
     {
         Vp9FrameContainer curr_frame = GetCurrentFrame(state);
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
similarity index 93%
rename from src/video_core/command_classes/codecs/vp9.h
rename to src/video_core/host1x/codecs/vp9.h
index ecc40e8b1..a425c0fa4 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -8,8 +8,8 @@
 
 #include "common/common_types.h"
 #include "common/stream.h"
-#include "video_core/command_classes/codecs/vp9_types.h"
-#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/host1x/codecs/vp9_types.h"
+#include "video_core/host1x/nvdec_common.h"
 
 namespace Tegra {
 class GPU;
@@ -117,7 +117,7 @@ public:
 
     /// Composes the VP9 frame from the GPU state information.
     /// Based on the official VP9 spec documentation
-    void ComposeFrame(const NvdecCommon::NvdecRegisters& state);
+    void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
 
     /// Returns true if the most recent frame was a hidden frame.
     [[nodiscard]] bool WasFrameHidden() const {
@@ -162,13 +162,15 @@ private:
     void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
 
     /// Returns VP9 information from NVDEC provided offset and size
-    [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
+        const Host1x::NvdecCommon::NvdecRegisters& state);
 
     /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
     void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
 
     /// Returns frame to be decoded after buffering
-    [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+    [[nodiscard]] Vp9FrameContainer GetCurrentFrame(
+        const Host1x::NvdecCommon::NvdecRegisters& state);
 
     /// Use NVDEC providied information to compose the headers for the current frame
     [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
similarity index 100%
rename from src/video_core/command_classes/codecs/vp9_types.h
rename to src/video_core/host1x/codecs/vp9_types.h
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp
new file mode 100644
index 000000000..b72b01aa3
--- /dev/null
+++ b/src/video_core/host1x/control.cpp
@@ -0,0 +1,35 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/gpu.h"
+#include "video_core/host1x/control.h"
+#include "video_core/host1x/host1x.h"
+
+namespace Tegra::Host1x {
+
+Control::Control(GPU& gpu_) : gpu(gpu_) {}
+
+Control::~Control() = default;
+
+void Control::ProcessMethod(Method method, u32 argument) {
+    switch (method) {
+    case Method::LoadSyncptPayload32:
+        syncpoint_value = argument; // Stored here; read later by Execute() as the wait value.
+        break;
+    case Method::WaitSyncpt:
+    case Method::WaitSyncpt32:
+        Execute(argument); // Both wait variants take the same path.
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
+        break;
+    }
+}
+
+void Control::Execute(u32 data) { // Host-side wait: data is passed as the syncpoint id.
+    gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value);
+}
+
+} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/host1x/control.h
similarity index 60%
rename from src/video_core/command_classes/host1x.h
rename to src/video_core/host1x/control.h
index bb48a4381..04dac7d51 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/host1x/control.h
@@ -1,5 +1,7 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
+// (https://github.com/skyline-emu/)
+// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
+// or any later version Refer to the license.txt file included.
 
 #pragma once
 
@@ -7,9 +9,12 @@
 
 namespace Tegra {
 class GPU;
+
+namespace Host1x {
+
 class Nvdec;
 
-class Host1x {
+class Control {
 public:
     enum class Method : u32 {
         WaitSyncpt = 0x8,
@@ -17,8 +22,8 @@ public:
         WaitSyncpt32 = 0x50,
     };
 
-    explicit Host1x(GPU& gpu);
-    ~Host1x();
+    explicit Control(GPU& gpu);
+    ~Control();
 
     /// Writes the method into the state, Invoke Execute() if encountered
     void ProcessMethod(Method method, u32 argument);
@@ -31,4 +36,6 @@ private:
     GPU& gpu;
 };
 
+} // namespace Host1x
+
 } // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
new file mode 100644
index 000000000..2971be286
--- /dev/null
+++ b/src/video_core/host1x/host1x.h
@@ -0,0 +1,33 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+#include "video_core/host1x/syncpoint_manager.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+class Host1x { // Owns a SyncpointManager and hands out references to it.
+public:
+    Host1x() : syncpoint_manager{} {}
+
+    SyncpointManager& GetSyncpointManager() { // Mutable access.
+        return syncpoint_manager;
+    }
+
+    const SyncpointManager& GetSyncpointManager() const { // Read-only access.
+        return syncpoint_manager;
+    }
+
+private:
+    SyncpointManager syncpoint_manager;
+};
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/host1x/nvdec.cpp
similarity index 92%
rename from src/video_core/command_classes/nvdec.cpp
rename to src/video_core/host1x/nvdec.cpp
index 4fbbe3da6..5f6decd0d 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/host1x/nvdec.cpp
@@ -2,10 +2,10 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/assert.h"
-#include "video_core/command_classes/nvdec.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/nvdec.h"
 
-namespace Tegra {
+namespace Tegra::Host1x {
 
 #define NVDEC_REG_INDEX(field_name)                                                                \
     (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
@@ -44,4 +44,4 @@ void Nvdec::Execute() {
     }
 }
 
-} // namespace Tegra
+} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/host1x/nvdec.h
similarity index 88%
rename from src/video_core/command_classes/nvdec.h
rename to src/video_core/host1x/nvdec.h
index 488531fc6..41ba1f7a0 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/host1x/nvdec.h
@@ -6,11 +6,13 @@
 #include <memory>
 #include <vector>
 #include "common/common_types.h"
-#include "video_core/command_classes/codecs/codec.h"
+#include "video_core/host1x/codecs/codec.h"
 
 namespace Tegra {
 class GPU;
 
+namespace Host1x {
+
 class Nvdec {
 public:
     explicit Nvdec(GPU& gpu);
@@ -30,4 +32,7 @@ private:
     NvdecCommon::NvdecRegisters state;
     std::unique_ptr<Codec> codec;
 };
+
+} // namespace Host1x
+
 } // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/host1x/nvdec_common.h
similarity index 98%
rename from src/video_core/command_classes/nvdec_common.h
rename to src/video_core/host1x/nvdec_common.h
index 521e5b52b..49d67ebbe 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/host1x/nvdec_common.h
@@ -7,7 +7,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 
-namespace Tegra::NvdecCommon {
+namespace Tegra::Host1x::NvdecCommon {
 
 enum class VideoCodec : u64 {
     None = 0x0,
@@ -94,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
 
 #undef ASSERT_REG_POSITION
 
-} // namespace Tegra::NvdecCommon
+} // namespace Tegra::Host1x::NvdecCommon
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp
similarity index 77%
rename from src/video_core/command_classes/sync_manager.cpp
rename to src/video_core/host1x/sync_manager.cpp
index 67e58046f..8694f77e2 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/host1x/sync_manager.cpp
@@ -4,8 +4,12 @@
 #include <algorithm>
 #include "sync_manager.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 
 namespace Tegra {
+namespace Host1x {
+
 SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
 SyncptIncrManager::~SyncptIncrManager() = default;
 
@@ -36,8 +40,12 @@ void SyncptIncrManager::IncrementAllDone() {
         if (!increments[done_count].complete) {
             break;
         }
-        gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
+        auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
+        syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
+        syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
     }
     increments.erase(increments.begin(), increments.begin() + done_count);
 }
+
+} // namespace Host1x
 } // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/host1x/sync_manager.h
similarity index 95%
rename from src/video_core/command_classes/sync_manager.h
rename to src/video_core/host1x/sync_manager.h
index 6dfaae080..aba72d5c5 100644
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/host1x/sync_manager.h
@@ -8,7 +8,11 @@
 #include "common/common_types.h"
 
 namespace Tegra {
+
 class GPU;
+
+namespace Host1x {
+
 struct SyncptIncr {
     u32 id;
     u32 class_id;
@@ -44,4 +48,6 @@ private:
     GPU& gpu;
 };
 
+} // namespace Host1x
+
 } // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
new file mode 100644
index 000000000..c606b8bd0
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/host1x/syncpoint_manager.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+/// Runs `action` immediately when `syncpoint` has already reached `expected_value`; otherwise
+/// queues it in `action_storage`, which is kept ordered by ascending expected value.
+/// Returns a handle to the queued entry, or a value-initialized handle when the action was run
+/// immediately and nothing was queued.
+SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
+    std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
+    std::function<void(void)>& action) {
+    // Fast path: skip the lock when the value has already been reached.
+    if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
+        action();
+        return {};
+    }
+
+    std::unique_lock<std::mutex> lk(guard);
+    // Re-check under the lock: Increment() bumps the counter before taking `guard`, so the value
+    // may have been reached since the check above.
+    if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
+        action();
+        return {};
+    }
+    // Insert in front of the first entry whose expected value is not smaller than ours.
+    auto it = action_storage.begin();
+    while (it != action_storage.end()) {
+        if (it->expected_value >= expected_value) {
+            break;
+        }
+        ++it;
+    }
+    return action_storage.emplace(it, expected_value, action);
+}
+
+/// Removes the entry referenced by `handle` from `action_storage` if it is still queued there;
+/// does nothing otherwise.
+void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
+                                        ActionHandle& handle) {
+    std::unique_lock<std::mutex> lk(guard);
+
+    // RegisterAction() returns a value-initialized handle when the action ran immediately, and
+    // Increment() erases entries once they have fired. Passing such a handle to erase() is
+    // undefined behavior, so only erase when the handle matches an entry still in the list.
+    for (auto it = action_storage.begin(); it != action_storage.end(); ++it) {
+        if (it == handle) {
+            action_storage.erase(it);
+            return;
+        }
+    }
+}
+
+void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
+    DeregisterAction(guest_action_storage[syncpoint_id], handle);
+}
+
+void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
+    DeregisterAction(host_action_storage[syncpoint_id], handle);
+}
+
+void SyncpointManager::IncrementGuest(u32 syncpoint_id) {
+    Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]);
+}
+
+void SyncpointManager::IncrementHost(u32 syncpoint_id) {
+    Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]);
+}
+
+void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) {
+    Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value);
+}
+
+void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) {
+    Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value);
+}
+
+/// Adds one to `syncpoint`, then runs and removes every queued action whose expected value has
+/// been reached and wakes all threads blocked on `wait_cv`. Actions run while `guard` is held.
+void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+                                 std::list<RegisteredAction>& action_storage) {
+    auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
+
+    std::unique_lock<std::mutex> lk(guard);
+    // Entries are ordered by expected value, so stop at the first one that is not due yet.
+    auto it = action_storage.begin();
+    while (it != action_storage.end()) {
+        if (it->expected_value > new_value) {
+            break;
+        }
+        it->action();
+        it = action_storage.erase(it);
+    }
+    wait_cv.notify_all();
+}
+
+/// Blocks the calling thread until `syncpoint` is at least `expected_value`.
+void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+                            u32 expected_value) {
+    const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; };
+    if (pred()) {
+        return;
+    }
+
+    std::unique_lock<std::mutex> lk(guard);
+    wait_cv.wait(lk, pred);
+}
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
new file mode 100644
index 000000000..0ecc040ab
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -0,0 +1,118 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv3 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <functional>
+#include <list>
+#include <mutex>
+#include <utility>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Host1x {
+
+/// Keeps a guest and a host counter per syncpoint and lets callers poll them, block on them, or
+/// register callbacks that run once a counter reaches a given value.
+class SyncpointManager {
+public:
+    /// Returns the current guest counter of syncpoint `id`.
+    u32 GetGuestSyncpointValue(u32 id) const {
+        return syncpoints_guest[id].load(std::memory_order_acquire);
+    }
+
+    /// Returns the current host counter of syncpoint `id`.
+    u32 GetHostSyncpointValue(u32 id) const {
+        return syncpoints_host[id].load(std::memory_order_acquire);
+    }
+
+    /// A callback paired with the counter value at which it becomes due.
+    struct RegisteredAction {
+        RegisteredAction(u32 expected_value_, std::function<void(void)>& action_)
+            : expected_value{expected_value_}, action{action_} {}
+        u32 expected_value;
+        std::function<void(void)> action;
+    };
+    using ActionHandle = std::list<RegisteredAction>::iterator;
+
+    /// Arranges for `action` to run once the guest counter of `syncpoint_id` is at least
+    /// `expected_value`. `action` is perfectly forwarded into the stored callback.
+    template <typename Func>
+    ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
+        std::function<void(void)> func(std::forward<Func>(action));
+        return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
+                              expected_value, func);
+    }
+
+    /// Arranges for `action` to run once the host counter of `syncpoint_id` is at least
+    /// `expected_value`. `action` is perfectly forwarded into the stored callback.
+    template <typename Func>
+    ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
+        std::function<void(void)> func(std::forward<Func>(action));
+        return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
+                              expected_value, func);
+    }
+
+    /// Removes a pending guest action previously returned by RegisterGuestAction().
+    void DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle);
+
+    /// Removes a pending host action previously returned by RegisterHostAction().
+    void DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle);
+
+    /// Increments the guest counter of `syncpoint_id` by one.
+    void IncrementGuest(u32 syncpoint_id);
+
+    /// Increments the host counter of `syncpoint_id` by one.
+    void IncrementHost(u32 syncpoint_id);
+
+    /// Blocks until the guest counter of `syncpoint_id` is at least `expected_value`.
+    void WaitGuest(u32 syncpoint_id, u32 expected_value);
+
+    /// Blocks until the host counter of `syncpoint_id` is at least `expected_value`.
+    void WaitHost(u32 syncpoint_id, u32 expected_value);
+
+    /// Returns true when the guest counter of `syncpoint_id` is at least `expected_value`.
+    bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) const {
+        return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
+    }
+
+    /// Returns true when the host counter of `syncpoint_id` is at least `expected_value`.
+    bool IsReadyHost(u32 syncpoint_id, u32 expected_value) const {
+        return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
+    }
+
+private:
+    void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
+                   std::list<RegisteredAction>& action_storage);
+
+    ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
+                                std::list<RegisteredAction>& action_storage, u32 expected_value,
+                                std::function<void(void)>& action);
+
+    void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
+
+    void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
+
+    static constexpr size_t NUM_MAX_SYNCPOINTS = 192;
+
+    std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{};
+    std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{};
+
+    std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage;
+    std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage;
+
+    // Protects both action lists and is the mutex both condition variables wait on.
+    std::mutex guard;
+    std::condition_variable wait_guest_cv;
+    std::condition_variable wait_host_cv;
+};
+
+} // namespace Host1x
+
+} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/host1x/vic.cpp
similarity index 98%
rename from src/video_core/command_classes/vic.cpp
rename to src/video_core/host1x/vic.cpp
index 7c17df353..a9422670a 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -18,14 +18,17 @@ extern "C" {
 #include "common/bit_field.h"
 #include "common/logging/log.h"
 
-#include "video_core/command_classes/nvdec.h"
-#include "video_core/command_classes/vic.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/nvdec.h"
+#include "video_core/host1x/vic.h"
 #include "video_core/memory_manager.h"
 #include "video_core/textures/decoders.h"
 
 namespace Tegra {
+
+namespace Host1x {
+
 namespace {
 enum class VideoPixelFormat : u64_le {
     RGBA8 = 0x1f,
@@ -235,4 +238,6 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
                                    chroma_buffer.size());
 }
 
+} // namespace Host1x
+
 } // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/host1x/vic.h
similarity index 93%
rename from src/video_core/command_classes/vic.h
rename to src/video_core/host1x/vic.h
index 010daa6b6..c51f8af7e 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/host1x/vic.h
@@ -11,6 +11,9 @@ struct SwsContext;
 
 namespace Tegra {
 class GPU;
+
+namespace Host1x {
+
 class Nvdec;
 union VicConfig;
 
@@ -40,7 +43,7 @@ private:
     void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
 
     GPU& gpu;
-    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+    std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
 
     /// Avoid reallocation of the following buffers every frame, as their
     /// size does not change during a stream
@@ -58,4 +61,6 @@ private:
     s32 scaler_height{};
 };
 
+} // namespace Host1x
+
 } // namespace Tegra