From faacec980117cad9b43dd7fa40300d20b3f12162 Mon Sep 17 00:00:00 2001 From: FluffyOMC <45863583+FluffyOMC@users.noreply.github.com> Date: Mon, 10 Feb 2025 16:53:44 -0500 Subject: [PATCH] JIT Cache Regions + HLE SoNoSigpipe BSD socket mapping (#615) Instead of a single 2048MB JIT cache that crashes the emulator once it is full, the JIT cache now grows on demand by adding 256MB regions. This reduces allocated memory for games that do not need a large JIT cache, and lets bigger games use more than 2048MB of JIT cache! ![image](https://github.com/user-attachments/assets/ff17dc48-6028-4377-8c73-746ab21ab83b) (SSBU goes past the 2048MB JIT cache limit that would previously crash Ryujinx ^) This also adds a mapping for the SoNoSigpipe BSD socket option, which Baba Is You's networking uses when downloading custom levels. --- src/ARMeilleure/Memory/ReservedRegion.cs | 2 + src/ARMeilleure/Translation/Cache/JitCache.cs | 112 ++++++++++++------ .../LightningJit/Cache/JitCache.cs | 94 +++++++++------ .../LightningJit/Cache/NoWxCache.cs | 2 +- .../Sockets/Bsd/Impl/WinSockHelper.cs | 1 + 5 files changed, 142 insertions(+), 69 deletions(-) diff --git a/src/ARMeilleure/Memory/ReservedRegion.cs b/src/ARMeilleure/Memory/ReservedRegion.cs index a3ebd610d..dfe17c933 100644 --- a/src/ARMeilleure/Memory/ReservedRegion.cs +++ b/src/ARMeilleure/Memory/ReservedRegion.cs @@ -7,6 +7,7 @@ namespace ARMeilleure.Memory public const int DefaultGranularity = 65536; // Mapping granularity in Windows. 
public IJitMemoryBlock Block { get; } + public IJitMemoryAllocator Allocator { get; } public nint Pointer => Block.Pointer; @@ -21,6 +22,7 @@ namespace ARMeilleure.Memory granularity = DefaultGranularity; } + Allocator = allocator; Block = allocator.Reserve(maxSize); _maxSize = maxSize; _sizeGranularity = granularity; diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index d7e8201d8..0ede558b6 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -2,6 +2,8 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.Memory; using ARMeilleure.Native; +using Humanizer; +using Ryujinx.Common.Logging; using Ryujinx.Memory; using System; using System.Collections.Generic; @@ -18,9 +20,8 @@ namespace ARMeilleure.Translation.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 2047 * 1024 * 1024; + private const int CacheSize = 256 * 1024 * 1024; - private static ReservedRegion _jitRegion; private static JitCacheInvalidation _jitCacheInvalidator; private static CacheMemoryAllocator _cacheAllocator; @@ -30,6 +31,9 @@ namespace ARMeilleure.Translation.Cache private static readonly Lock _lock = new(); private static bool _initialized; + private static readonly List _jitRegions = new(); + private static int _activeRegionIndex = 0; + [SupportedOSPlatform("windows")] [LibraryImport("kernel32.dll", SetLastError = true)] public static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize); @@ -48,7 +52,9 @@ namespace ARMeilleure.Translation.Cache return; } - _jitRegion = new ReservedRegion(allocator, CacheSize); + var firstRegion = new ReservedRegion(allocator, CacheSize); + _jitRegions.Add(firstRegion); + _activeRegionIndex = 0; if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { @@ -59,7 +65,9 @@ namespace 
ARMeilleure.Translation.Cache if (OperatingSystem.IsWindows()) { - JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(_pageSize)); + JitUnwindWindows.InstallFunctionTableHandler( + firstRegion.Pointer, CacheSize, firstRegion.Pointer + Allocate(_pageSize) + ); } _initialized = true; @@ -75,8 +83,8 @@ namespace ARMeilleure.Translation.Cache Debug.Assert(_initialized); int funcOffset = Allocate(code.Length); - - nint funcPtr = _jitRegion.Pointer + funcOffset; + ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; + nint funcPtr = targetRegion.Pointer + funcOffset; if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -90,9 +98,9 @@ namespace ARMeilleure.Translation.Cache } else { - ReprotectAsWritable(funcOffset, code.Length); + ReprotectAsWritable(targetRegion, funcOffset, code.Length); Marshal.Copy(code, 0, funcPtr, code.Length); - ReprotectAsExecutable(funcOffset, code.Length); + ReprotectAsExecutable(targetRegion, funcOffset, code.Length); if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -116,52 +124,83 @@ namespace ARMeilleure.Translation.Cache { Debug.Assert(_initialized); - int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); - - if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + foreach (var region in _jitRegions) { - _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); - _cacheEntries.RemoveAt(entryIndex); + if (pointer.ToInt64() < region.Pointer.ToInt64() || + pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + { + continue; + } + + int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + + if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + { + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + _cacheEntries.RemoveAt(entryIndex); + } + + 
return; } } } - private static void ReprotectAsWritable(int offset, int size) + private static void ReprotectAsWritable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(int offset, int size) + private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } private static int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); - int allocOffset = _cacheAllocator.Allocate(codeSize); - - if (allocOffset < 0) + for (int i = _activeRegionIndex; i < _jitRegions.Count; i++) { - throw new OutOfMemoryException("JIT Cache exhausted."); + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset >= 0) + { + _jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + _activeRegionIndex = i; + return allocOffset; + } } - _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + int exhaustedRegion = _activeRegionIndex; + var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize); + _jitRegions.Add(newRegion); + _activeRegionIndex = _jitRegions.Count - 1; + + int newRegionNumber = _activeRegionIndex; - return allocOffset; + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation)."); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + int allocOffsetNew 
= _cacheAllocator.Allocate(codeSize); + if (allocOffsetNew < 0) + { + throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); + } + + newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); + return allocOffsetNew; } + private static int AlignCodeSize(int codeSize) { return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); @@ -185,18 +224,21 @@ namespace ARMeilleure.Translation.Cache { lock (_lock) { - int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); - - if (index < 0) + foreach (var region in _jitRegions) { - index = ~index - 1; - } + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); - if (index >= 0) - { - entry = _cacheEntries[index]; - entryIndex = index; - return true; + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) + { + entry = _cacheEntries[index]; + entryIndex = index; + return true; + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs index 5849401ab..c994d424e 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs @@ -1,4 +1,6 @@ using ARMeilleure.Memory; +using Humanizer; +using Ryujinx.Common.Logging; using Ryujinx.Memory; using System; using System.Collections.Generic; @@ -15,9 +17,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. 
- private const int CacheSize = 2047 * 1024 * 1024; + private const int CacheSize = 256 * 1024 * 1024; - private static ReservedRegion _jitRegion; private static JitCacheInvalidation _jitCacheInvalidator; private static CacheMemoryAllocator _cacheAllocator; @@ -26,6 +27,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache private static readonly Lock _lock = new(); private static bool _initialized; + private static readonly List _jitRegions = new(); + private static int _activeRegionIndex = 0; [SupportedOSPlatform("windows")] [LibraryImport("kernel32.dll", SetLastError = true)] @@ -45,7 +48,9 @@ namespace Ryujinx.Cpu.LightningJit.Cache return; } - _jitRegion = new ReservedRegion(allocator, CacheSize); + var firstRegion = new ReservedRegion(allocator, CacheSize); + _jitRegions.Add(firstRegion); + _activeRegionIndex = 0; if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { @@ -65,8 +70,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache Debug.Assert(_initialized); int funcOffset = Allocate(code.Length); - - nint funcPtr = _jitRegion.Pointer + funcOffset; + ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; + nint funcPtr = targetRegion.Pointer + funcOffset; if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -80,18 +85,11 @@ namespace Ryujinx.Cpu.LightningJit.Cache } else { - ReprotectAsWritable(funcOffset, code.Length); - code.CopyTo(new Span((void*)funcPtr, code.Length)); - ReprotectAsExecutable(funcOffset, code.Length); + ReprotectAsWritable(targetRegion, funcOffset, code.Length); + Marshal.Copy(code.ToArray(), 0, funcPtr, code.Length); + ReprotectAsExecutable(targetRegion, funcOffset, code.Length); - if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) - { - FlushInstructionCache(Process.GetCurrentProcess().Handle, funcPtr, (nuint)code.Length); - } - else - { - _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length); - } + 
_jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length); } Add(funcOffset, code.Length); @@ -106,50 +104,80 @@ namespace Ryujinx.Cpu.LightningJit.Cache { Debug.Assert(_initialized); - int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); - - if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + foreach (var region in _jitRegions) { - _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); - _cacheEntries.RemoveAt(entryIndex); + if (pointer.ToInt64() < region.Pointer.ToInt64() || + pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + { + continue; + } + + int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + + if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + { + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + _cacheEntries.RemoveAt(entryIndex); + } + + return; } } } - private static void ReprotectAsWritable(int offset, int size) + private static void ReprotectAsWritable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(int offset, int size) + private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } private static int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); - int allocOffset = _cacheAllocator.Allocate(codeSize); - - if (allocOffset < 0) + for (int i 
= _activeRegionIndex; i < _jitRegions.Count; i++) { - throw new OutOfMemoryException("JIT Cache exhausted."); + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset >= 0) + { + _jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + _activeRegionIndex = i; + return allocOffset; + } } - _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + int exhaustedRegion = _activeRegionIndex; + var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize); + _jitRegions.Add(newRegion); + _activeRegionIndex = _jitRegions.Count - 1; + + int newRegionNumber = _activeRegionIndex; - return allocOffset; + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation)."); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + int allocOffsetNew = _cacheAllocator.Allocate(codeSize); + if (allocOffsetNew < 0) + { + throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); + } + + newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); + return allocOffsetNew; } private static int AlignCodeSize(int codeSize) diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs index 1bbf70182..65d297c28 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs @@ -12,7 +12,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache { private const int CodeAlignment = 4; // Bytes. private const int SharedCacheSize = 2047 * 1024 * 1024; - private const int LocalCacheSize = 128 * 1024 * 1024; + private const int LocalCacheSize = 256 * 1024 * 1024; // How many calls to the same function we allow until we pad the shared cache to force the function to become available there // and allow the guest to take the fast path. 
diff --git a/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs b/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs index 3db2712f3..018bb8f14 100644 --- a/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs +++ b/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs @@ -150,6 +150,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd.Impl { BsdSocketOption.SoLinger, SocketOptionName.Linger }, { BsdSocketOption.SoOobInline, SocketOptionName.OutOfBandInline }, { BsdSocketOption.SoReusePort, SocketOptionName.ReuseAddress }, + { BsdSocketOption.SoNoSigpipe, SocketOptionName.DontLinger }, { BsdSocketOption.SoSndBuf, SocketOptionName.SendBuffer }, { BsdSocketOption.SoRcvBuf, SocketOptionName.ReceiveBuffer }, { BsdSocketOption.SoSndLoWat, SocketOptionName.SendLowWater },