Skip to content

Commit

Permalink
GPU: Migrate buffers on GPU project, pre-emptively flush device local…
Browse files Browse the repository at this point in the history
… mappings (#6794)

* GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings

Essentially retreading #4540, but it's on the GPU project now instead of the backend. This allows us to have a lot more control + knowledge of where the buffer backing has been changed and allows us to pre-emptively flush pages to host memory for quicker readback. It will allow us to do other stuff in the future, but we'll get there when we get there.

Performance greatly improved in Hyrule Warriors: Age of Calamity. Performance notably improved in TOTK (average). Performance for BOTW restored to how it was before #4911, perhaps a bit better.

- Rewrites a bunch of buffer migration stuff. Might want to tighten up how dispose stuff works.
- Fixed an issue where the copy for texture pre-flush would happen _after_ the syncpoint.

TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies.

* Add copy deactivation

* Fix dependent virtual buffers

* Remove logging

* Fix format issues (maybe)

* Vulkan: Remove backing swap

* Add explicit memory access types for most buffers

* Fix typo

* Add device local force expiry, change buffer inheritance behaviour

* General cleanup, OGL fix

* BufferPreFlush comments

* BufferBackingState comments

* Add an extra precaution to BufferMigration

This is very unlikely, but it's important to cover loose ends like this.

* Address some feedback

* Docs
  • Loading branch information
riperiperi committed May 19, 2024
1 parent 2f427de commit eb1ce41
Show file tree
Hide file tree
Showing 29 changed files with 1,334 additions and 515 deletions.
11 changes: 8 additions & 3 deletions src/Ryujinx.Graphics.GAL/BufferAccess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ namespace Ryujinx.Graphics.GAL
public enum BufferAccess
{
Default = 0,
FlushPersistent = 1 << 0,
Stream = 1 << 1,
SparseCompatible = 1 << 2,
HostMemory = 1,
DeviceMemory = 2,
DeviceMemoryMapped = 3,

MemoryTypeMask = 0xf,

Stream = 1 << 4,
SparseCompatible = 1 << 5,
}
}
3 changes: 3 additions & 0 deletions src/Ryujinx.Graphics.GAL/Capabilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.GAL
{
public readonly TargetApi Api;
public readonly string VendorName;
public readonly SystemMemoryType MemoryType;

public readonly bool HasFrontFacingBug;
public readonly bool HasVectorIndexingBug;
Expand Down Expand Up @@ -66,6 +67,7 @@ namespace Ryujinx.Graphics.GAL
public Capabilities(
TargetApi api,
string vendorName,
SystemMemoryType memoryType,
bool hasFrontFacingBug,
bool hasVectorIndexingBug,
bool needsFragmentOutputSpecialization,
Expand Down Expand Up @@ -120,6 +122,7 @@ namespace Ryujinx.Graphics.GAL
{
Api = api;
VendorName = vendorName;
MemoryType = memoryType;
HasFrontFacingBug = hasFrontFacingBug;
HasVectorIndexingBug = hasVectorIndexingBug;
NeedsFragmentOutputSpecialization = needsFragmentOutputSpecialization;
Expand Down
1 change: 0 additions & 1 deletion src/Ryujinx.Graphics.GAL/IRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ public interface IRenderer : IDisposable
void BackgroundContextAction(Action action, bool alwaysBackground = false);

BufferHandle CreateBuffer(int size, BufferAccess access = BufferAccess.Default);
BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint);
BufferHandle CreateBuffer(nint pointer, int size);
BufferHandle CreateBufferSparse(ReadOnlySpan<BufferRange> storageBuffers);

Expand Down
1 change: 0 additions & 1 deletion src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ private static int InitLookup()
}

Register<ActionCommand>(CommandType.Action);
Register<CreateBufferCommand>(CommandType.CreateBuffer);
Register<CreateBufferAccessCommand>(CommandType.CreateBufferAccess);
Register<CreateBufferSparseCommand>(CommandType.CreateBufferSparse);
Register<CreateHostBufferCommand>(CommandType.CreateHostBuffer);
Expand Down
1 change: 0 additions & 1 deletion src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading
enum CommandType : byte
{
Action,
CreateBuffer,
CreateBufferAccess,
CreateBufferSparse,
CreateHostBuffer,
Expand Down

This file was deleted.

9 changes: 0 additions & 9 deletions src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,6 @@ public BufferHandle CreateBuffer(int size, BufferAccess access)
return handle;
}

/// <summary>
/// Creates a new buffer handle and queues a <see cref="CreateBufferCommand"/> that carries the
/// handle together with the requested size, access value and storage hint.
/// </summary>
/// <param name="size">Size value forwarded to the queued command</param>
/// <param name="access">Access value forwarded to the queued command</param>
/// <param name="storageHint">Buffer handle forwarded to the queued command as the storage hint</param>
/// <returns>The handle obtained from <c>Buffers.CreateBufferHandle()</c></returns>
public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint)
{
// The handle is allocated up front so it can be returned without waiting on the queued command.
BufferHandle handle = Buffers.CreateBufferHandle();
New<CreateBufferCommand>().Set(handle, size, access, storageHint);
QueueCommand();

return handle;
}

public BufferHandle CreateBuffer(nint pointer, int size)
{
BufferHandle handle = Buffers.CreateBufferHandle();
Expand Down
29 changes: 29 additions & 0 deletions src/Ryujinx.Graphics.GAL/SystemMemoryType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
namespace Ryujinx.Graphics.GAL
{
/// <summary>
/// Categories of system memory arrangement. Each member documents the assumptions it carries
/// about host memory and device memory performance.
/// </summary>
public enum SystemMemoryType
{
/// <summary>
/// The backend manages the ownership of memory. This mode never supports host imported memory.
/// </summary>
BackendManaged,

/// <summary>
/// Device memory has similar performance to host memory, usually because it's shared between CPU/GPU.
/// Use host memory whenever possible.
/// </summary>
UnifiedMemory,

/// <summary>
/// GPU storage to host memory goes through a slow interconnect, but it would still be preferable to use it if the data is flushed back often.
/// Assumes constant buffer access to host memory is rather fast.
/// </summary>
DedicatedMemory,

/// <summary>
/// GPU storage to host memory goes through a slow interconnect, which is very slow when doing access from storage.
/// When frequently accessed, copy buffers to host memory using DMA.
/// Assumes constant buffer access to host memory is rather fast.
/// </summary>
DedicatedMemorySlowStorage
}
}
5 changes: 3 additions & 2 deletions src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Memory.Range;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -495,8 +496,8 @@ private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)

ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride;

MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize);
MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4);
MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, BufferStage.Indirect);
MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, BufferStage.Indirect);

_processor.ThreedClass.DrawIndirect(
topology,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ private IndexBuffer CreateTopologyRemapBuffer(PrimitiveTopology topology, int co

ReadOnlySpan<byte> dataBytes = MemoryMarshal.Cast<int, byte>(data);

BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length);
BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length, BufferAccess.DeviceMemory);
_context.Renderer.SetBufferData(buffer, 0, dataBytes);

return new IndexBuffer(buffer, count, dataBytes.Length);
Expand Down Expand Up @@ -529,7 +529,7 @@ public BufferRange GetDummyBufferRange()
{
if (_dummyBuffer == BufferHandle.Null)
{
_dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize);
_dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize, BufferAccess.DeviceMemory);
_context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0);
}

Expand All @@ -550,7 +550,7 @@ public BufferHandle GetSequentialIndexBuffer(int count)
_context.Renderer.DeleteBuffer(_sequentialIndexBuffer);
}

_sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint));
_sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint), BufferAccess.DeviceMemory);
_sequentialIndexBufferCount = count;

Span<int> data = new int[count];
Expand Down Expand Up @@ -583,7 +583,7 @@ public BufferHandle GetSequentialIndexBuffer(int count)
_context.Renderer.DeleteBuffer(buffer.Handle);
}

buffer.Handle = _context.Renderer.CreateBuffer(newSize);
buffer.Handle = _context.Renderer.CreateBuffer(newSize, BufferAccess.DeviceMemory);
buffer.Size = newSize;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
Expand Down Expand Up @@ -370,7 +371,7 @@ private static int GetMaxCompleteStrips(int verticesPerPrimitive, int maxOutputV
{
var memoryManager = _channel.MemoryManager;

BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size));
BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size), BufferStage.VertexBuffer);

ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format);
bufferTexture.SetStorage(range);
Expand Down Expand Up @@ -412,7 +413,9 @@ private static int GetMaxCompleteStrips(int verticesPerPrimitive, int maxOutputV
var memoryManager = _channel.MemoryManager;

ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1);
BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign));
BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(
memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign),
BufferStage.IndexBuffer);
misalignedOffset = (int)misalign >> shift;

SetIndexBufferTexture(reservations, range, format);
Expand Down
6 changes: 3 additions & 3 deletions src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -684,8 +684,8 @@ static float FixedToFloat(int fixedValue)

if (hasCount)
{
var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange);
var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange);
var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect);
var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange, BufferStage.Indirect);

if (indexed)
{
Expand All @@ -698,7 +698,7 @@ static float FixedToFloat(int fixedValue)
}
else
{
var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange);
var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect);

if (indexed)
{
Expand Down
5 changes: 3 additions & 2 deletions src/Ryujinx.Graphics.Gpu/GpuContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -393,17 +393,18 @@ internal void CreateHostSyncIfNeeded(HostSyncFlags flags)

if (force || _pendingSync || (syncpoint && SyncpointActions.Count > 0))
{
Renderer.CreateSync(SyncNumber, strict);

foreach (var action in SyncActions)
{
action.SyncPreAction(syncpoint);
}

foreach (var action in SyncpointActions)
{
action.SyncPreAction(syncpoint);
}

Renderer.CreateSync(SyncNumber, strict);

SyncNumber++;

SyncActions.RemoveAll(action => action.SyncAction(syncpoint));
Expand Down
8 changes: 4 additions & 4 deletions src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -708,11 +708,11 @@ private void UpdateFromPool(TexturePool texturePool, SamplerPool samplerPool, Sh
format = texture.Format;
}

_channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format);
_channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format);
}
else
{
_channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format);
_channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format);
}
}
else if (isImage)
Expand Down Expand Up @@ -921,11 +921,11 @@ private void UpdateFromPool(TexturePool texturePool, SamplerPool samplerPool, Sh
format = texture.Format;
}

_channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format);
_channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format);
}
else
{
_channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format);
_channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format);
}
}
else if (isImage)
Expand Down
2 changes: 1 addition & 1 deletion src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ public void FlushIntoBuffer(TextureGroupHandle handle)
}
else
{
_flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.FlushPersistent);
_flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.HostMemory);
_flushBufferImported = false;
}

Expand Down

0 comments on commit eb1ce41

Please sign in to comment.