vault backup: 2024-02-05 09:54:25

This commit is contained in:
BlueRose 2024-02-05 09:54:25 +08:00
parent 491a20299b
commit 37ae208d98
5 changed files with 572 additions and 2 deletions

View File

@ -0,0 +1,9 @@
---
title: 剖析虚幻渲染体系09- 材质体系
date: 2024-02-04 21:44:37
excerpt:
tags:
rating: ⭐
---
# 前言
https://www.cnblogs.com/timlly/p/15109132.html

View File

@ -0,0 +1,9 @@
---
title: 剖析虚幻渲染体系08- Shader体系
date: 2024-02-04 21:44:10
excerpt:
tags:
rating: ⭐
---
# 前言
https://www.cnblogs.com/timlly/p/15092257.html

View File

@ -48,7 +48,10 @@ rating: ⭐
- D3D12存储了FD3D12Adapter* Adapter、FD3D12CommandContext* CmdContext、 FD3D12CommandContextRedirector* CmdContextRedirector。 - D3D12存储了FD3D12Adapter* Adapter、FD3D12CommandContext* CmdContext、 FD3D12CommandContextRedirector* CmdContextRedirector。
- FDynamicRHIFDynamicRHI是由动态绑定的RHI实现的接口它定义的接口和CommandList、CommandContext比较相似。 - FDynamicRHIFDynamicRHI是由动态绑定的RHI实现的接口它定义的接口和CommandList、CommandContext比较相似。
- 代码详见[[#FDynamicRHI]] - 代码详见[[#FDynamicRHI]]
- FRHICommandListExecutor - FRHICommandListExecutor负责将**Renderer层的RHI中间指令转译或直接调用到目标平台的图形API**它在RHI体系中起着举足轻重的作用。
- FParallelCommandListSet：用于实现并行渲染。使用案例详见[[#FParallelCommandListSet]]。目前（UE 5.3）只有下面2个子类：
- FRDGParallelCommandListSet
- FShadowParallelCommandListSet
## FDynamicRHI ## FDynamicRHI
```c++ ```c++
@ -200,3 +203,535 @@ class RHI_API FDynamicRHI
>需要注意的是传统图形APID3D11、OpenGL除了继承FDynamicRHI还需要继承**IRHICommandContextPSOFallback**因为需要借助后者的接口处理PSO的数据和行为以保证传统和现代API对PSO的一致处理行为。也正因为此现代图形APID3D12、Vulkan、Metal不需要继承**IRHICommandContext**的任何继承体系的类型,单单直接继承**FDynamicRHI**就可以处理RHI层的所有数据和操作。 >需要注意的是传统图形APID3D11、OpenGL除了继承FDynamicRHI还需要继承**IRHICommandContextPSOFallback**因为需要借助后者的接口处理PSO的数据和行为以保证传统和现代API对PSO的一致处理行为。也正因为此现代图形APID3D12、Vulkan、Metal不需要继承**IRHICommandContext**的任何继承体系的类型,单单直接继承**FDynamicRHI**就可以处理RHI层的所有数据和操作。
既然现代图形APID3D12、Vulkan、Metal的**DynamicRHI**没有继承**IRHICommandContext**的任何继承体系的类型那么它们是如何实现FDynamicRHI::RHIGetDefaultContext的接口下面以FD3D12DynamicRHI为例 既然现代图形APID3D12、Vulkan、Metal的**DynamicRHI**没有继承**IRHICommandContext**的任何继承体系的类型那么它们是如何实现FDynamicRHI::RHIGetDefaultContext的接口下面以FD3D12DynamicRHI为例
## FParallelCommandListSet
```c++
//Engine\Source\Runtime\Renderer\Private\DepthRendering.cpp
// Depth pre-pass entry point of the deferred renderer. Only the parallel path of the
// RenderDepthPass helper is quoted here; the rest of the function is elided at the
// "···" marker near the end of this excerpt.
void FDeferredShadingSceneRenderer::RenderPrePass(FRDGBuilder& GraphBuilder, FRDGTextureRef SceneDepthTexture, FInstanceCullingManager& InstanceCullingManager, FRDGTextureRef* FirstStageDepthBuffer)
{
RDG_EVENT_SCOPE(GraphBuilder, "PrePass %s %s", GetDepthDrawingModeString(DepthPass.EarlyZPassMode), GetDepthPassReason(DepthPass.bDitheredLODTransitionsUseStencil, ShaderPlatform));
RDG_CSV_STAT_EXCLUSIVE_SCOPE(GraphBuilder, RenderPrePass);
RDG_GPU_STAT_SCOPE(GraphBuilder, Prepass);
SCOPED_NAMED_EVENT(FDeferredShadingSceneRenderer_RenderPrePass, FColor::Emerald);
SCOPE_CYCLE_COUNTER(STAT_DepthDrawTime);
// The parallel path is taken only when both the RHI command list switch and the
// pre-pass console variable allow it.
const bool bParallelDepthPass = GRHICommandList.UseParallelAlgorithms() && CVarParallelPrePass.GetValueOnRenderThread();
RenderPrePassHMD(GraphBuilder, SceneDepthTexture);
if (DepthPass.IsRasterStencilDitherEnabled())
{
AddDitheredStencilFillPass(GraphBuilder, Views, SceneDepthTexture, DepthPass);
}
// Renders one depth mesh pass; the check below restricts DepthMeshPass to
// EMeshPass::DepthPass or EMeshPass::SecondStageDepthPass.
auto RenderDepthPass = [&](uint8 DepthMeshPass)
{
check(DepthMeshPass == EMeshPass::DepthPass || DepthMeshPass == EMeshPass::SecondStageDepthPass);
const bool bSecondStageDepthPass = DepthMeshPass == EMeshPass::SecondStageDepthPass;
if (bParallelDepthPass)
{
RDG_WAIT_FOR_TASKS_CONDITIONAL(GraphBuilder, IsDepthPassWaitForTasksEnabled());
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ++ViewIndex)
{
FViewInfo& View = Views[ViewIndex];
RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask);
RDG_EVENT_SCOPE_CONDITIONAL(GraphBuilder, Views.Num() > 1, "View%d", ViewIndex);
FMeshPassProcessorRenderState DrawRenderState;
SetupDepthPassState(DrawRenderState);
// For the second-stage pass, only views flagged with bUsesSecondStageDepthPass are rendered.
const bool bShouldRenderView = View.ShouldRenderView() && (bSecondStageDepthPass ? View.bUsesSecondStageDepthPass : true);
if (bShouldRenderView)
{
View.BeginRenderView();
FDepthPassParameters* PassParameters = GetDepthPassParameters(GraphBuilder, View, SceneDepthTexture);
View.ParallelMeshDrawCommandPasses[DepthMeshPass].BuildRenderingCommands(GraphBuilder, Scene->GPUScene, PassParameters->InstanceCullingDrawParams);
// The pass is added with SkipRenderPass; its lambda receives the immediate command list
// and hands a parallel command list set to DispatchDraw.
GraphBuilder.AddPass(
bSecondStageDepthPass ? RDG_EVENT_NAME("SecondStageDepthPassParallel") : RDG_EVENT_NAME("DepthPassParallel"),
PassParameters,
ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass,
[this, &View, PassParameters, DepthMeshPass](const FRDGPass* InPass, FRHICommandListImmediate& RHICmdList)
{
// The parallel rendering logic lives mainly here.
FRDGParallelCommandListSet ParallelCommandListSet(InPass, RHICmdList, GET_STATID(STAT_CLP_Prepass), View, FParallelCommandListBindings(PassParameters));
ParallelCommandListSet.SetHighPriority();
View.ParallelMeshDrawCommandPasses[DepthMeshPass].DispatchDraw(&ParallelCommandListSet, RHICmdList, &PassParameters->InstanceCullingDrawParams);
});
RenderPrePassEditorPrimitives(GraphBuilder, View, PassParameters, DrawRenderState, DepthPass.EarlyZPassMode, InstanceCullingManager);
}
}
}
// (The remainder of the function is elided in these notes.)
···
}
//Engine\Source\Runtime\Renderer\Private\MeshDrawCommands.cpp
// Submits this pass's mesh draw commands.
//   ParallelCommandListSet    - when non-null, draws are split into tasks that each record
//                               into their own command list obtained from the set;
//                               when null, draws are submitted directly on RHICmdList.
//   RHICmdList                - command list used by the immediate (non-parallel) path.
//   InstanceCullingDrawParams - optional; when non-null it is converted into override args.
void FParallelMeshDrawCommandPass::DispatchDraw(FParallelCommandListSet* ParallelCommandListSet, FRHICommandList& RHICmdList, const FInstanceCullingDrawParams* InstanceCullingDrawParams) const
{
TRACE_CPUPROFILER_EVENT_SCOPE(ParallelMdcDispatchDraw);
// Nothing to draw.
if (MaxNumDraws <= 0)
{
return;
}
FMeshDrawCommandOverrideArgs OverrideArgs;
if (InstanceCullingDrawParams)
{
OverrideArgs = GetMeshDrawCommandOverrideArgs(*InstanceCullingDrawParams);
}
if (ParallelCommandListSet)
{
// Parallel path: one task per slice of the draw range.
const ENamedThreads::Type RenderThread = ENamedThreads::GetRenderThread();
// Each draw task waits on the set's prerequisites (if any) and on TaskEventRef (if valid).
FGraphEventArray Prereqs;
if (ParallelCommandListSet->GetPrereqs())
{
Prereqs.Append(*ParallelCommandListSet->GetPrereqs());
}
if (TaskEventRef.IsValid())
{
Prereqs.Add(TaskEventRef);
}
// Distribute work evenly to the available task graph workers based on NumEstimatedDraws.
// Every task will then adjust it's working range based on FVisibleMeshDrawCommandProcessTask results.
// NOTE(review): NumTasks is used as a divisor below; this assumes both the worker thread
// count and ParallelCommandListSet->Width are at least 1 — TODO confirm.
const int32 NumThreads = FMath::Min<int32>(FTaskGraphInterface::Get().GetNumWorkerThreads(), ParallelCommandListSet->Width);
const int32 NumTasks = FMath::Min<int32>(NumThreads, FMath::DivideAndRoundUp(MaxNumDraws, ParallelCommandListSet->MinDrawsPerCommandList));
const int32 NumDrawsPerTask = FMath::DivideAndRoundUp(MaxNumDraws, NumTasks);
for (int32 TaskIndex = 0; TaskIndex < NumTasks; TaskIndex++)
{
const int32 StartIndex = TaskIndex * NumDrawsPerTask;
// The last task may receive fewer draws than NumDrawsPerTask.
const int32 NumDraws = FMath::Min(NumDrawsPerTask, MaxNumDraws - StartIndex);
checkSlow(NumDraws > 0);
// Each task records into its own command list, which is then registered with the set
// together with the task's completion event.
FRHICommandList* CmdList = ParallelCommandListSet->NewParallelCommandList();
FGraphEventRef AnyThreadCompletionEvent = TGraphTask<FDrawVisibleMeshCommandsAnyThreadTask>::CreateTask(&Prereqs, RenderThread)
.ConstructAndDispatchWhenReady(*CmdList, TaskContext.InstanceCullingContext, TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet,
OverrideArgs,
TaskContext.InstanceFactor,
TaskIndex, NumTasks);
ParallelCommandListSet->AddParallelCommandList(CmdList, AnyThreadCompletionEvent, NumDraws);
}
}
else
{
// Immediate path: wait for the mesh pass setup task, then submit on RHICmdList.
QUICK_SCOPE_CYCLE_COUNTER(STAT_MeshPassDrawImmediate);
WaitForMeshPassSetupTask(IsInActualRenderingThread() ? EWaitThread::Render : EWaitThread::Task);
if (TaskContext.bUseGPUScene)
{
if (TaskContext.MeshDrawCommands.Num() > 0)
{
TaskContext.InstanceCullingContext.SubmitDrawCommands(
TaskContext.MeshDrawCommands,
TaskContext.MinimalPipelineStatePassSet,
OverrideArgs,
0,
TaskContext.MeshDrawCommands.Num(),
TaskContext.InstanceFactor,
RHICmdList);
}
}
else
{
SubmitMeshDrawCommandsRange(TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, nullptr, 0, 0, TaskContext.bDynamicInstancing, 0, TaskContext.MeshDrawCommands.Num(), TaskContext.InstanceFactor, RHICmdList);
}
}
}
```
## 普通Pass渲染
```c++
// NOTE: this code is quoted from an older UE5 version.
// Engine\Source\Runtime\RHI\Public\RHIResources.h
// Render pass information.
struct FRHIRenderPassInfo
{
// Color render target entry.
struct FColorEntry
{
FRHITexture* RenderTarget;
FRHITexture* ResolveTarget;
int32 ArraySlice;
uint8 MipIndex;
ERenderTargetActions Action;
};
FColorEntry ColorRenderTargets[MaxSimultaneousRenderTargets];
// Depth/stencil target entry.
struct FDepthStencilEntry
{
FRHITexture* DepthStencilTarget;
FRHITexture* ResolveTarget;
EDepthStencilTargetActions Action;
FExclusiveDepthStencil ExclusiveDepthStencil;
};
FDepthStencilEntry DepthStencilRenderTarget;
// Resolve parameters.
FResolveParams ResolveParameters;
// Some RHIs can use a texture to control the sampling and/or shading resolution of different areas.
FTextureRHIRef FoveationTexture = nullptr;
// Some RHIs require a hint that occlusion queries will be used in this render pass.
uint32 NumOcclusionQueries = 0;
bool bOcclusionQueries = false;
// Some RHIs need to know whether this render pass will read and write the same texture
// when generating mip maps for partial resource transitions.
bool bGeneratingMips = false;
// If this render pass should be multiview, how many views are required.
uint8 MultiViewCount = 0;
// Hint for some RHIs that this render pass will have specific sub-passes.
ESubpassHint SubpassHint = ESubpassHint::None;
// Whether there are too many UAVs.
bool bTooManyUAVs = false;
bool bIsMSAA = false;
// Constructor overloads.
// Color, no depth, optional resolve, optional mip, optional array slice
explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveRT = nullptr, uint32 InMipIndex = 0, int32 InArraySlice = -1);
// Color MRTs, no depth
explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction);
// Color MRTs, no depth
explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* ResolveTargets[]);
// Color MRTs and depth
explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Color MRTs and depth
explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* ResolveRTs[], FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Depth, no color
explicit FRHIRenderPassInfo(FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT = nullptr, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Depth, no color, occlusion queries
explicit FRHIRenderPassInfo(FRHITexture* DepthRT, uint32 InNumOcclusionQueries, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT = nullptr, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Color and depth
explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Color and depth with resolve
explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveColorRT,
FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Color and depth with resolve and optional sample density
explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveColorRT,
FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FRHITexture* InFoveationTexture, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite);
// Tag type selecting the constructor that takes no render targets.
enum ENoRenderTargets
{
NoRenderTargets,
};
explicit FRHIRenderPassInfo(ENoRenderTargets Dummy);
explicit FRHIRenderPassInfo();
inline int32 GetNumColorRenderTargets() const;
RHI_API void Validate() const;
RHI_API void ConvertToRenderTargetsInfo(FRHISetRenderTargetsInfo& OutRTInfo) const;
(......)
};
// Engine\Source\Runtime\RHI\Public\RHICommandList.h
// Excerpt of FRHICommandList showing how a render pass is begun and ended.
class RHI_API FRHICommandList : public FRHIComputeCommandList
{
public:
    // Begins a render pass.
    //   InInfo - description of the pass (targets, actions, hints); validated before use.
    //   Name   - debug name of the pass; forwarded to the RHI and used in the warning below.
    void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name)
    {
        if (InInfo.bTooManyUAVs)
        {
            // The format string contains a %s placeholder, so the pass name has to be
            // supplied; without it the log call reads a missing variadic argument.
            UE_LOG(LogRHI, Warning, TEXT("RenderPass %s has too many UAVs"), Name);
        }
        InInfo.Validate();

        if (Bypass())
        {
            // Bypass mode: call the RHI interface directly.
            GetContext().RHIBeginRenderPass(InInfo, Name);
        }
        else
        {
            // Otherwise allocate an RHI command; the name is copied with AllocString
            // and the copy is what the command stores.
            TCHAR* NameCopy = AllocString(Name);
            ALLOC_COMMAND(FRHICommandBeginRenderPass)(InInfo, NameCopy);
        }

        // Mark the command list as being inside a render pass. This is set once, before
        // the helpers below run (the original excerpt redundantly assigned it a second
        // time after ResetSubpass).
        Data.bInsideRenderPass = true;
        // Cache the active render targets.
        CacheActiveRenderTargets(InInfo);
        // Reset the subpass state using the hint carried by the pass info.
        ResetSubpass(InInfo.SubpassHint);
    }

    // Ends the current render pass.
    void EndRenderPass()
    {
        // Either call the RHI interface directly or allocate an RHI command.
        if (Bypass())
        {
            GetContext().RHIEndRenderPass();
        }
        else
        {
            ALLOC_COMMAND(FRHICommandEndRenderPass)();
        }

        // Clear the inside-render-pass flag.
        Data.bInsideRenderPass = false;
        // Reset the subpass hint to None.
        ResetSubpass(ESubpassHint::None);
    }
};
```
它们的使用案例如下:
主要是`FRHIRenderPassInfo RenderPassInfo(1, ColorRTs, ERenderTargetActions::DontLoad_DontStore)`和`RHICmdList.BeginRenderPass(RenderPassInfo, TEXT("Test_MultiDrawIndirect"))`这两处调用。
```c++
// RHI unit test, quoted here as a usage example of FRHIRenderPassInfo together with
// BeginRenderPass/EndRenderPass. It issues several MultiDrawIndexedPrimitiveIndirect calls
// inside one render pass and then compares OutputBuffer against ExpectedDrawnInstances.
// Returns true on success, or when the RHI does not support multi-draw-indirect.
bool FRHIDrawTests::Test_MultiDrawIndirect(FRHICommandListImmediate& RHICmdList)
{
// Nothing to test without multi-draw-indirect support; report success.
if (!GRHIGlobals.SupportsMultiDrawIndirect)
{
return true;
}
// Probably could/should automatically enable in the outer scope when running RHI Unit Tests
// RenderCaptureInterface::FScopedCapture RenderCapture(true /*bEnable*/, &RHICmdList, TEXT("Test_MultiDrawIndirect"));
static constexpr uint32 MaxInstances = 8;
// D3D12 does not have a way to get the base instance ID (SV_InstanceID always starts from 0), so we must emulate it...
const uint32 InstanceIDs[MaxInstances] = { 0, 1, 2, 3, 4, 5, 6, 7 };
FBufferRHIRef InstanceIDBuffer = CreateBufferWithData(EBufferUsageFlags::VertexBuffer, ERHIAccess::VertexOrIndexBuffer, TEXT("Test_MultiDrawIndirect_InstanceID"), MakeArrayView(InstanceIDs));
FVertexDeclarationElementList VertexDeclarationElements;
VertexDeclarationElements.Add(FVertexElement(0, 0, VET_UInt, 0, 4, true /*per instance frequency*/));
FVertexDeclarationRHIRef VertexDeclarationRHI = PipelineStateCache::GetOrCreateVertexDeclaration(VertexDeclarationElements);
const uint16 Indices[3] = { 0, 1, 2 };
FBufferRHIRef IndexBuffer = CreateBufferWithData(EBufferUsageFlags::IndexBuffer, ERHIAccess::VertexOrIndexBuffer, TEXT("Test_MultiDrawIndirect_IndexBuffer"), MakeArrayView(Indices));
// Output buffer: one uint32 per instance slot.
static constexpr uint32 OutputBufferStride = sizeof(uint32);
static constexpr uint32 OutputBufferSize = OutputBufferStride * MaxInstances;
FRHIResourceCreateInfo OutputBufferCreateInfo(TEXT("Test_MultiDrawIndirect_OutputBuffer"));
FBufferRHIRef OutputBuffer = RHICmdList.CreateBuffer(OutputBufferSize, EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::SourceCopy, OutputBufferStride, ERHIAccess::UAVCompute, OutputBufferCreateInfo);
// GPU-side draw counts, one per slot; the draw calls below index into this buffer.
const uint32 CountValues[4] = { 1, 1, 16, 0 };
FBufferRHIRef CountBuffer = CreateBufferWithData(EBufferUsageFlags::DrawIndirect | EBufferUsageFlags::UnorderedAccess, ERHIAccess::IndirectArgs, TEXT("Test_MultiDrawIndirect_Count"), MakeArrayView(CountValues));
const FRHIDrawIndexedIndirectParameters DrawArgs[] =
{
// IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation
{3, 1, 0, 0, 0}, // fill slot 0
// gap in slot 1
{3, 2, 0, 0, 2}, // fill slots 2, 3 using 1 sub-draw
// gap in slot 4
{3, 1, 0, 0, 5}, // fill slots 5, 6 using 2 sub-draws
{3, 1, 0, 0, 6},
{3, 1, 0, 0, 7}, // this draw is expected to never execute
};
const uint32 ExpectedDrawnInstances[MaxInstances] = { 1, 0, 1, 1, 0, 1, 1, 0 };
FBufferRHIRef DrawArgBuffer = CreateBufferWithData(EBufferUsageFlags::DrawIndirect | EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::VertexBuffer, ERHIAccess::IndirectArgs,
TEXT("Test_MultiDrawIndirect_DrawArgs"), MakeArrayView(DrawArgs));
FUnorderedAccessViewRHIRef OutputBufferUAV = RHICmdList.CreateUnorderedAccessView(OutputBuffer,
FRHIViewDesc::CreateBufferUAV()
.SetType(FRHIViewDesc::EBufferType::Typed)
.SetFormat(PF_R32_UINT));
// Start from an all-zero output buffer.
RHICmdList.ClearUAVUint(OutputBufferUAV, FUintVector4(0));
const FIntPoint RenderTargetSize(4, 4);
FRHITextureDesc RenderTargetTextureDesc(ETextureDimension::Texture2D, ETextureCreateFlags::RenderTargetable, PF_B8G8R8A8, FClearValueBinding(), RenderTargetSize, 1, 1, 1, 1, 0);
FRHITextureCreateDesc RenderTargetCreateDesc(RenderTargetTextureDesc, ERHIAccess::RTV, TEXT("Test_MultiDrawIndirect_RenderTarget"));
FTextureRHIRef RenderTarget = RHICreateTexture(RenderTargetCreateDesc);
TShaderMapRef<FTestDrawInstancedVS> VertexShader(GetGlobalShaderMap(GMaxRHIFeatureLevel));
TShaderMapRef<FTestDrawInstancedPS> PixelShader(GetGlobalShaderMap(GMaxRHIFeatureLevel));
FGraphicsPipelineStateInitializer GraphicsPSOInit;
GraphicsPSOInit.BoundShaderState.VertexShaderRHI = VertexShader.GetVertexShader();
GraphicsPSOInit.BoundShaderState.VertexDeclarationRHI = VertexDeclarationRHI;
GraphicsPSOInit.BoundShaderState.PixelShaderRHI = PixelShader.GetPixelShader();
GraphicsPSOInit.DepthStencilState = TStaticDepthStencilState<false, CF_Always>::GetRHI();
GraphicsPSOInit.BlendState = TStaticBlendState<>::GetRHI();
GraphicsPSOInit.RasterizerState = TStaticRasterizerState<>::GetRHI();
GraphicsPSOInit.PrimitiveType = EPrimitiveType::PT_TriangleList;
// Render pass description: a single color target with the DontLoad_DontStore action
// (uses the "Color MRTs, no depth" constructor of FRHIRenderPassInfo).
FRHITexture* ColorRTs[1] = { RenderTarget.GetReference() };
FRHIRenderPassInfo RenderPassInfo(1, ColorRTs, ERenderTargetActions::DontLoad_DontStore);
RHICmdList.Transition(FRHITransitionInfo(OutputBufferUAV, ERHIAccess::UAVCompute, ERHIAccess::UAVGraphics, EResourceTransitionFlags::None));
RHICmdList.BeginUAVOverlap(); // Output UAV can be written without syncs between draws (each draw is expected to write into different slots)
// Everything up to EndRenderPass() is recorded inside the render pass.
RHICmdList.BeginRenderPass(RenderPassInfo, TEXT("Test_MultiDrawIndirect"));
RHICmdList.SetViewport(0, 0, 0, float(RenderTargetSize.X), float(RenderTargetSize.Y), 1);
RHICmdList.ApplyCachedRenderTargets(GraphicsPSOInit);
SetGraphicsPipelineState(RHICmdList, GraphicsPSOInit, 0);
check(InstanceIDBuffer->GetStride() == 4);
RHICmdList.SetStreamSource(0, InstanceIDBuffer, 0);
FRHIBatchedShaderParameters ShaderParameters;
ShaderParameters.SetUAVParameter(PixelShader->OutDrawnInstances.GetBaseIndex(), OutputBufferUAV);
RHICmdList.SetBatchedShaderParameters(PixelShader.GetPixelShader(), ShaderParameters);
const uint32 DrawArgsStride = sizeof(DrawArgs[0]);
const uint32 CountStride = sizeof(CountValues[0]);
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer,
DrawArgBuffer, DrawArgsStride*0, // 1 sub-draw with instance index 0
CountBuffer, CountStride*0, // count buffer contains 1 in this slot
5 // expect to draw only 1 instance due to GPU-side upper bound
);
// NOTE(review): DrawArgs[1] has InstanceCount 2, so "only 1 instance" in the comment
// below presumably means "only 1 sub-draw" — confirm against the engine source.
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer,
DrawArgBuffer, DrawArgsStride*1, // 1 sub-draw with 2 instances at base index 2
CountBuffer, CountStride*1, // count buffer contains 1 in this slot
4 // expect to draw only 1 instance due to GPU-side upper bound
);
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer,
DrawArgBuffer, DrawArgsStride*2, // 2 sub-draws with 1 instance each starting at base index 5
CountBuffer, CountStride*2, // count buffer contains 16 in this slot
2 // expect to draw only 2 instances due to CPU-side upper bound
);
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer,
DrawArgBuffer, DrawArgsStride*4, // 1 sub-draw with 1 instance each starting at base index 7
CountBuffer, CountStride*3, // count buffer contains 0 in this slot
1 // expect to skip the draw due to GPU-side count of 0
);
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer,
DrawArgBuffer, DrawArgsStride*4, // 1 sub-draw with 1 instance each starting at base index 7
CountBuffer, CountStride*0, // count buffer contains 1 in this slot
0 // expect to skip the draw due to CPU-side count of 0
);
RHICmdList.EndRenderPass();
RHICmdList.EndUAVOverlap();
// Make the output buffer readable as a copy source, then compare it with the expected values.
RHICmdList.Transition(FRHITransitionInfo(OutputBufferUAV, ERHIAccess::UAVGraphics, ERHIAccess::CopySrc, EResourceTransitionFlags::None));
TConstArrayView<uint8> ExpectedOutputView = MakeArrayView(reinterpret_cast<const uint8*>(ExpectedDrawnInstances), sizeof(ExpectedDrawnInstances));
bool bSucceeded = FRHIBufferTests::VerifyBufferContents(TEXT("Test_MultiDrawIndirect"), RHICmdList, OutputBuffer, ExpectedOutputView);
return bSucceeded;
}
```
## Subpass
先说一下Subpass的由来、作用和特点。
在传统的多Pass渲染中，每个Pass结束时通常会渲染出一组渲染纹理，部分成为着色器参数提供给下一个Pass采样读取。这种纹理采样方式不受任何限制，可以读取任意的邻域像素，使用任意的纹理过滤方式。这种方式虽然使用灵活，但在TBR（Tile-Based Renderer）硬件架构的设备中会有较大的消耗：渲染纹理的Pass通常会将渲染结果存储在On-chip的Tile Memory中，待Pass结束后会写回GPU显存（VRAM），写回GPU显存是个耗时耗电的操作。
![](https://img2020.cnblogs.com/blog/1617944/202108/1617944-20210818142400565-369905116.jpg)
_传统多Pass之间的内存存取模型多次发生于On-Chip和全局存储器之间。_
如果出现一种特殊的纹理使用情况：上一个Pass渲染处理的纹理，立即被下一个Pass使用，并且下一个Pass只采样像素位置自身的数据，而不需要采样邻域像素的位置。这种情况就符合了Subpass的使用情景。使用Subpass渲染的纹理结果只会存储在Tile Memory中，在Subpass结束后不会写回VRAM，而直接提供Tile Memory的数据给下一个Subpass采样读取。这样就避免了传统Pass结束写回GPU显存以及下一个Pass又从GPU显存读数据的耗时耗电操作，从而提升了性能。
![](https://img2020.cnblogs.com/blog/1617944/202108/1617944-20210818142406863-486058547.jpg)
_Subpass之间的内存存取模型都发生在On-Chip内。_
Subpass的相关代码主要集中在移动端中。UE涉及Subpass的接口和类型如下
```c++
// Subpass hint passed to the RHI.
enum class ESubpassHint : uint8
{
None, // Traditional rendering (no subpass).
DepthReadSubpass, // Depth-read subpass.
DeferredShadingSubpass, // Mobile deferred shading subpass.
};
// Engine\Source\Runtime\RHI\Public\RHICommandList.h
// Excerpt of FRHICommandListBase showing only the PSO context that carries the subpass
// state; everything else is elided at the "(......)" marker.
class RHI_API FRHICommandListBase : public FNoncopyable
{
(......)
protected:
// PSO context.
struct FPSOContext
{
uint32 CachedNumSimultanousRenderTargets = 0;
TStaticArray<FRHIRenderTargetView, MaxSimultaneousRenderTargets> CachedRenderTargets;
FRHIDepthRenderTargetView CachedDepthStencilTarget;
// Subpass hint.
ESubpassHint SubpassHint = ESubpassHint::None;
uint8 SubpassIndex = 0;
uint8 MultiViewCount = 0;
bool HasFragmentDensityAttachment = false;
} PSOContext;
};
// Excerpt of FRHICommandList limited to the subpass-related members; parts of
// BeginRenderPass/EndRenderPass are elided at the "(......)" markers.
class RHI_API FRHICommandList : public FRHIComputeCommandList
{
public:
void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name)
{
(......)
CacheActiveRenderTargets(InInfo);
// Set the subpass data from the hint carried by the pass info.
ResetSubpass(InInfo.SubpassHint);
Data.bInsideRenderPass = true;
}
void EndRenderPass()
{
(......)
// Reset the subpass hint to None.
ResetSubpass(ESubpassHint::None);
}
// Advances to the next subpass.
void NextSubpass()
{
// Either call the RHI interface directly or allocate an RHI command.
if (Bypass())
{
GetContext().RHINextSubpass();
}
else
{
ALLOC_COMMAND(FRHICommandNextSubpass)();
}
// Increment the subpass index.
IncrementSubpass();
}
// Increments the subpass index.
void IncrementSubpass()
{
PSOContext.SubpassIndex++;
}
// Resets the subpass data: stores the given hint and restarts the index at 0.
void ResetSubpass(ESubpassHint SubpassHint)
{
PSOContext.SubpassHint = SubpassHint;
PSOContext.SubpassIndex = 0;
}
};
```

View File

@ -0,0 +1,9 @@
---
title: 剖析虚幻渲染体系11- RDG
date: 2024-02-04 21:42:54
excerpt:
tags:
rating: ⭐
---
# 前言
https://www.cnblogs.com/timlly/p/15217090.html

View File

@ -0,0 +1,8 @@
---
title: c++内存泄漏分析工具
date: 2024-02-04 21:15:26
excerpt:
tags:
rating: ⭐
---
https://github.com/dpull/tracing_malloc?tab=readme-ov-file