diff --git a/03-UnrealEngine/Rendering/RenderingPipeline/剖析虚幻渲染体系(09)- 材质体系.md b/03-UnrealEngine/Rendering/RenderingPipeline/剖析虚幻渲染体系(09)- 材质体系.md new file mode 100644 index 0000000..8a2dc5f --- /dev/null +++ b/03-UnrealEngine/Rendering/RenderingPipeline/剖析虚幻渲染体系(09)- 材质体系.md @@ -0,0 +1,9 @@ +--- +title: 剖析虚幻渲染体系(09)- 材质体系 +date: 2024-02-04 21:44:37 +excerpt: +tags: +rating: ⭐ +--- +# 前言 +https://www.cnblogs.com/timlly/p/15109132.html \ No newline at end of file diff --git a/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(08)- Shader体系.md b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(08)- Shader体系.md new file mode 100644 index 0000000..12a7798 --- /dev/null +++ b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(08)- Shader体系.md @@ -0,0 +1,9 @@ +--- +title: 剖析虚幻渲染体系(08)- Shader体系 +date: 2024-02-04 21:44:10 +excerpt: +tags: +rating: ⭐ +--- +# 前言 +https://www.cnblogs.com/timlly/p/15092257.html \ No newline at end of file diff --git a/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(10)- RHI.md b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(10)- RHI.md index bc5bd99..ec56472 100644 --- a/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(10)- RHI.md +++ b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(10)- RHI.md @@ -48,7 +48,10 @@ rating: ⭐ - D3D12存储了FD3D12Adapter* Adapter、FD3D12CommandContext* CmdContext、 FD3D12CommandContextRedirector* CmdContextRedirector。 - FDynamicRHI:FDynamicRHI是由动态绑定的RHI实现的接口,它定义的接口和CommandList、CommandContext比较相似。 - 代码详见[[#FDynamicRHI]] -- FRHICommandListExecutor +- FRHICommandListExecutor:负责将**Renderer层的RHI中间指令转译(或直接调用)到目标平台的图形API**,它在RHI体系中起着举足轻重的作用。 +- FParallelCommandListSet:用于实现并行渲染。使用案例详见[[#FParallelCommandListSet]]。目前5.3只有下面2个子类: + - FRDGParallelCommandListSet + - FShadowParallelCommandListSet ## FDynamicRHI ```c++ @@ -199,4 +202,536 @@ class RHI_API FDynamicRHI 
可以在**FRHICommandListImmediate**的**ExecuteCommandList()**、**BlockUntilGPUIdle()**、**Tick()** 看到调用。 >需要注意的是,传统图形API(D3D11、OpenGL)除了继承FDynamicRHI,还需要继承**IRHICommandContextPSOFallback**,因为需要借助后者的接口处理PSO的数据和行为,以保证传统和现代API对PSO的一致处理行为。也正因为此,现代图形API(D3D12、Vulkan、Metal)不需要继承**IRHICommandContext**的任何继承体系的类型,单单直接继承**FDynamicRHI**就可以处理RHI层的所有数据和操作。 -既然现代图形API(D3D12、Vulkan、Metal)的**DynamicRHI**没有继承**IRHICommandContext**的任何继承体系的类型,那么它们是如何实现FDynamicRHI::RHIGetDefaultContext的接口?下面以FD3D12DynamicRHI为例: \ No newline at end of file +既然现代图形API(D3D12、Vulkan、Metal)的**DynamicRHI**没有继承**IRHICommandContext**的任何继承体系的类型,那么它们是如何实现FDynamicRHI::RHIGetDefaultContext的接口?下面以FD3D12DynamicRHI为例: + +## FParallelCommandListSet +```c++ +//Engine\Source\Runtime\Renderer\Private\DepthRendering.cpp +void FDeferredShadingSceneRenderer::RenderPrePass(FRDGBuilder& GraphBuilder, FRDGTextureRef SceneDepthTexture, FInstanceCullingManager& InstanceCullingManager, FRDGTextureRef* FirstStageDepthBuffer) +{ + RDG_EVENT_SCOPE(GraphBuilder, "PrePass %s %s", GetDepthDrawingModeString(DepthPass.EarlyZPassMode), GetDepthPassReason(DepthPass.bDitheredLODTransitionsUseStencil, ShaderPlatform)); + RDG_CSV_STAT_EXCLUSIVE_SCOPE(GraphBuilder, RenderPrePass); + RDG_GPU_STAT_SCOPE(GraphBuilder, Prepass); + + SCOPED_NAMED_EVENT(FDeferredShadingSceneRenderer_RenderPrePass, FColor::Emerald); + SCOPE_CYCLE_COUNTER(STAT_DepthDrawTime); + + const bool bParallelDepthPass = GRHICommandList.UseParallelAlgorithms() && CVarParallelPrePass.GetValueOnRenderThread(); + + RenderPrePassHMD(GraphBuilder, SceneDepthTexture); + + if (DepthPass.IsRasterStencilDitherEnabled()) + { + AddDitheredStencilFillPass(GraphBuilder, Views, SceneDepthTexture, DepthPass); + } + + auto RenderDepthPass = [&](uint8 DepthMeshPass) + { + check(DepthMeshPass == EMeshPass::DepthPass || DepthMeshPass == EMeshPass::SecondStageDepthPass); + const bool bSecondStageDepthPass = DepthMeshPass == EMeshPass::SecondStageDepthPass; + + if (bParallelDepthPass) + { + 
RDG_WAIT_FOR_TASKS_CONDITIONAL(GraphBuilder, IsDepthPassWaitForTasksEnabled()); + + for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ++ViewIndex) + { + FViewInfo& View = Views[ViewIndex]; + RDG_GPU_MASK_SCOPE(GraphBuilder, View.GPUMask); + RDG_EVENT_SCOPE_CONDITIONAL(GraphBuilder, Views.Num() > 1, "View%d", ViewIndex); + + FMeshPassProcessorRenderState DrawRenderState; + SetupDepthPassState(DrawRenderState); + + const bool bShouldRenderView = View.ShouldRenderView() && (bSecondStageDepthPass ? View.bUsesSecondStageDepthPass : true); + if (bShouldRenderView) + { + View.BeginRenderView(); + + FDepthPassParameters* PassParameters = GetDepthPassParameters(GraphBuilder, View, SceneDepthTexture); + View.ParallelMeshDrawCommandPasses[DepthMeshPass].BuildRenderingCommands(GraphBuilder, Scene->GPUScene, PassParameters->InstanceCullingDrawParams); + + GraphBuilder.AddPass( + bSecondStageDepthPass ? RDG_EVENT_NAME("SecondStageDepthPassParallel") : RDG_EVENT_NAME("DepthPassParallel"), + PassParameters, + ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass, + [this, &View, PassParameters, DepthMeshPass](const FRDGPass* InPass, FRHICommandListImmediate& RHICmdList) + { + //并行渲染逻辑主要在这里 + FRDGParallelCommandListSet ParallelCommandListSet(InPass, RHICmdList, GET_STATID(STAT_CLP_Prepass), View, FParallelCommandListBindings(PassParameters)); + ParallelCommandListSet.SetHighPriority(); + View.ParallelMeshDrawCommandPasses[DepthMeshPass].DispatchDraw(&ParallelCommandListSet, RHICmdList, &PassParameters->InstanceCullingDrawParams); + }); + + RenderPrePassEditorPrimitives(GraphBuilder, View, PassParameters, DrawRenderState, DepthPass.EarlyZPassMode, InstanceCullingManager); + } + } + } +··· +} + +//Engine\Source\Runtime\Renderer\Private\MeshDrawCommands.cpp +void FParallelMeshDrawCommandPass::DispatchDraw(FParallelCommandListSet* ParallelCommandListSet, FRHICommandList& RHICmdList, const FInstanceCullingDrawParams* InstanceCullingDrawParams) const +{ + 
TRACE_CPUPROFILER_EVENT_SCOPE(ParallelMdcDispatchDraw); + if (MaxNumDraws <= 0) + { + return; + } + + FMeshDrawCommandOverrideArgs OverrideArgs; + if (InstanceCullingDrawParams) + { + OverrideArgs = GetMeshDrawCommandOverrideArgs(*InstanceCullingDrawParams); + } + + if (ParallelCommandListSet) + { + const ENamedThreads::Type RenderThread = ENamedThreads::GetRenderThread(); + + FGraphEventArray Prereqs; + if (ParallelCommandListSet->GetPrereqs()) + { + Prereqs.Append(*ParallelCommandListSet->GetPrereqs()); + } + if (TaskEventRef.IsValid()) + { + Prereqs.Add(TaskEventRef); + } + + // Distribute work evenly to the available task graph workers based on NumEstimatedDraws. + // Every task will then adjust it's working range based on FVisibleMeshDrawCommandProcessTask results. + const int32 NumThreads = FMath::Min(FTaskGraphInterface::Get().GetNumWorkerThreads(), ParallelCommandListSet->Width); + const int32 NumTasks = FMath::Min(NumThreads, FMath::DivideAndRoundUp(MaxNumDraws, ParallelCommandListSet->MinDrawsPerCommandList)); + const int32 NumDrawsPerTask = FMath::DivideAndRoundUp(MaxNumDraws, NumTasks); + + for (int32 TaskIndex = 0; TaskIndex < NumTasks; TaskIndex++) + { + const int32 StartIndex = TaskIndex * NumDrawsPerTask; + const int32 NumDraws = FMath::Min(NumDrawsPerTask, MaxNumDraws - StartIndex); + checkSlow(NumDraws > 0); + + FRHICommandList* CmdList = ParallelCommandListSet->NewParallelCommandList(); + + FGraphEventRef AnyThreadCompletionEvent = TGraphTask::CreateTask(&Prereqs, RenderThread) + .ConstructAndDispatchWhenReady(*CmdList, TaskContext.InstanceCullingContext, TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, + OverrideArgs, + TaskContext.InstanceFactor, + TaskIndex, NumTasks); + + ParallelCommandListSet->AddParallelCommandList(CmdList, AnyThreadCompletionEvent, NumDraws); + } + } + else + { + QUICK_SCOPE_CYCLE_COUNTER(STAT_MeshPassDrawImmediate); + + WaitForMeshPassSetupTask(IsInActualRenderingThread() ? 
EWaitThread::Render : EWaitThread::Task); + + if (TaskContext.bUseGPUScene) + { + if (TaskContext.MeshDrawCommands.Num() > 0) + { + TaskContext.InstanceCullingContext.SubmitDrawCommands( + TaskContext.MeshDrawCommands, + TaskContext.MinimalPipelineStatePassSet, + OverrideArgs, + 0, + TaskContext.MeshDrawCommands.Num(), + TaskContext.InstanceFactor, + RHICmdList); + } + } + else + { + SubmitMeshDrawCommandsRange(TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, nullptr, 0, 0, TaskContext.bDynamicInstancing, 0, TaskContext.MeshDrawCommands.Num(), TaskContext.InstanceFactor, RHICmdList); + } + } +} +``` + +## 普通Pass渲染 +```c++ +// 代码为UE5旧版本代码 +// Engine\Source\Runtime\RHI\Public\RHIResources.h + +// 渲染通道信息. +struct FRHIRenderPassInfo +{ + // 渲染纹理信息. + struct FColorEntry + { + FRHITexture* RenderTarget; + FRHITexture* ResolveTarget; + int32 ArraySlice; + uint8 MipIndex; + ERenderTargetActions Action; + }; + FColorEntry ColorRenderTargets[MaxSimultaneousRenderTargets]; + + // 深度模板信息. + struct FDepthStencilEntry + { + FRHITexture* DepthStencilTarget; + FRHITexture* ResolveTarget; + EDepthStencilTargetActions Action; + FExclusiveDepthStencil ExclusiveDepthStencil; + }; + FDepthStencilEntry DepthStencilRenderTarget; + + // 解析参数. + FResolveParams ResolveParameters; + + // 部分RHI可以使用纹理来控制不同区域的采样和/或着色分辨率 + FTextureRHIRef FoveationTexture = nullptr; + + // 部分RHI需要一个提示,遮挡查询将在这个渲染通道中使用 + uint32 NumOcclusionQueries = 0; + bool bOcclusionQueries = false; + + // 部分RHI需要知道,在为部分资源转换生成mip映射的情况下,这个渲染通道是否将读取和写入相同的纹理. + bool bGeneratingMips = false; + + // 如果这个renderpass应该是多视图,则需要多少视图. + uint8 MultiViewCount = 0; + + // 部分RHI的提示,渲染通道将有特定的子通道. + ESubpassHint SubpassHint = ESubpassHint::None; + + // 是否太多UAV. + bool bTooManyUAVs = false; + bool bIsMSAA = false; + + // 不同的构造函数. 
+ + // Color, no depth, optional resolve, optional mip, optional array slice + explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveRT = nullptr, uint32 InMipIndex = 0, int32 InArraySlice = -1); + // Color MRTs, no depth + explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction); + // Color MRTs, no depth + explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* ResolveTargets[]); + // Color MRTs and depth + explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Color MRTs and depth + explicit FRHIRenderPassInfo(int32 NumColorRTs, FRHITexture* ColorRTs[], ERenderTargetActions ColorAction, FRHITexture* ResolveRTs[], FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Depth, no color + explicit FRHIRenderPassInfo(FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT = nullptr, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Depth, no color, occlusion queries + explicit FRHIRenderPassInfo(FRHITexture* DepthRT, uint32 InNumOcclusionQueries, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT = nullptr, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Color and depth + explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Color and depth with resolve + explicit 
FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveColorRT, + FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + // Color and depth with resolve and optional sample density + explicit FRHIRenderPassInfo(FRHITexture* ColorRT, ERenderTargetActions ColorAction, FRHITexture* ResolveColorRT, + FRHITexture* DepthRT, EDepthStencilTargetActions DepthActions, FRHITexture* ResolveDepthRT, FRHITexture* InFoveationTexture, FExclusiveDepthStencil InEDS = FExclusiveDepthStencil::DepthWrite_StencilWrite); + + enum ENoRenderTargets + { + NoRenderTargets, + }; + explicit FRHIRenderPassInfo(ENoRenderTargets Dummy); + explicit FRHIRenderPassInfo(); + + inline int32 GetNumColorRenderTargets() const; + RHI_API void Validate() const; + RHI_API void ConvertToRenderTargetsInfo(FRHISetRenderTargetsInfo& OutRTInfo) const; + + (......) +}; + +// Engine\Source\Runtime\RHI\Public\RHICommandList.h + +class RHI_API FRHICommandList : public FRHIComputeCommandList +{ +public: + void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name) + { + if (InInfo.bTooManyUAVs) + { + UE_LOG(LogRHI, Warning, TEXT("RenderPass %s has too many UAVs")); + } + InInfo.Validate(); + + // 直接调用RHI的接口. + if (Bypass()) + { + GetContext().RHIBeginRenderPass(InInfo, Name); + } + // 分配RHI命令. + else + { + TCHAR* NameCopy = AllocString(Name); + ALLOC_COMMAND(FRHICommandBeginRenderPass)(InInfo, NameCopy); + } + // 设置在RenderPass内标记. + Data.bInsideRenderPass = true; + + // 缓存活动的RT. + CacheActiveRenderTargets(InInfo); + // 重置子Pass. + ResetSubpass(InInfo.SubpassHint); + Data.bInsideRenderPass = true; + } + + void EndRenderPass() + { + // 调用或分配RHI接口. + if (Bypass()) + { + GetContext().RHIEndRenderPass(); + } + else + { + ALLOC_COMMAND(FRHICommandEndRenderPass)(); + } + // 重置在RenderPass内标记. + Data.bInsideRenderPass = false; + // 重置子Pass标记为None. 
+ ResetSubpass(ESubpassHint::None); + } +}; +``` + +它们的使用案例如下: +主要是`FRHIRenderPassInfo RenderPassInfo(1, ColorRTs, ERenderTargetActions::DontLoad_DontStore)`与`RHICmdList.BeginRenderPass(RenderPassInfo, TEXT("Test_MultiDrawIndirect"))` +```c++ +bool FRHIDrawTests::Test_MultiDrawIndirect(FRHICommandListImmediate& RHICmdList) +{ + if (!GRHIGlobals.SupportsMultiDrawIndirect) + { + return true; + } + + // Probably could/should automatically enable in the outer scope when running RHI Unit Tests + // RenderCaptureInterface::FScopedCapture RenderCapture(true /*bEnable*/, &RHICmdList, TEXT("Test_MultiDrawIndirect")); + + static constexpr uint32 MaxInstances = 8; + + // D3D12 does not have a way to get the base instance ID (SV_InstanceID always starts from 0), so we must emulate it... + const uint32 InstanceIDs[MaxInstances] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + FBufferRHIRef InstanceIDBuffer = CreateBufferWithData(EBufferUsageFlags::VertexBuffer, ERHIAccess::VertexOrIndexBuffer, TEXT("Test_MultiDrawIndirect_InstanceID"), MakeArrayView(InstanceIDs)); + + FVertexDeclarationElementList VertexDeclarationElements; + VertexDeclarationElements.Add(FVertexElement(0, 0, VET_UInt, 0, 4, true /*per instance frequency*/)); + FVertexDeclarationRHIRef VertexDeclarationRHI = PipelineStateCache::GetOrCreateVertexDeclaration(VertexDeclarationElements); + + const uint16 Indices[3] = { 0, 1, 2 }; + FBufferRHIRef IndexBuffer = CreateBufferWithData(EBufferUsageFlags::IndexBuffer, ERHIAccess::VertexOrIndexBuffer, TEXT("Test_MultiDrawIndirect_IndexBuffer"), MakeArrayView(Indices)); + + static constexpr uint32 OutputBufferStride = sizeof(uint32); + static constexpr uint32 OutputBufferSize = OutputBufferStride * MaxInstances; + FRHIResourceCreateInfo OutputBufferCreateInfo(TEXT("Test_MultiDrawIndirect_OutputBuffer")); + FBufferRHIRef OutputBuffer = RHICmdList.CreateBuffer(OutputBufferSize, EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::SourceCopy, OutputBufferStride, ERHIAccess::UAVCompute, 
OutputBufferCreateInfo); + + const uint32 CountValues[4] = { 1, 1, 16, 0 }; + FBufferRHIRef CountBuffer = CreateBufferWithData(EBufferUsageFlags::DrawIndirect | EBufferUsageFlags::UnorderedAccess, ERHIAccess::IndirectArgs, TEXT("Test_MultiDrawIndirect_Count"), MakeArrayView(CountValues)); + + const FRHIDrawIndexedIndirectParameters DrawArgs[] = + { + // IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation + {3, 1, 0, 0, 0}, // fill slot 0 + // gap in slot 1 + {3, 2, 0, 0, 2}, // fill slots 2, 3 using 1 sub-draw + // gap in slot 4 + {3, 1, 0, 0, 5}, // fill slots 5, 6 using 2 sub-draws + {3, 1, 0, 0, 6}, + {3, 1, 0, 0, 7}, // this draw is expected to never execute + }; + + const uint32 ExpectedDrawnInstances[MaxInstances] = { 1, 0, 1, 1, 0, 1, 1, 0 }; + + FBufferRHIRef DrawArgBuffer = CreateBufferWithData(EBufferUsageFlags::DrawIndirect | EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::VertexBuffer, ERHIAccess::IndirectArgs, + TEXT("Test_MultiDrawIndirect_DrawArgs"), MakeArrayView(DrawArgs)); + + FUnorderedAccessViewRHIRef OutputBufferUAV = RHICmdList.CreateUnorderedAccessView(OutputBuffer, + FRHIViewDesc::CreateBufferUAV() + .SetType(FRHIViewDesc::EBufferType::Typed) + .SetFormat(PF_R32_UINT)); + + RHICmdList.ClearUAVUint(OutputBufferUAV, FUintVector4(0)); + + const FIntPoint RenderTargetSize(4, 4); + FRHITextureDesc RenderTargetTextureDesc(ETextureDimension::Texture2D, ETextureCreateFlags::RenderTargetable, PF_B8G8R8A8, FClearValueBinding(), RenderTargetSize, 1, 1, 1, 1, 0); + FRHITextureCreateDesc RenderTargetCreateDesc(RenderTargetTextureDesc, ERHIAccess::RTV, TEXT("Test_MultiDrawIndirect_RenderTarget")); + FTextureRHIRef RenderTarget = RHICreateTexture(RenderTargetCreateDesc); + + TShaderMapRef VertexShader(GetGlobalShaderMap(GMaxRHIFeatureLevel)); + TShaderMapRef PixelShader(GetGlobalShaderMap(GMaxRHIFeatureLevel)); + + FGraphicsPipelineStateInitializer GraphicsPSOInit; + + 
GraphicsPSOInit.BoundShaderState.VertexShaderRHI = VertexShader.GetVertexShader(); + GraphicsPSOInit.BoundShaderState.VertexDeclarationRHI = VertexDeclarationRHI; + GraphicsPSOInit.BoundShaderState.PixelShaderRHI = PixelShader.GetPixelShader(); + GraphicsPSOInit.DepthStencilState = TStaticDepthStencilState::GetRHI(); + GraphicsPSOInit.BlendState = TStaticBlendState<>::GetRHI(); + GraphicsPSOInit.RasterizerState = TStaticRasterizerState<>::GetRHI(); + GraphicsPSOInit.PrimitiveType = EPrimitiveType::PT_TriangleList; + + FRHITexture* ColorRTs[1] = { RenderTarget.GetReference() }; + FRHIRenderPassInfo RenderPassInfo(1, ColorRTs, ERenderTargetActions::DontLoad_DontStore); + + RHICmdList.Transition(FRHITransitionInfo(OutputBufferUAV, ERHIAccess::UAVCompute, ERHIAccess::UAVGraphics, EResourceTransitionFlags::None)); + RHICmdList.BeginUAVOverlap(); // Output UAV can be written without syncs between draws (each draw is expected to write into different slots) + + RHICmdList.BeginRenderPass(RenderPassInfo, TEXT("Test_MultiDrawIndirect")); + RHICmdList.SetViewport(0, 0, 0, float(RenderTargetSize.X), float(RenderTargetSize.Y), 1); + + RHICmdList.ApplyCachedRenderTargets(GraphicsPSOInit); + SetGraphicsPipelineState(RHICmdList, GraphicsPSOInit, 0); + + check(InstanceIDBuffer->GetStride() == 4); + RHICmdList.SetStreamSource(0, InstanceIDBuffer, 0); + + FRHIBatchedShaderParameters ShaderParameters; + ShaderParameters.SetUAVParameter(PixelShader->OutDrawnInstances.GetBaseIndex(), OutputBufferUAV); + RHICmdList.SetBatchedShaderParameters(PixelShader.GetPixelShader(), ShaderParameters); + + const uint32 DrawArgsStride = sizeof(DrawArgs[0]); + const uint32 CountStride = sizeof(CountValues[0]); + + RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer, + DrawArgBuffer, DrawArgsStride*0, // 1 sub-draw with instance index 0 + CountBuffer, CountStride*0, // count buffer contains 1 in this slot + 5 // expect to draw only 1 instance due to GPU-side upper bound + ); + + 
RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer, + DrawArgBuffer, DrawArgsStride*1, // 1 sub-draw with 2 instances at base index 2 + CountBuffer, CountStride*1, // count buffer contains 1 in this slot + 4 // expect to draw only 1 instance due to GPU-side upper bound + ); + + RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer, + DrawArgBuffer, DrawArgsStride*2, // 2 sub-draws with 1 instance each starting at base index 5 + CountBuffer, CountStride*2, // count buffer contains 16 in this slot + 2 // expect to draw only 2 instances due to CPU-side upper bound + ); + + RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer, + DrawArgBuffer, DrawArgsStride*4, // 1 sub-draw with 1 instance each starting at base index 7 + CountBuffer, CountStride*3, // count buffer contains 0 in this slot + 1 // expect to skip the draw due to GPU-side count of 0 + ); + + RHICmdList.MultiDrawIndexedPrimitiveIndirect(IndexBuffer, + DrawArgBuffer, DrawArgsStride*4, // 1 sub-draw with 1 instance each starting at base index 7 + CountBuffer, CountStride*0, // count buffer contains 1 in this slot + 0 // expect to skip the draw due to CPU-side count of 0 + ); + + RHICmdList.EndRenderPass(); + + RHICmdList.EndUAVOverlap(); + + RHICmdList.Transition(FRHITransitionInfo(OutputBufferUAV, ERHIAccess::UAVGraphics, ERHIAccess::CopySrc, EResourceTransitionFlags::None)); + + TConstArrayView ExpectedOutputView = MakeArrayView(reinterpret_cast(ExpectedDrawnInstances), sizeof(ExpectedDrawnInstances)); + bool bSucceeded = FRHIBufferTests::VerifyBufferContents(TEXT("Test_MultiDrawIndirect"), RHICmdList, OutputBuffer, ExpectedOutputView); + + return bSucceeded; +} +``` + +## Subpass +先说一下Subpass的由来、作用和特点。 + +在传统的多Pass渲染中,每个Pass结束时通常会渲染出一组渲染纹理,部分成为着色器参数提供给下一个Pass采样读取。这种纹理采样方式不受任何限制,可以读取任意的邻域像素,使用任意的纹理过滤方式。这种方式虽然使用灵活,但在TBR(Tile-Based Renderer)硬件架构的设备中会有较大的消耗:渲染纹理的Pass通常会将渲染结果存储在On-chip的Tile Memory中,待Pass结束后会写回GPU显存(VRAM)中,写回GPU显存是个耗时耗电的操作。 + 
+![](https://img2020.cnblogs.com/blog/1617944/202108/1617944-20210818142400565-369905116.jpg) + +_传统多Pass之间的内存存取模型,多次发生于On-Chip和全局存储器之间。_ + +如果出现一种特殊的纹理使用情况:上一个Pass渲染处理的纹理,立即被下一个Pass使用,并且下一个Pass只采样像素位置自身的数据,而不需要采样邻域像素的位置。这种情况就符合了Subpass的使用情景。使用Subpass渲染的纹理结果只会存储在Tile Memory中,在Subpass结束后不会写回VRAM,而直接提供Tile Memory的数据给下一个Subpass采样读取。这样就避免了传统Pass结束写回GPU显存以及下一个Pass又从GPU显存读数据的耗时耗电操作,从而提升了性能。 + +![](https://img2020.cnblogs.com/blog/1617944/202108/1617944-20210818142406863-486058547.jpg) + +_Subpass之间的内存存取模型,都发生在On-Chip内。_ + +Subpass的相关代码主要集中在移动端中。UE涉及Subpass的接口和类型如下: +```c++ +// 提供给RHI的Subpass标记. +enum class ESubpassHint : uint8 +{ + None, // 传统渲染(非Subpass) + DepthReadSubpass, // 深度读取Subpass. + DeferredShadingSubpass, // 移动端延迟着色Subpass. +}; + + +// Engine\Source\Runtime\RHI\Public\RHICommandList.h + +class RHI_API FRHICommandListBase : public FNoncopyable +{ + (......) + +protected: + // PSO上下文. + struct FPSOContext + { + uint32 CachedNumSimultanousRenderTargets = 0; + TStaticArray CachedRenderTargets; + FRHIDepthRenderTargetView CachedDepthStencilTarget; + + // Subpass提示标记. + ESubpassHint SubpassHint = ESubpassHint::None; + uint8 SubpassIndex = 0; + uint8 MultiViewCount = 0; + bool HasFragmentDensityAttachment = false; + } PSOContext; +}; + +class RHI_API FRHICommandList : public FRHIComputeCommandList +{ +public: + void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name) + { + (......) + + CacheActiveRenderTargets(InInfo); + // 设置Subpass数据. + ResetSubpass(InInfo.SubpassHint); + Data.bInsideRenderPass = true; + } + + void EndRenderPass() + { + (......) + + // 重置Subpass标记为None. + ResetSubpass(ESubpassHint::None); + } + + // 下一个Subpass. + void NextSubpass() + { + // 分配或调用RHI接口. + if (Bypass()) + { + GetContext().RHINextSubpass(); + } + else + { + ALLOC_COMMAND(FRHICommandNextSubpass)(); + } + + // 增加Subpass计数. + IncrementSubpass(); + } + + // 增加subpass计数. + void IncrementSubpass() + { + PSOContext.SubpassIndex++; + } + + // 重置Subpass数据. 
+ void ResetSubpass(ESubpassHint SubpassHint) + { + PSOContext.SubpassHint = SubpassHint; + PSOContext.SubpassIndex = 0; + } +}; +``` \ No newline at end of file diff --git a/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(11)- RDG.md b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(11)- RDG.md new file mode 100644 index 0000000..1d55fc5 --- /dev/null +++ b/03-UnrealEngine/Rendering/RenderingPipeline/向往渲染系列文章阅读笔记/剖析虚幻渲染体系(11)- RDG.md @@ -0,0 +1,9 @@ +--- +title: 剖析虚幻渲染体系(11)- RDG +date: 2024-02-04 21:42:54 +excerpt: +tags: +rating: ⭐ +--- +# 前言 +https://www.cnblogs.com/timlly/p/15217090.html \ No newline at end of file diff --git a/03-UnrealEngine/性能优化/c++内存泄漏分析工具.md b/03-UnrealEngine/性能优化/c++内存泄漏分析工具.md new file mode 100644 index 0000000..2beafde --- /dev/null +++ b/03-UnrealEngine/性能优化/c++内存泄漏分析工具.md @@ -0,0 +1,8 @@ +--- +title: c++内存泄漏分析工具 +date: 2024-02-04 21:15:26 +excerpt: +tags: +rating: ⭐ +--- +https://github.com/dpull/tracing_malloc?tab=readme-ov-file \ No newline at end of file