diff --git a/examples/src/examples/graphics/render-to-texture.example.mjs b/examples/src/examples/graphics/render-to-texture.example.mjs index f8553004063..1a174af17a2 100644 --- a/examples/src/examples/graphics/render-to-texture.example.mjs +++ b/examples/src/examples/graphics/render-to-texture.example.mjs @@ -26,7 +26,16 @@ const assets = { }; const gfxOptions = { - deviceTypes: [deviceType] + deviceTypes: [deviceType], + + // Request the main back-buffer's MSAA color and depth attachments to be allocated as transient + // ("memoryless") attachments. On tile-based GPUs (mobile / Apple Silicon) this lets the driver + // keep their contents in fast on-chip memory and skip VRAM allocation entirely. This is valid + // here because the back-buffer is cleared each frame and never read back: there is no scene + // color grab (sceneColorMap) and no scene depth grab (sceneDepthMap) / depth prepass. These are + // WebGPU-only hints and are silently ignored on WebGL2 or where the feature is unsupported. + transientColor: true, + transientDepth: true }; const device = await pc.createGraphicsDevice(canvas, gfxOptions); @@ -151,7 +160,17 @@ assetListLoader.load(() => { colorBuffer: texture, depth: true, flipY: !app.graphicsDevice.isWebGPU, - samples: 2 + samples: 2, + + // Allocate this render target's MSAA color and depth attachments as transient + // ("memoryless") attachments (WebGPU only; ignored elsewhere). The multi-sampled color + // buffer is only ever resolved into the single-sampled `texture` we sample below - the MSAA + // buffer itself is never sampled, stored or reloaded - and the depth buffer is used only for + // in-pass depth testing and is never grabbed or resolved. Both therefore only need their + // contents within the render pass, so tile-based GPUs can keep them on-chip. Note that + // transientColor requires MSAA (samples > 1); it is a no-op for single-sampled color. + transientColor: true, + transientDepth: true }); // create a layer for object that do not render into texture, add it right after the world layer diff --git a/src/platform/graphics/graphics-device-create.js b/src/platform/graphics/graphics-device-create.js index 03802e6fcfe..3dfe4f70cf9 100644 --- a/src/platform/graphics/graphics-device-create.js +++ b/src/platform/graphics/graphics-device-create.js @@ -45,6 +45,19 @@ import { NullGraphicsDevice } from './null/null-graphics-device.js'; * - 'low-power': Prioritizes power saving over rendering performance. * * Defaults to 'default'. + * @param {boolean} [options.transientColor] - Boolean that requests the multi-sampled (MSAA) + * color attachment of the back-buffer to be allocated as a transient ("memoryless") attachment, + * allowing tile-based GPUs to keep its contents in on-chip memory and avoid VRAM allocation. + * WebGPU only, and only effective when anti-aliasing (MSAA) is enabled - it has no effect on + * single-sampled color, which is always presented. Ignored on devices without transient attachment + * support. Incompatible with a scene color grab pass (`sceneColorMap`): the attachment must be + * cleared on load and discarded on store. Defaults to false. + * @param {boolean} [options.transientDepth] - Boolean that requests the back-buffer depth + * attachment to be allocated as a transient ("memoryless") attachment (see `transientColor`). + * Applies to both single- and multi-sampled depth. WebGPU only; ignored on devices without + * transient attachment support. Incompatible with a scene depth grab pass (`sceneDepthMap`), a + * depth prepass, or any depth resolve, as the depth cannot be sampled or copied out. Defaults to + * false. * @returns {Promise} - Promise object representing the created graphics device. * @category Graphics */ diff --git a/src/platform/graphics/graphics-device.js b/src/platform/graphics/graphics-device.js index e3c33e810e7..8d45038a515 100644 --- a/src/platform/graphics/graphics-device.js +++ b/src/platform/graphics/graphics-device.js @@ -440,6 +440,18 @@ class GraphicsDevice extends EventHandler { */ supportsClipDistances = false; + /** + * True if the device supports transient ("memoryless") render target attachments (WebGPU only). + * When supported, attachments that are only used within a single render pass (cleared on load + * and discarded on store) can be allocated as memoryless, allowing tile-based GPUs to keep their + * contents in on-chip memory and avoid VRAM allocation. See the `transientColor` / + * `transientDepth` options of {@link RenderTarget} and {@link createGraphicsDevice}. + * + * @type {boolean} + * @readonly + */ + supportsTransientAttachments = false; + /** * True if the device supports WebGPU texture format tier 1 capabilities. When enabled, a wider * set of normalized texture formats can be used as render targets and storage textures. @@ -652,6 +664,8 @@ class GraphicsDevice extends EventHandler { this.initOptions.antialias ??= true; this.initOptions.powerPreference ??= 'high-performance'; this.initOptions.displayFormat ??= DISPLAYFORMAT_LDR; + this.initOptions.transientColor ??= false; + this.initOptions.transientDepth ??= false; // If WebXR is exposed, default to an XR-suitable GPU this.initOptions.xrCompatible ??= platform.browser && !!navigator.xr; diff --git a/src/platform/graphics/render-target.js b/src/platform/graphics/render-target.js index 7b193ba3137..be8aa9c370d 100644 --- a/src/platform/graphics/render-target.js +++ b/src/platform/graphics/render-target.js @@ -66,6 +66,18 @@ class RenderTarget { */ _samples; + /** + * @type {boolean} + * @private + */ + _transientColor; + + /** + * @type {boolean} + * @private + */ + _transientDepth; + /** @type {boolean} */ autoResolve; @@ -140,6 +152,19 @@ class RenderTarget { * @param {number} [options.samples] - Number of hardware anti-aliasing samples. Default is 1. * @param {boolean} [options.stencil] - If set to true, depth buffer will include stencil. * Defaults to false. Ignored if depthBuffer is defined or depth is false. + * @param {boolean} [options.transientColor] - If set to true, the multi-sampled (MSAA) color + * attachment is allocated as a transient ("memoryless") attachment, allowing tile-based GPUs to + * keep its contents in on-chip memory and avoid VRAM allocation. WebGPU only, and only effective + * when samples > 1 - it has no effect on single-sampled color (which is always stored). Ignored + * on devices without transient attachment support. The attachment must be cleared on load and + * discarded on store, so it is incompatible with a scene color grab pass (`sceneColorMap`). + * Defaults to false. + * @param {boolean} [options.transientDepth] - If set to true, the (engine-allocated) depth + * attachment is allocated as a transient ("memoryless") attachment (see `transientColor`). + * Applies to both single- and multi-sampled depth. WebGPU only; ignored on devices without + * transient attachment support, and ignored (with a warning) when an explicit `depthBuffer` is + * provided. Incompatible with a scene depth grab pass (`sceneDepthMap`), a depth prepass, or any + * depth resolve, as the depth cannot be sampled or copied out. Defaults to false. * @example * // Create a 512x512x24-bit render target with a depth buffer * const colorBuffer = new pc.Texture(graphicsDevice, { @@ -237,6 +262,20 @@ class RenderTarget { this.name = 'Untitled'; } + // transient (memoryless) attachments (WebGPU only). Gated on device support, so they are + // silently ignored when the device does not support transient attachments. Transient color + // additionally requires MSAA (single-sampled color is always stored), also silently ignored. + const transientSupported = !!this._device.supportsTransientAttachments; + this._transientColor = (options.transientColor ?? false) && transientSupported && this._samples > 1; + this._transientDepth = (options.transientDepth ?? false) && transientSupported && !this._depthBuffer; + + // transient depth applies to the engine-allocated depth buffer only. Requesting it together + // with a user-provided depthBuffer is invalid API usage (that buffer's contents must persist), + // so warn rather than silently ignore it - unlike the unsupported-device case above. + if ((options.transientDepth ?? false) && this._depthBuffer) { + Debug.warnOnce(`RenderTarget '${this.name}' was created with both transientDepth and a depthBuffer. Transient depth applies to the engine-allocated depth buffer only and cannot be used with a provided depthBuffer; the transientDepth flag is ignored.`); + } + // render image flipped in Y this.flipY = options.flipY ?? false; @@ -484,6 +523,26 @@ class RenderTarget { return this._samples; } + /** + * True if the multi-sampled color attachment is allocated as a transient ("memoryless") + * attachment (WebGPU only). See the `transientColor` constructor option. + * + * @type {boolean} + */ + get transientColor() { + return this._transientColor; + } + + /** + * True if the depth attachment is allocated as a transient ("memoryless") attachment (WebGPU + * only). See the `transientDepth` constructor option. + * + * @type {boolean} + */ + get transientDepth() { + return this._transientDepth; + } + /** * True if the render target contains the depth attachment. * diff --git a/src/platform/graphics/webgpu/webgpu-graphics-device.js b/src/platform/graphics/webgpu/webgpu-graphics-device.js index 0e6face1150..17ce5c59d11 100644 --- a/src/platform/graphics/webgpu/webgpu-graphics-device.js +++ b/src/platform/graphics/webgpu/webgpu-graphics-device.js @@ -462,6 +462,9 @@ class WebgpuGraphicsDevice extends GraphicsDevice { // HTML-in-Canvas support (copyElementImageToTexture) this.supportsHtmlTextures = typeof this.wgpu.queue?.copyElementImageToTexture === 'function'; + // transient (memoryless) attachment support (GPUTextureUsage.TRANSIENT_ATTACHMENT) + this.supportsTransientAttachments = typeof GPUTextureUsage !== 'undefined' && 'TRANSIENT_ATTACHMENT' in GPUTextureUsage; + // handle lost device this.wgpu.lost?.then(this.handleDeviceLost.bind(this)); @@ -580,12 +583,16 @@ class WebgpuGraphicsDevice extends GraphicsDevice { createBackbuffer() { this.supportsStencil = this.initOptions.stencil; + + // transient (memoryless) attachment requests - RenderTarget gates these on device support this.backBuffer = new RenderTarget({ name: 'WebgpuFramebuffer', graphicsDevice: this, depth: this.initOptions.depth, stencil: this.supportsStencil, - samples: this.samples + samples: this.samples, + transientColor: this.initOptions.transientColor, + transientDepth: this.initOptions.transientDepth }); this.backBuffer.impl.isBackbuffer = true; } @@ -1122,7 +1129,12 @@ class WebgpuGraphicsDevice extends GraphicsDevice { if (renderPass.samples > 1 && target.autoResolve) { const depthAttachment = target.impl.depthAttachment; const destTexture = target.depthBuffer.impl.gpuTexture; - if (depthAttachment && destTexture) { + + // a transient (memoryless) depth buffer cannot be sampled, so it cannot be the + // source of a shader-based depth resolve (it has no TEXTURE_BINDING usage) + if (depthAttachment?.transient) { + Debug.errorOnce(`Depth resolve is not possible on render target '${target.name}' because its depth is a transient (memoryless) attachment. Disable transientDepth to allow depth resolve.`); + } else if (depthAttachment && destTexture) { this.resolver.resolveDepth(this.commandEncoder, depthAttachment.multisampledDepthBuffer, destTexture); } } @@ -1496,10 +1508,20 @@ class WebgpuGraphicsDevice extends GraphicsDevice { // read from supplied render target, or from the framebuffer const sourceRT = source ? source : this.renderTarget; + + // a transient (memoryless) depth buffer cannot be sampled or copied out (it has neither + // TEXTURE_BINDING nor COPY_SRC), so a depth grab is not possible. Check the actual + // allocation state on the attachment rather than the requested RT flag. + if (sourceRT.impl.depthAttachment?.transient) { + Debug.errorOnce(`copyRenderTarget cannot copy depth from render target '${sourceRT.name}' because its depth is a transient (memoryless) attachment. Disable transientDepth to allow depth grab / copy.`); + DebugGraphics.popGpuMarker(this); + return false; + } + const sourceTexture = sourceRT.impl.depthAttachment.depthTexture; const sourceMipLevel = sourceRT.mipLevel; - if (source.samples > 1) { + if (sourceRT.samples > 1) { // resolve the depth to a color buffer of destination render target const destTexture = dest.colorBuffer.impl.gpuTexture; diff --git a/src/platform/graphics/webgpu/webgpu-render-target.js b/src/platform/graphics/webgpu/webgpu-render-target.js index 8278b9661a6..db06b41e6be 100644 --- a/src/platform/graphics/webgpu/webgpu-render-target.js +++ b/src/platform/graphics/webgpu/webgpu-render-target.js @@ -29,6 +29,14 @@ class ColorAttachment { */ multisampledBuffer; + /** + * True if the multi-sampled buffer is a transient ("memoryless") attachment, and so must be + * cleared on load and discarded on store. + * + * @type {boolean} + */ + transient = false; + destroy(device) { device.deferDestroy(this.multisampledBuffer); this.multisampledBuffer = null; @@ -74,6 +82,14 @@ class DepthAttachment { */ multisampledDepthBufferKey; + /** + * True if the depth attachment is a transient ("memoryless") attachment, and so must be + * cleared on load and discarded on store. + * + * @type {boolean} + */ + transient = false; + /** * @param {string} gpuFormat - The WebGPU format (GPUTextureFormat). */ @@ -311,7 +327,15 @@ class WebgpuRenderTarget { usage: GPUTextureUsage.RENDER_ATTACHMENT }; - if (samples > 1) { + // transient (memoryless) depth - keep the contents on-chip only, which precludes + // sampling (resolve) or copying (grab) it. The RT flag is already gated on device support. + const transientDepth = renderTarget.transientDepth; + + if (transientDepth) { + // memoryless attachment: RENDER_ATTACHMENT only, never sampled or copied + depthTextureDesc.usage |= GPUTextureUsage.TRANSIENT_ATTACHMENT; + this.depthAttachment.transient = true; + } else if (samples > 1) { // enable multi-sampled depth texture to be a source of our shader based resolver in WebgpuResolver // TODO: we do not always need to resolve it, and so might consider this flag to be optional depthTextureDesc.usage |= GPUTextureUsage.TEXTURE_BINDING; @@ -446,19 +470,27 @@ class WebgpuRenderTarget { (this.colorAttachments[index]?.format ?? device.backBufferViewFormat) : colorBuffer.impl.format; + // transient (memoryless) color - the multi-sampled buffer is only ever rendered to and + // resolved into the single-sampled target, never stored, sampled or copied, so it can be + // kept on-chip. The RT flag is already gated on device support and MSAA. + const transientColor = renderTarget.transientColor; + /** @type {GPUTextureDescriptor} */ const multisampledTextureDesc = { size: [width, height, 1], dimension: '2d', sampleCount: samples, format: format, - usage: GPUTextureUsage.RENDER_ATTACHMENT + usage: transientColor ? + GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TRANSIENT_ATTACHMENT : + GPUTextureUsage.RENDER_ATTACHMENT }; // allocate multi-sampled color buffer const multisampledColorBuffer = wgpu.createTexture(multisampledTextureDesc); DebugHelper.setLabel(multisampledColorBuffer, `${renderTarget.name}.multisampledColor`); this.setColorAttachment(index, multisampledColorBuffer, multisampledTextureDesc.format); + this.colorAttachments[index].transient = transientColor; colorAttachment.view = multisampledColorBuffer.createView(); DebugHelper.setLabel(colorAttachment.view, `${renderTarget.name}.multisampledColorView`); @@ -491,6 +523,17 @@ class WebgpuRenderTarget { colorAttachment.clearValue = srgb ? colorOps.clearValueLinear : colorOps.clearValue; colorAttachment.loadOp = colorOps.clear ? 'clear' : 'load'; colorAttachment.storeOp = colorOps.store ? 'store' : 'discard'; + + // a transient (memoryless) attachment must be cleared on load and discarded on store. + // The frame-graph store-on-no-clear optimization can flip these post-authoring (e.g. a + // later pass reuses this target without clearing, or its contents are grabbed), which + // would be an invalid use of a transient texture - force compliant ops to avoid a + // WebGPU validation error (rendering may be incorrect, hence the error). + if (this.colorAttachments[i]?.transient && (colorAttachment.loadOp !== 'clear' || colorAttachment.storeOp !== 'discard')) { + Debug.errorOnce(`Transient (memoryless) color attachment on render target '${renderTarget.name}' requires loadOp 'clear' and storeOp 'discard', but resolved to loadOp '${colorAttachment.loadOp}' / storeOp '${colorAttachment.storeOp}'. This is usually caused by a later pass reusing this target without clearing, or by a color grab pass (sceneColorMap). Forcing clear/discard to avoid a validation error; rendering may be incorrect. Disable transientColor or stop reusing/grabbing this target.`); + colorAttachment.loadOp = 'clear'; + colorAttachment.storeOp = 'discard'; + } } const depthAttachment = this.renderPassDescriptor.depthStencilAttachment; @@ -506,6 +549,21 @@ class WebgpuRenderTarget { depthAttachment.stencilStoreOp = renderPass.depthStencilOps.storeStencil ? 'store' : 'discard'; depthAttachment.stencilReadOnly = false; } + + // transient (memoryless) depth must be cleared on load and discarded on store (see the + // color attachment note above) - force compliant ops to avoid a validation error. + if (this.depthAttachment.transient && + (depthAttachment.depthLoadOp !== 'clear' || depthAttachment.depthStoreOp !== 'discard' || + (this.depthAttachment.hasStencil && (depthAttachment.stencilLoadOp !== 'clear' || depthAttachment.stencilStoreOp !== 'discard')))) { + + Debug.errorOnce(`Transient (memoryless) depth attachment on render target '${renderTarget.name}' requires loadOp 'clear' and storeOp 'discard', but resolved to depth loadOp '${depthAttachment.depthLoadOp}' / storeOp '${depthAttachment.depthStoreOp}'. This is usually caused by a later pass reusing this target without clearing, or by a depth grab pass (sceneDepthMap) / depth prepass / depth resolve. Forcing clear/discard to avoid a validation error; rendering may be incorrect. Disable transientDepth or stop reusing/grabbing this target.`); + depthAttachment.depthLoadOp = 'clear'; + depthAttachment.depthStoreOp = 'discard'; + if (this.depthAttachment.hasStencil) { + depthAttachment.stencilLoadOp = 'clear'; + depthAttachment.stencilStoreOp = 'discard'; + } + } } }