diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h
index 1468ddc5a87..00b407bf506 100644
--- a/drivers/metal/metal_objects.h
+++ b/drivers/metal/metal_objects.h
@@ -355,7 +355,7 @@ public:
 			DIRTY_NONE = 0b0000'0000,
 			DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
 			DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
-			DIRTY_DEPTH = 0b0000'0100, //! depth / stenci state
+			DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
 			DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
 			DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
 			DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
@@ -625,8 +625,11 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet {
 struct ShaderCacheEntry;

 enum class ShaderLoadStrategy {
-	DEFAULT,
+	IMMEDIATE,
 	LAZY,
+
+	/// The default strategy is to load the shader immediately.
+	DEFAULT = IMMEDIATE,
 };

 /// A Metal shader library.
diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm
index f0c5961423c..286b91203e0 100644
--- a/drivers/metal/metal_objects.mm
+++ b/drivers/metal/metal_objects.mm
@@ -597,37 +597,46 @@ void MDCommandBuffer::_render_clear_render_area() {
 	MDRenderPass const &pass = *render.pass;
 	MDSubpass const &subpass = render.get_subpass();

-	// First determine attachments that should be cleared.
-	LocalVector clears;
-	clears.reserve(subpass.color_references.size() + /* possible depth stencil clear */ 1);
-
-	for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
-		uint32_t idx = subpass.color_references[i].attachment;
-		if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) {
-			clears.push_back({ .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = render.clear_values[idx] });
-		}
-	}
 	uint32_t ds_index = subpass.depth_stencil_reference.attachment;
-	bool shouldClearDepth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false));
-	bool shouldClearStencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true));
-	if (shouldClearDepth || shouldClearStencil) {
-		MDAttachment const &attachment = pass.attachments[ds_index];
-		BitField bits = {};
-		if (shouldClearDepth && attachment.type & MDAttachmentType::Depth) {
-			bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
-		}
-		if (shouldClearStencil && attachment.type & MDAttachmentType::Stencil) {
-			bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);
-		}
+	bool clear_depth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false));
+	bool clear_stencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true));

-		clears.push_back({ .aspect = bits, .color_attachment = ds_index, .value = render.clear_values[ds_index] });
-	}
-
-	if (clears.is_empty()) {
+	// `clear_count` is only an upper bound used to size the scratch array: a color reference that is UNUSED or does not need clearing is skipped below.
+	uint32_t color_count = subpass.color_references.size();
+	uint32_t clear_count = color_count + (clear_depth || clear_stencil ? 1 : 0);
+	if (clear_count == 0) {
 		return;
 	}

-	render_clear_attachments(clears, { render.render_area });
+	RDD::AttachmentClear *clears = ALLOCA_ARRAY(RDD::AttachmentClear, clear_count);
+	uint32_t clears_idx = 0;
+
+	for (uint32_t i = 0; i < color_count; i++) {
+		uint32_t idx = subpass.color_references[i].attachment;
+		if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) {
+			clears[clears_idx++] = { .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = render.clear_values[idx] };
+		}
+	}
+
+	if (clear_depth || clear_stencil) {
+		MDAttachment const &attachment = pass.attachments[ds_index];
+		BitField bits = {};
+		if (clear_depth && attachment.type & MDAttachmentType::Depth) {
+			bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
+		}
+		if (clear_stencil && attachment.type & MDAttachmentType::Stencil) {
+			bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);
+		}
+
+		clears[clears_idx++] = { .aspect = bits, .color_attachment = ds_index, .value = render.clear_values[ds_index] };
+	}
+
+	// Only the first `clears_idx` entries were written; skip the call when nothing needs clearing, as the previous implementation did.
+	if (clears_idx == 0) {
+		return;
+	}
+
+	render_clear_attachments(VectorView(clears, clears_idx), { render.render_area });
 }

 void MDCommandBuffer::render_next_subpass() {
@@ -1448,9 +1457,9 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, idusage);
 					}
-					BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+					const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
 					if (sbi) {
 						[enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
 					}
@@ -1510,7 +1519,7 @@ BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader,
 						id obj = rid::get(uniform.ids[0]);
 						[enc setTexture:obj atIndex:bi->index];
 						add_usage(obj, stage, bi->usage);
-						BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
+						const BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
 						if (sbi) {
 							id tex = obj.parentTexture ? obj.parentTexture : obj;
 							id buf = tex.buffer;
@@ -1976,7 +1985,7 @@ void ShaderCacheEntry::notify_free() const {
 								 options:(MTLCompileOptions *)options
 								strategy:(ShaderLoadStrategy)strategy {
 	switch (strategy) {
-		case ShaderLoadStrategy::DEFAULT:
+		case ShaderLoadStrategy::IMMEDIATE:
 			[[fallthrough]];
 		default:
 			return [[MDImmediateLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm
index 3d61c63c2db..c21b7a98475 100644
--- a/drivers/metal/rendering_device_driver_metal.mm
+++ b/drivers/metal/rendering_device_driver_metal.mm
@@ -2479,6 +2479,8 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 	options.languageVersion = binary_data.get_msl_version();
 	HashMap libraries;

+	r_name = String(binary_data.shader_name.ptr());
+
 	for (ShaderStageData &shader_data : binary_data.stages) {
 		r_shader_desc.stages.push_back(shader_data.stage);

@@ -2535,7 +2537,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 			su.stages = (ShaderStage)(uint8_t)uniform.stages;
 			uset.write[i] = su;

-			UniformInfo ui;
+			UniformInfo &ui = set.uniforms[i];
 			ui.binding = uniform.binding;
 			ui.active_stages = uniform.active_stages;
 			for (KeyValue &kv : uniform.bindings) {
@@ -2544,7 +2546,6 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
 			for (KeyValue &kv : uniform.bindings_secondary) {
 				ui.bindings_secondary.insert(kv.key, kv.value);
 			}
-			set.uniforms[i] = ui;
 		}
 	}
 	for (UniformSetData &uniform_set : binary_data.uniforms) {
@@ -3550,17 +3551,22 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(
 	desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage;
 	desc.alphaToOneEnabled = p_multisample_state.enable_alpha_to_one;

-	// Depth stencil.
-	if (p_depth_stencil_state.enable_depth_test && desc.depthAttachmentPixelFormat != MTLPixelFormatInvalid) {
-		pipeline->raster_state.depth_test.enabled = true;
+	// Depth buffer.
+	bool depth_enabled = p_depth_stencil_state.enable_depth_test && desc.depthAttachmentPixelFormat != MTLPixelFormatInvalid;
+	bool stencil_enabled = p_depth_stencil_state.enable_stencil && desc.stencilAttachmentPixelFormat != MTLPixelFormatInvalid;
+
+	if (depth_enabled || stencil_enabled) {
 		MTLDepthStencilDescriptor *ds_desc = [MTLDepthStencilDescriptor new];
+
+		pipeline->raster_state.depth_test.enabled = depth_enabled;
 		ds_desc.depthWriteEnabled = p_depth_stencil_state.enable_depth_write;
 		ds_desc.depthCompareFunction = COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator];
 		if (p_depth_stencil_state.enable_depth_range) {
 			WARN_PRINT("unsupported: depth range");
 		}

-		if (p_depth_stencil_state.enable_stencil) {
+		if (stencil_enabled) {
+			pipeline->raster_state.stencil.enabled = true;
 			pipeline->raster_state.stencil.front_reference = p_depth_stencil_state.front_op.reference;
 			pipeline->raster_state.stencil.back_reference = p_depth_stencil_state.back_op.reference;
