Merge pull request #99820 from stuartcarnie/issue_99682

Metal: Add support for 2017 era iOS devices
Committed by Thaddeus Crews on 2024-12-19 20:00:13 -06:00
15 changed files with 1358 additions and 233 deletions

View File

@@ -71,18 +71,19 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) {
};
struct API_AVAILABLE(macos(11.0), ios(14.0)) MetalFeatures {
uint32_t mslVersion;
MTLGPUFamily highestFamily;
MTLLanguageVersion mslVersionEnum;
SampleCount supportedSampleCounts;
long hostMemoryPageSize;
bool layeredRendering;
bool multisampleLayeredRendering;
bool quadPermute; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
bool tessellationShader; /**< If true, tessellation shaders are supported. */
bool imageCubeArray; /**< If true, image cube arrays are supported. */
uint32_t mslVersion = 0;
MTLGPUFamily highestFamily = MTLGPUFamilyApple4;
MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2;
SampleCount supportedSampleCounts = SampleCount1;
long hostMemoryPageSize = 0;
bool layeredRendering = false;
bool multisampleLayeredRendering = false;
bool quadPermute = false; /**< If true, quadgroup permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdPermute = false; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
bool simdReduction = false; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
bool tessellationShader = false; /**< If true, tessellation shaders are supported. */
bool imageCubeArray = false; /**< If true, image cube arrays are supported. */
MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1;
};
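Note: the added member defaults mean a freshly constructed MetalFeatures now describes a conservative baseline (Apple4 family, MSL 1.2, single-sample) instead of holding indeterminate values, and the new argument_buffers_tier field records the device's MTLArgumentBuffersTier. A minimal illustrative sketch (not part of this diff) of how the tier can gate behavior:

	#import <Metal/Metal.h>

	MetalFeatures features; // defaults: MTLGPUFamilyApple4, MTLLanguageVersion1_2, SampleCount1, Tier 1
	if (features.argument_buffers_tier >= MTLArgumentBuffersTier2) {
		// Tier 2 supports writable textures inside argument buffers; on Tier 1
		// devices the driver instead falls back to direct binding (see below).
	}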
struct MetalLimits {

View File

@@ -98,6 +98,7 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.quadPermute = [p_device supportsFamily:MTLGPUFamilyApple4];
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
MTLCompileOptions *opts = [MTLCompileOptions new];
features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
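Note: argumentBuffersSupport is the standard MTLDevice property reporting MTLArgumentBuffersTier1 or MTLArgumentBuffersTier2. The 2017-era devices this PR targets report Tier 1, which is what selects the new direct-binding path during shader compilation below.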

View File

@@ -696,11 +696,12 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) MDShader {
public:
CharString name;
Vector<UniformSet> sets;
bool uses_argument_buffers = true;
virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
MDShader(CharString p_name, Vector<UniformSet> p_sets) :
name(p_name), sets(p_sets) {}
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
};
@@ -719,7 +720,7 @@ public:
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel);
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderShader final : public MDShader {
@@ -746,8 +747,9 @@ public:
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDRenderShader(CharString p_name,
bool p_needs_view_mask_buffer,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
bool p_uses_argument_buffers,
MDLibrary *p_vert, MDLibrary *p_frag);
};
@@ -783,12 +785,21 @@ struct BoundUniformSet {
};
class API_AVAILABLE(macos(11.0), ios(14.0)) MDUniformSet {
private:
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
public:
uint32_t index;
LocalVector<RDD::BoundUniform> uniforms;
HashMap<MDShader *, BoundUniformSet> bound_uniforms;
BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state);
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage);
};
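Note: boundUniformSetForShader is renamed to bound_uniform_set, matching the codebase's snake_case convention, and now receives the ResourceUsageMap so that merging resource usage happens inside the accessor. Callers go through bind_uniforms, which dispatches to the argument-buffer path or the new direct path depending on the shader.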
class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline {

View File

@@ -249,7 +249,7 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
const MDSubpass &subpass = render.get_subpass();
uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count;
simd::float4 vertices[vertex_count];
simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count);
simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT];
Size2i size = render.frameBuffer->size;
@@ -362,7 +362,7 @@ void MDCommandBuffer::_render_set_dirty_state() {
if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) {
size_t len = render.scissors.size();
MTLScissorRect rects[len];
MTLScissorRect *rects = ALLOCA_ARRAY(MTLScissorRect, len);
for (size_t i = 0; i < len; i++) {
rects[i] = render.clip_to_render_area(render.scissors[i]);
}
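Note: the replaced declarations were C99-style variable-length arrays, a compiler extension in C++ with unchecked stack growth. ALLOCA_ARRAY makes the stack allocation explicit. Assuming it follows the usual pattern (a sketch; the exact definition lives in Godot's headers):

	#include <alloca.h>
	// Typed wrapper over alloca(); the memory is valid until the enclosing function returns.
	#define ALLOCA_ARRAY(m_type, m_count) ((m_type *)alloca(sizeof(m_type) * (m_count)))

This keeps the allocations transient and scoped to the encoding functions that need them.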
@@ -466,9 +466,7 @@ void MDCommandBuffer::_render_bind_uniform_sets() {
uint64_t set_uniforms = render.uniform_set_mask;
render.uniform_set_mask = 0;
id<MTLRenderCommandEncoder> enc = render.encoder;
MDRenderShader *shader = render.pipeline->shader;
id<MTLDevice> device = enc.device;
while (set_uniforms != 0) {
// Find the index of the next set bit.
@@ -479,25 +477,7 @@
if (set == nullptr || set->index >= (uint32_t)shader->sets.size()) {
continue;
}
UniformSet const &set_info = shader->sets[set->index];
BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
bus.merge_into(render.resource_usage);
// Set the buffer for the vertex stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
if (offset) {
[enc setVertexBuffer:bus.buffer offset:*offset atIndex:set->index];
}
}
// Set the buffer for the fragment stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
if (offset) {
[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:set->index];
}
}
set->bind_uniforms(shader, render);
}
}
@@ -968,54 +948,21 @@ void MDCommandBuffer::ComputeState::end_encoding() {
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
id<MTLComputeCommandEncoder> enc = compute.encoder;
id<MTLDevice> device = enc.device;
MDShader *shader = (MDShader *)(p_shader.id);
UniformSet const &set_info = shader->sets[p_set_index];
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
bus.merge_into(compute.resource_usage);
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
if (offset) {
[enc setBuffer:bus.buffer offset:*offset atIndex:p_set_index];
}
set->bind_uniforms(shader, compute);
}
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
id<MTLComputeCommandEncoder> enc = compute.encoder;
id<MTLDevice> device = enc.device;
MDShader *shader = (MDShader *)(p_shader.id);
thread_local LocalVector<__unsafe_unretained id<MTLBuffer>> buffers;
thread_local LocalVector<NSUInteger> offsets;
buffers.resize(p_set_count);
offsets.resize(p_set_count);
// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
for (size_t i = 0u; i < p_set_count; ++i) {
UniformSet const &set_info = shader->sets[p_first_set_index + i];
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
BoundUniformSet &bus = set->boundUniformSetForShader(shader, device);
bus.merge_into(compute.resource_usage);
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
if (offset) {
buffers[i] = bus.buffer;
offsets[i] = *offset;
} else {
buffers[i] = nullptr;
offsets[i] = 0u;
}
set->bind_uniforms(shader, compute);
}
[enc setBuffers:buffers.ptr() offsets:offsets.ptr() withRange:NSMakeRange(p_first_set_index, p_set_count)];
}
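Note: the removed fast path could batch with [enc setBuffers:offsets:withRange:] only because every uniform set collapsed to a single argument buffer. With the direct-binding fallback, a set may expand into many textures, samplers, and buffers, so each set now delegates to MDUniformSet::bind_uniforms, which selects the appropriate strategy per shader.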
void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
@@ -1052,8 +999,11 @@ void MDCommandBuffer::_end_blit() {
type = MDCommandBufferStateType::None;
}
MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel) :
MDShader(p_name, p_sets), kernel(p_kernel) {
MDComputeShader::MDComputeShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_uses_argument_buffers,
MDLibrary *p_kernel) :
MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}
void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
@@ -1071,15 +1021,19 @@ void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDC
}
MDRenderShader::MDRenderShader(CharString p_name,
bool p_needs_view_mask_buffer,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
bool p_uses_argument_buffers,
MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) :
MDShader(p_name, p_sets), needs_view_mask_buffer(p_needs_view_mask_buffer), vert(p_vert), frag(p_frag) {
MDShader(p_name, p_sets, p_uses_argument_buffers),
needs_view_mask_buffer(p_needs_view_mask_buffer),
vert(p_vert),
frag(p_frag) {
}
void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
id<MTLRenderCommandEncoder> enc = p_cb->render.encoder;
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;
void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);
@@ -1093,9 +1047,373 @@ void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCo
}
}
BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device) {
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
UniformSet const &set_info = p_shader->sets[index];
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
id<MTLDevice> __unsafe_unretained device = enc.device;
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
// Set the buffer for the vertex stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_VERTEX);
if (offset) {
[enc setVertexBuffer:bus.buffer offset:*offset atIndex:index];
}
}
// Set the buffer for the fragment stage.
{
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_FRAGMENT);
if (offset) {
[enc setFragmentBuffer:bus.buffer offset:*offset atIndex:index];
}
}
}
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
DEV_ASSERT(!p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_state.encoder;
UniformSet const &set = p_shader->sets[index];
for (uint32_t i = 0; i < uniforms.size(); i++) {
RDD::BoundUniform const &uniform = uniforms[i];
UniformInfo ui = set.uniforms[i];
static const RDC::ShaderStage stage_usages[2] = { RDC::ShaderStage::SHADER_STAGE_VERTEX, RDC::ShaderStage::SHADER_STAGE_FRAGMENT };
for (const RDC::ShaderStage stage : stage_usages) {
ShaderStageUsage const stage_usage = ShaderStageUsage(1 << stage);
BindingInfo *bi = ui.bindings.getptr(stage);
if (bi == nullptr) {
// No binding for this stage.
continue;
}
if ((ui.active_stages & stage_usage) == 0) {
// Not active for this stage, so don't bind anything.
continue;
}
switch (uniform.type) {
case RDD::UNIFORM_TYPE_SAMPLER: {
size_t count = uniform.ids.size();
id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
objects[j] = rid::get(uniform.ids[j].id);
}
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
} else {
[enc setFragmentSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
}
} break;
case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
size_t count = uniform.ids.size() / 2;
id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
for (uint32_t j = 0; j < count; j += 1) {
id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
samplers[j] = sampler;
textures[j] = texture;
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
} else {
[enc setFragmentSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
}
}
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTextures:textures withRange:NSMakeRange(bi->index, count)];
} else {
[enc setFragmentTextures:textures withRange:NSMakeRange(bi->index, count)];
}
} break;
case RDD::UNIFORM_TYPE_TEXTURE: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTexture:obj atIndex:bi->index];
} else {
[enc setFragmentTexture:obj atIndex:bi->index];
}
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
} else {
[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
}
}
} break;
case RDD::UNIFORM_TYPE_IMAGE: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTexture:obj atIndex:bi->index];
} else {
[enc setFragmentTexture:obj atIndex:bi->index];
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
id<MTLBuffer> buf = tex.buffer;
if (buf) {
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
} else {
[enc setFragmentBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
}
}
}
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
} else {
[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
}
}
} break;
case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
} else {
[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
}
} break;
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexBuffer:buffer offset:0 atIndex:bi->index];
} else {
[enc setFragmentBuffer:buffer offset:0 atIndex:bi->index];
}
} break;
case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTexture:obj atIndex:bi->index];
} else {
[enc setFragmentTexture:obj atIndex:bi->index];
}
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
if (stage == RDD::SHADER_STAGE_VERTEX) {
[enc setVertexTextures:objects withRange:NSMakeRange(bi->index, count)];
} else {
[enc setFragmentTextures:objects withRange:NSMakeRange(bi->index, count)];
}
}
} break;
default: {
DEV_ASSERT(false);
}
}
}
}
}
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state) {
if (p_shader->uses_argument_buffers) {
bind_uniforms_argument_buffers(p_shader, p_state);
} else {
bind_uniforms_direct(p_shader, p_state);
}
}
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
UniformSet const &set_info = p_shader->sets[index];
id<MTLComputeCommandEncoder> enc = p_state.encoder;
id<MTLDevice> device = enc.device;
BoundUniformSet &bus = bound_uniform_set(p_shader, device, p_state.resource_usage);
uint32_t const *offset = set_info.offsets.getptr(RDD::SHADER_STAGE_COMPUTE);
if (offset) {
[enc setBuffer:bus.buffer offset:*offset atIndex:index];
}
}
void MDUniformSet::bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
DEV_ASSERT(!p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
id<MTLComputeCommandEncoder> __unsafe_unretained enc = p_state.encoder;
UniformSet const &set = p_shader->sets[index];
for (uint32_t i = 0; i < uniforms.size(); i++) {
RDD::BoundUniform const &uniform = uniforms[i];
UniformInfo ui = set.uniforms[i];
const RDC::ShaderStage stage = RDC::ShaderStage::SHADER_STAGE_COMPUTE;
const ShaderStageUsage stage_usage = ShaderStageUsage(1 << stage);
BindingInfo *bi = ui.bindings.getptr(stage);
if (bi == nullptr) {
// No binding for this stage.
continue;
}
if ((ui.active_stages & stage_usage) == 0) {
// Not active for this stage, so don't bind anything.
continue;
}
switch (uniform.type) {
case RDD::UNIFORM_TYPE_SAMPLER: {
size_t count = uniform.ids.size();
id<MTLSamplerState> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
objects[j] = rid::get(uniform.ids[j].id);
}
[enc setSamplerStates:objects withRange:NSMakeRange(bi->index, count)];
} break;
case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
size_t count = uniform.ids.size() / 2;
id<MTLTexture> __unsafe_unretained *textures = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
id<MTLSamplerState> __unsafe_unretained *samplers = ALLOCA_ARRAY(id<MTLSamplerState> __unsafe_unretained, count);
for (uint32_t j = 0; j < count; j += 1) {
id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
samplers[j] = sampler;
textures[j] = texture;
}
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
[enc setSamplerStates:samplers withRange:NSMakeRange(sbi->index, count)];
}
[enc setTextures:textures withRange:NSMakeRange(bi->index, count)];
} break;
case RDD::UNIFORM_TYPE_TEXTURE: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
[enc setTexture:obj atIndex:bi->index];
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
}
} break;
case RDD::UNIFORM_TYPE_IMAGE: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
[enc setTexture:obj atIndex:bi->index];
BindingInfo *sbi = ui.bindings_secondary.getptr(stage);
if (sbi) {
id<MTLTexture> tex = obj.parentTexture ? obj.parentTexture : obj;
id<MTLBuffer> buf = tex.buffer;
if (buf) {
[enc setBuffer:buf offset:tex.bufferOffset atIndex:sbi->index];
}
}
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
}
} break;
case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
} break;
case RDD::UNIFORM_TYPE_UNIFORM_BUFFER: {
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
[enc setBuffer:buffer offset:0 atIndex:bi->index];
} break;
case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
id<MTLBuffer> buffer = rid::get(uniform.ids[0]);
[enc setBuffer:buffer offset:0 atIndex:bi->index];
} break;
case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
size_t count = uniform.ids.size();
if (count == 1) {
id<MTLTexture> obj = rid::get(uniform.ids[0]);
[enc setTexture:obj atIndex:bi->index];
} else {
id<MTLTexture> __unsafe_unretained *objects = ALLOCA_ARRAY(id<MTLTexture> __unsafe_unretained, count);
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> obj = rid::get(uniform.ids[j]);
objects[j] = obj;
}
[enc setTextures:objects withRange:NSMakeRange(bi->index, count)];
}
} break;
default: {
DEV_ASSERT(false);
}
}
}
}
void MDUniformSet::bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state) {
if (p_shader->uses_argument_buffers) {
bind_uniforms_argument_buffers(p_shader, p_state);
} else {
bind_uniforms_direct(p_shader, p_state);
}
}
BoundUniformSet &MDUniformSet::bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage) {
BoundUniformSet *sus = bound_uniforms.getptr(p_shader);
if (sus != nullptr) {
sus->merge_into(p_resource_usage);
return *sus;
}
@@ -1261,6 +1579,7 @@ BoundUniformSet &MDUniformSet::boundUniformSetForShader(MDShader *p_shader, id<M
BoundUniformSet bs = { .buffer = enc_buffer, .usage_to_resources = usage_to_resources };
bound_uniforms.insert(p_shader, bs);
bs.merge_into(p_resource_usage);
return bound_uniforms.get(p_shader);
}

View File

@@ -61,7 +61,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0)) RenderingDeviceDriverMetal : public
uint32_t version_major = 2;
uint32_t version_minor = 0;
MetalDeviceProperties *metal_device_properties = nullptr;
MetalDeviceProperties *device_properties = nullptr;
PixelFormats *pixel_formats = nullptr;
std::unique_ptr<MDResourceCache> resource_cache;
@@ -431,10 +431,10 @@ public:
id<MTLDevice> get_device() const { return device; }
PixelFormats &get_pixel_formats() const { return *pixel_formats; }
MDResourceCache &get_resource_cache() const { return *resource_cache; }
MetalDeviceProperties const &get_device_properties() const { return *metal_device_properties; }
MetalDeviceProperties const &get_device_properties() const { return *device_properties; }
_FORCE_INLINE_ uint32_t get_metal_buffer_index_for_vertex_attribute_binding(uint32_t p_binding) {
return (metal_device_properties->limits.maxPerStageBufferCount - 1) - p_binding;
return (device_properties->limits.maxPerStageBufferCount - 1) - p_binding;
}
size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const;

View File

@@ -218,7 +218,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
// desc.compressionType = MTLTextureCompressionTypeLossy;
if (p_format.samples > TEXTURE_SAMPLES_1) {
SampleCount supported = (*metal_device_properties).find_nearest_supported_sample_count(p_format.samples);
SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples);
if (supported > SampleCount1) {
bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY;
@@ -277,7 +277,7 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
// Usage.
MTLResourceOptions options = 0;
const bool supports_memoryless = (*metal_device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*metal_device_properties).features.highestFamily < MTLGPUFamilyMac1;
const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1;
if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {
options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked;
desc.storageMode = MTLStorageModeMemoryless;
@@ -1058,7 +1058,7 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {
#pragma mark - Shader
const uint32_t SHADER_BINARY_VERSION = 3;
const uint32_t SHADER_BINARY_VERSION = 4;
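Note: the version bump forces cached shader binaries to be regenerated, which is required because the serialized layout changes below (the separate is_compute and needs_view_mask_buffer words collapse into a single flags bitfield).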
// region Serialization
@@ -1503,6 +1503,9 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) UniformSetData {
p_reader.read(index);
p_reader.read(uniforms);
}
UniformSetData() = default;
UniformSetData(uint32_t p_index) :
index(p_index) {}
};
struct PushConstantData {
@@ -1536,6 +1539,11 @@ struct PushConstantData {
};
struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
enum Flags : uint32_t {
NONE = 0,
NEEDS_VIEW_MASK_BUFFER = 1 << 0,
USES_ARGUMENT_BUFFERS = 1 << 1,
};
CharString shader_name;
// The Metal language version specified when compiling SPIR-V to MSL.
// Format is major * 10000 + minor * 100 + patch.
@@ -1543,8 +1551,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
uint32_t vertex_input_mask = UINT32_MAX;
uint32_t fragment_output_mask = UINT32_MAX;
uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX;
uint32_t is_compute = UINT32_MAX;
uint32_t needs_view_mask_buffer = UINT32_MAX;
uint32_t flags = NONE;
ComputeSize compute_local_size;
PushConstantData push_constant;
LocalVector<ShaderStageData> stages;
@@ -1557,17 +1564,46 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
return MTLLanguageVersion((major << 0x10) + minor);
}
bool is_compute() const {
return std::any_of(stages.begin(), stages.end(), [](ShaderStageData const &e) {
return e.stage == RD::ShaderStage::SHADER_STAGE_COMPUTE;
});
}
bool needs_view_mask_buffer() const {
return flags & NEEDS_VIEW_MASK_BUFFER;
}
void set_needs_view_mask_buffer(bool p_value) {
if (p_value) {
flags |= NEEDS_VIEW_MASK_BUFFER;
} else {
flags &= ~NEEDS_VIEW_MASK_BUFFER;
}
}
bool uses_argument_buffers() const {
return flags & USES_ARGUMENT_BUFFERS;
}
void set_uses_argument_buffers(bool p_value) {
if (p_value) {
flags |= USES_ARGUMENT_BUFFERS;
} else {
flags &= ~USES_ARGUMENT_BUFFERS;
}
}
size_t serialize_size() const {
size_t size = 0;
size += sizeof(uint32_t) + shader_name.length(); // shader_name
size += sizeof(uint32_t); // msl_version
size += sizeof(uint32_t); // vertex_input_mask
size += sizeof(uint32_t); // fragment_output_mask
size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask
size += sizeof(uint32_t); // is_compute
size += sizeof(uint32_t); // needs_view_mask_buffer
size += compute_local_size.serialize_size(); // compute_local_size
size += push_constant.serialize_size(); // push_constant
size += sizeof(msl_version);
size += sizeof(vertex_input_mask);
size += sizeof(fragment_output_mask);
size += sizeof(spirv_specialization_constants_ids_mask);
size += sizeof(flags);
size += compute_local_size.serialize_size();
size += push_constant.serialize_size();
size += sizeof(uint32_t); // stages.size()
for (ShaderStageData const &e : stages) {
size += e.serialize_size();
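A quick illustration of the flags accessors (not part of the diff); both former uint32_t fields now travel in one word, which is also why serialize_size() counts sizeof(flags) once:

	ShaderBinaryData data;
	data.set_needs_view_mask_buffer(true); // flags |= NEEDS_VIEW_MASK_BUFFER
	data.set_uses_argument_buffers(false); // flags &= ~USES_ARGUMENT_BUFFERS
	bool multiview = data.needs_view_mask_buffer(); // true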
@@ -1589,8 +1625,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
p_writer.write(vertex_input_mask);
p_writer.write(fragment_output_mask);
p_writer.write(spirv_specialization_constants_ids_mask);
p_writer.write(is_compute);
p_writer.write(needs_view_mask_buffer);
p_writer.write(flags);
p_writer.write(compute_local_size);
p_writer.write(push_constant);
p_writer.write(VectorView(stages));
@@ -1604,8 +1639,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
p_reader.read(vertex_input_mask);
p_reader.read(fragment_output_mask);
p_reader.read(spirv_specialization_constants_ids_mask);
p_reader.read(is_compute);
p_reader.read(needs_view_mask_buffer);
p_reader.read(flags);
p_reader.read(compute_local_size);
p_reader.read(push_constant);
p_reader.read(stages);
@@ -1952,14 +1986,13 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
.y = spirv_data.compute_local_size[1],
.z = spirv_data.compute_local_size[2],
};
bin_data.is_compute = spirv_data.is_compute;
bin_data.push_constant.size = spirv_data.push_constant_size;
bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages;
bin_data.needs_view_mask_buffer = shader_meta.has_multiview ? 1 : 0;
bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview);
for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) {
const ::Vector<ShaderUniform> &spirv_set = spirv_data.uniform_sets[i];
UniformSetData set{ .index = i };
UniformSetData set(i);
for (const ShaderUniform &spirv_uniform : spirv_set) {
UniformData binding{};
binding.type = spirv_uniform.type;
@@ -1999,10 +2032,25 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
#endif
#if TARGET_OS_IOS
msl_options.ios_use_simdgroup_functions = (*metal_device_properties).features.simdPermute;
msl_options.ios_use_simdgroup_functions = (*device_properties).features.simdPermute;
#endif
msl_options.argument_buffers = true;
bool disable_argument_buffers = false;
if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
disable_argument_buffers = true;
}
if (device_properties->features.argument_buffers_tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers) {
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
msl_options.argument_buffers = true;
bin_data.set_uses_argument_buffers(true);
} else {
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1;
// Tier 1 argument buffers don't support writable textures, so we disable argument buffers entirely.
msl_options.argument_buffers = false;
bin_data.set_uses_argument_buffers(false);
}
msl_options.force_active_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
// msl_options.pad_argument_buffer_resources = true; // Same as MoltenVK when using argument buffers.
msl_options.texture_buffer_native = true; // Enable texture buffer support.
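Note: the GODOT_DISABLE_ARGUMENT_BUFFERS environment variable checked above makes it possible to exercise the Tier 1 direct-binding path on Tier 2 hardware. For example, in a test harness (a sketch; it must run before the driver initializes, and in practice you would set the variable in the launching shell instead):

	#include <cstdlib>
	setenv("GODOT_DISABLE_ARGUMENT_BUFFERS", "1", /* overwrite */ 1); // force direct binding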
@@ -2042,7 +2090,12 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
ShaderResources resources = compiler.get_shader_resources();
std::string source = compiler.compile();
std::string source;
try {
source = compiler.compile();
} catch (CompilerError &e) {
ERR_FAIL_V_MSG(Result(), "Failed to compile stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what());
}
ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage.");
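Note: SPIRV-Cross raises CompilerError (a std::runtime_error subclass) from compile() when a construct cannot be expressed in the target MSL version, which becomes more likely now that older GPU families compile to older language versions; catching it turns a hard crash into a readable per-stage error.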
@@ -2088,8 +2141,8 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
return res;
};
auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &resources, Writable writable) {
for (Resource const &res : resources) {
auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector<Resource> &p_resources, Writable p_writable) {
for (Resource const &res : p_resources) {
uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
UniformData *found = nullptr;
@@ -2195,7 +2248,7 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
}
// Update writable.
if (writable == Writable::Maybe) {
if (p_writable == Writable::Maybe) {
if (basetype == BT::Struct) {
Bitset flags = compiler.get_buffer_block_flags(res.id);
if (!flags.get(spv::DecorationNonWritable)) {
@@ -2384,6 +2437,11 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer");
}
// We need to regenerate the shader if the cache is moved to an incompatible device.
ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && binary_data.uses_argument_buffers(),
ShaderID(),
"Shader was generated with argument buffers, but device has limited support");
MTLCompileOptions *options = [MTLCompileOptions new];
options.languageVersion = binary_data.get_msl_version();
HashMap<ShaderStage, MDLibrary *> libraries;
@@ -2505,8 +2563,12 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
}
MDShader *shader = nullptr;
if (binary_data.is_compute) {
MDComputeShader *cs = new MDComputeShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_COMPUTE]);
if (binary_data.is_compute()) {
MDComputeShader *cs = new MDComputeShader(
binary_data.shader_name,
uniform_sets,
binary_data.uses_argument_buffers(),
libraries[ShaderStage::SHADER_STAGE_COMPUTE]);
uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE);
if (binding) {
@@ -2520,7 +2582,13 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
#endif
shader = cs;
} else {
MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, (bool)binary_data.needs_view_mask_buffer, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
MDRenderShader *rs = new MDRenderShader(
binary_data.shader_name,
uniform_sets,
binary_data.needs_view_mask_buffer(),
binary_data.uses_argument_buffers(),
libraries[ShaderStage::SHADER_STAGE_VERTEX],
libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX);
if (vert_binding) {
@@ -2547,7 +2615,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask;
r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask;
r_shader_desc.is_compute = binary_data.is_compute;
r_shader_desc.is_compute = binary_data.is_compute();
r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x;
r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y;
r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z;
@@ -2572,7 +2640,7 @@ void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {
RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.
MDUniformSet *set = new MDUniformSet();
MDUniformSet *set = memnew(MDUniformSet);
Vector<BoundUniform> bound_uniforms;
bound_uniforms.resize(p_uniforms.size());
for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
@@ -2586,7 +2654,7 @@ RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<Boun
void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {
MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id;
delete obj;
memdelete(obj);
}
void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
@@ -2800,7 +2868,7 @@ void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_c
uint32_t layerCnt = p_subresources.layer_count;
uint32_t layerEnd = layerStart + layerCnt;
MetalFeatures const &features = (*metal_device_properties).features;
MetalFeatures const &features = (*device_properties).features;
// Iterate across mipmap levels and layers, and perform an empty render to clear each.
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
@@ -3057,7 +3125,7 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Atta
MTLPixelFormat format = pf.getMTLPixelFormat(a.format);
mda.format = format;
if (a.samples > TEXTURE_SAMPLES_1) {
mda.samples = (*metal_device_properties).find_nearest_supported_sample_count(a.samples);
mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples);
}
mda.loadAction = LOAD_ACTIONS[a.load_op];
mda.storeAction = STORE_ACTIONS[a.store_op];
@@ -3436,7 +3504,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(
}
if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) {
pipeline->sample_count = (*metal_device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
}
desc.rasterSampleCount = static_cast<NSUInteger>(pipeline->sample_count);
desc.alphaToCoverageEnabled = p_multisample_state.enable_alpha_to_coverage;
@@ -3815,7 +3883,7 @@ uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
}
uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
MetalDeviceProperties const &props = (*metal_device_properties);
MetalDeviceProperties const &props = (*device_properties);
MetalLimits const &limits = props.limits;
#if defined(DEV_ENABLED)
@@ -3911,11 +3979,13 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
case LIMIT_SUBGROUP_MAX_SIZE:
return limits.maxSubgroupSize;
case LIMIT_SUBGROUP_IN_SHADERS:
return (int64_t)limits.subgroupSupportedShaderStages;
return (uint64_t)limits.subgroupSupportedShaderStages;
case LIMIT_SUBGROUP_OPERATIONS:
return (int64_t)limits.subgroupSupportedOperations;
return (uint64_t)limits.subgroupSupportedOperations;
UNKNOWN(LIMIT_VRS_TEXEL_WIDTH);
UNKNOWN(LIMIT_VRS_TEXEL_HEIGHT);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_WIDTH);
UNKNOWN(LIMIT_VRS_MAX_FRAGMENT_HEIGHT);
default:
ERR_FAIL_V(0);
}
@@ -4042,11 +4112,11 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
// Set the pipeline cache ID based on the Metal version.
pipeline_cache_id = "metal-driver-" + get_api_version();
metal_device_properties = memnew(MetalDeviceProperties(device));
device_properties = memnew(MetalDeviceProperties(device));
pixel_formats = memnew(PixelFormats(device));
if (metal_device_properties->features.layeredRendering) {
if (device_properties->features.layeredRendering) {
multiview_capabilities.is_supported = true;
multiview_capabilities.max_view_count = metal_device_properties->limits.maxViewports;
multiview_capabilities.max_view_count = device_properties->limits.maxViewports;
// NOTE: I'm not sure what the limit is, as I don't see it referenced anywhere.
multiview_capabilities.max_instance_count = UINT32_MAX;
@@ -4057,11 +4127,10 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
print_verbose("- Metal multiview not supported");
}
// Check required features and abort if any of them is missing.
if (!metal_device_properties->features.imageCubeArray) {
// NOTE: Apple A11 (Apple4) GPUs support image cube arrays, which are devices from 2017 and newer.
String error_string = vformat("Your Apple GPU does not support the following features which are required to use Metal-based renderers in Godot:\n\n");
if (!metal_device_properties->features.imageCubeArray) {
// The Metal renderer requires the Apple4 GPU family, i.e. 2017-era A11 chips and newer.
if (device_properties->features.highestFamily < MTLGPUFamilyApple4) {
String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n");
if (!device_properties->features.imageCubeArray) {
error_string += "- No support for image cube arrays.\n";
}