Apple: Use image atomic operations on supported Apple hardware

Co-authored-by: A Thousand Ships <96648715+AThousandShips@users.noreply.github.com>
This commit is contained in:
Stuart Carnie
2025-06-27 09:59:21 +10:00
parent 9b22b41531
commit 5230f6c60c
32 changed files with 5354 additions and 719 deletions

View File

@ -12,7 +12,6 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
"spirv_cfg.cpp",
"spirv_cross_util.cpp",
"spirv_cross.cpp",
"spirv_parser.cpp",
"spirv_msl.cpp",

View File

@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
};
struct MetalLimits {

View File

@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
features.supports_image_atomic_32_bit = false;
features.supports_image_atomic_64_bit = false;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);

View File

@ -309,9 +309,23 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
#pragma mark - Common State
// From RenderingDevice
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
bool state_begin = false;
_FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
DEV_ASSERT(state_begin);
if (commandBuffer == nil) {
commandBuffer = queue.commandBuffer;
}
return commandBuffer;
}
void _end_compute_dispatch();
void _end_blit();
@ -326,6 +340,11 @@ private:
void _end_render_pass();
void _render_clear_render_area();
#pragma mark - Compute
void _compute_set_dirty_state();
void _compute_bind_uniform_sets();
public:
MDCommandBufferStateType type = MDCommandBufferStateType::None;
@ -349,18 +368,18 @@ public:
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
// clang-format off
enum DirtyFlag: uint8_t {
DIRTY_NONE = 0b0000'0000,
DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
DIRTY_BLEND = 0b0100'0000, //! blend state
DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode
DIRTY_ALL = 0xff,
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_DEPTH = 1 << 3, //! depth / stencil state
DIRTY_VERTEX = 1 << 4, //! vertex buffers
DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
DIRTY_BLEND = 1 << 7, //! blend state
DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
DIRTY_ALL = (1 << 9) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
@ -368,6 +387,9 @@ public:
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[2] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
@ -422,6 +444,13 @@ public:
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
@ -464,16 +493,46 @@ public:
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
_FORCE_INLINE_ void reset() {
pipeline = nil;
encoder = nil;
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_ALL = (1 << 3) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[1] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
_FORCE_INLINE_ void mark_uniforms_dirty(void) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
if (uniform_sets[i] != nullptr) {
uniform_set_mask |= 1 << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
void end_encoding();
} compute;
// State specific to a blit pass.
@ -496,6 +555,7 @@ public:
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
void bind_pipeline(RDD::PipelineID p_pipeline);
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
#pragma mark - Render Commands
@ -661,8 +721,6 @@ public:
Vector<UniformSet> sets;
bool uses_argument_buffers = true;
virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
@ -671,15 +729,13 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
uint32_t binding = -1;
int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};
MDLibrary *kernel;
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
@ -700,8 +756,6 @@ public:
MDLibrary *vert;
MDLibrary *frag;
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,

View File

@ -62,8 +62,8 @@
#undef MAX
void MDCommandBuffer::begin() {
DEV_ASSERT(commandBuffer == nil);
commandBuffer = queue.commandBuffer;
DEV_ASSERT(commandBuffer == nil && !state_begin);
state_begin = true;
}
void MDCommandBuffer::end() {
@ -83,6 +83,7 @@ void MDCommandBuffer::commit() {
end();
[commandBuffer commit];
commandBuffer = nil;
state_begin = false;
}
void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
render.desc.colorAttachments[0].resolveTexture = res_tex;
}
#endif
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc];
}
if (render.pipeline != rp) {
@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
DEV_ASSERT(type == MDCommandBufferStateType::None);
type = MDCommandBufferStateType::Compute;
compute.pipeline = (MDComputePipeline *)p;
compute.encoder = commandBuffer.computeCommandEncoder;
[compute.encoder setComputePipelineState:compute.pipeline->state];
if (compute.pipeline != p) {
compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
compute.mark_uniforms_dirty();
compute.pipeline = (MDComputePipeline *)p;
}
}
}
void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
	// Copies the push constant payload into CPU-side staging state for the active
	// pass and records the per-stage buffer binding indices. The bytes are NOT
	// handed to a Metal encoder here; they are uploaded later, when the DIRTY_PUSH
	// flag is flushed (deferred state model — the encoder may not exist yet).
	switch (type) {
		case MDCommandBufferStateType::Render: {
			MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
			if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) {
				// Neither the vertex nor the fragment stage consumes push constants.
				return;
			}
			render.push_constant_bindings[0] = shader->push_constants.vert.binding;
			render.push_constant_bindings[1] = shader->push_constants.frag.binding;
			void const *ptr = p_data.ptr();
			render.push_constant_data_len = p_data.size() * sizeof(uint32_t);
			DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data));
			memcpy(render.push_constant_data, ptr, render.push_constant_data_len);
			render.mark_push_constants_dirty();
		} break;
		case MDCommandBufferStateType::Compute: {
			MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
			if (shader->push_constants.binding == -1) {
				// The kernel consumes no push constants.
				return;
			}
			compute.push_constant_bindings[0] = shader->push_constants.binding;
			void const *ptr = p_data.ptr();
			compute.push_constant_data_len = p_data.size() * sizeof(uint32_t);
			DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data));
			memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len);
			compute.mark_push_constants_dirty();
		} break;
		case MDCommandBufferStateType::Blit:
		case MDCommandBufferStateType::None:
			// No pass is active that could consume push constants; drop silently.
			return;
	}
}
@ -181,7 +217,7 @@ id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
}
type = MDCommandBufferStateType::Blit;
blit.encoder = commandBuffer.blitCommandEncoder;
blit.encoder = command_buffer().blitCommandEncoder;
return blit.encoder;
}
@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
break;
}
id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
id<MTLRenderCommandEncoder> enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc];
if (p_label != nil) {
[enc pushDebugGroup:p_label];
[enc popDebugGroup];
@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
void MDCommandBuffer::_render_set_dirty_state() {
_render_bind_uniform_sets();
if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
if (render.push_constant_bindings[0] != (uint32_t)-1) {
[render.encoder setVertexBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[0]];
}
if (render.push_constant_bindings[1] != (uint32_t)-1) {
[render.encoder setFragmentBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[1]];
}
}
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
uint32_t view_range[2] = { 0, subpass.view_count };
@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
}
void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);
end();
MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() {
}
void MDCommandBuffer::render_next_subpass() {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);
if (render.current_subpass == UINT32_MAX) {
render.current_subpass = 0;
@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() {
// the defaultRasterSampleCount from the pipeline's sample count.
render.desc = desc;
} else {
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc];
if (!render.is_rendering_entire_area) {
_render_clear_render_area();
@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() {
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
push_constant_data_len = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() {
#pragma mark - Compute
void MDCommandBuffer::_compute_set_dirty_state() {
	// Flushes deferred compute state to the Metal encoder just before a dispatch.
	if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
		// A new pipeline was bound since the last flush: (re)create the encoder
		// lazily and install the pipeline state object.
		compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent];
		[compute.encoder setComputePipelineState:compute.pipeline->state];
	}
	_compute_bind_uniform_sets();
	if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
		if (compute.push_constant_bindings[0] != (uint32_t)-1) {
			// Push constants are small (<= MAX_PUSH_CONSTANT_SIZE), so they are
			// passed inline via setBytes rather than through a buffer.
			[compute.encoder setBytes:compute.push_constant_data
							   length:compute.push_constant_data_len
							  atIndex:compute.push_constant_bindings[0]];
		}
	}
	compute.dirty.clear();
}
void MDCommandBuffer::_compute_bind_uniform_sets() {
	// Encodes only the uniform sets that changed since the last dispatch,
	// as tracked by the bits of uniform_set_mask.
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
	if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
		return;
	}
	compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
	uint64_t set_uniforms = compute.uniform_set_mask;
	compute.uniform_set_mask = 0;
	MDComputeShader *shader = compute.pipeline->shader;
	while (set_uniforms != 0) {
		// Find the index of the next set bit.
		uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
		// Clear the set bit.
		set_uniforms &= (set_uniforms - 1);
		MDUniformSet *set = compute.uniform_sets[index];
		if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
			// Slot was never populated, or the bound shader declares fewer sets.
			continue;
		}
		set->bind_uniforms(shader, compute, index);
	}
}
void MDCommandBuffer::ComputeState::reset() {
	// Drop all per-pass compute state so the command buffer can be reused.
	encoder = nil;
	pipeline = nil;
	dirty = DIRTY_NONE;
	uniform_set_mask = 0;
	uniform_sets.clear();
	push_constant_data_len = 0;
	// Clear each per-usage resource list but keep the map entries themselves,
	// as the same usage keys are likely to be needed again.
	for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &entry : resource_usage) {
		entry.value.clear();
	}
}
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
	// Records a single uniform set for the current compute pass. Binding is
	// deferred: the set is only encoded when the dirty state is flushed at
	// dispatch time, so no encoder is required to exist here.
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
	if (compute.uniform_sets.size() <= p_set_index) {
		// Grow from the compute array's own previous size — NOT render's
		// (fixes a copy-paste bug that filled from render.uniform_sets.size(),
		// which could clobber valid entries or leave slots uninitialized).
		uint32_t s = compute.uniform_sets.size();
		compute.uniform_sets.resize(p_set_index + 1);
		// Set intermediate values to null.
		std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
	}

	if (compute.uniform_sets[p_set_index] != set) {
		// Only mark the slot dirty when the set actually changed, to prevent
		// redundant re-binding at dispatch time.
		compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
		compute.uniform_set_mask |= 1ULL << p_set_index;
		compute.uniform_sets[p_set_index] = set;
	}
}
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
	// Records a contiguous range of uniform sets for the current compute pass.
	// Binding is deferred until the dirty state is flushed at dispatch time;
	// removes the stale eager bind_uniforms call and unused shader local left
	// over from the pre-deferred implementation.
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
	for (size_t i = 0; i < p_set_count; ++i) {
		MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
		uint32_t index = p_first_set_index + i;
		if (compute.uniform_sets.size() <= index) {
			uint32_t s = compute.uniform_sets.size();
			compute.uniform_sets.resize(index + 1);
			// Set intermediate values to null.
			std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
		}

		if (compute.uniform_sets[index] != set) {
			// Only mark changed slots dirty, to prevent redundant re-binding.
			compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
			compute.uniform_set_mask |= 1ULL << index;
			compute.uniform_sets[index] = set;
		}
	}
}
void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
_compute_set_dirty_state();
MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups,
void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
_compute_set_dirty_state();
id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name,
MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}
void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
if (push_constants.binding == (uint32_t)-1) {
return;
}
id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;
void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);
[enc setBytes:ptr length:length atIndex:push_constants.binding];
}
MDRenderShader::MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name,
frag(p_frag) {
}
void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;
void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);
if (push_constants.vert.binding > -1) {
[enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding];
}
if (push_constants.frag.binding > -1) {
[enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding];
}
}
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);

View File

@ -58,6 +58,7 @@
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#include "drivers/apple/foundation_helpers.h"
#import <Metal/MTLTexture.h>
#import <Metal/Metal.h>
@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderWrite;
}
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
}
}
bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderRead;
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported.");
}
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version.");
}
}
if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
}
@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
// Check if it is a linear format for atomic operations and therefore needs a buffer,
// as generally Metal does not support atomic operations on textures.
bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT));
bool needs_buffer = is_linear;
// Check for atomic requirements.
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) {
switch (p_format.format) {
case RenderingDeviceCommons::DATA_FORMAT_R32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: {
if (!device_properties->features.supports_image_atomic_32_bit) {
// We can emulate 32-bit atomic operations on textures.
needs_buffer = true;
}
} break;
case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: {
if (!device_properties->features.supports_image_atomic_64_bit) {
// No emulation for 64-bit atomics.
ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported.");
}
} break;
default:
break;
}
}
id<MTLTexture> obj = nil;
if (needs_buffer) {
@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
Fence *fence = (Fence *)(p_cmd_fence.id);
if (fence != nullptr) {
[cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
if (cb == nil) {
// If there is nothing to do, signal the fence immediately.
dispatch_semaphore_signal(fence->semaphore);
}];
} else {
[cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(fence->semaphore);
}];
}
}
for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {
void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
MDShader *shader = (MDShader *)(p_shader.id);
shader->encode_push_constant_data(p_data, cb);
cb->encode_push_constant_data(p_shader, p_data);
}
// ----- CACHE -----
@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s
MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
desc.computeFunction = function;
desc.label = conv::to_nsstring(shader->name);
if (archive) {
desc.binaryArchives = @[ archive ];
}
@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
return device_properties->features.metal_fx_spatial;
case SUPPORTS_METALFX_TEMPORAL:
return device_properties->features.metal_fx_temporal;
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
return device_properties->features.supports_image_atomic_32_bit;
default:
return false;
}

View File

@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
set_indexes[set_indexes_size - 1] = offset;
}
CompilerMSL::Options msl_options{};
// MAJOR * 10000 + MINOR * 100
uint32_t msl_version = CompilerMSL::Options::make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
mtl_reflection_data.msl_version = msl_options.msl_version;
msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
}
bool disable_argument_buffers = false;
if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
disable_argument_buffers = true;
}
@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
if (msl_version >= CompilerMSL::Options::make_msl_version(3, 2)) {
// All 3.2+ versions support device coherence, so we can disable texture fences.
msl_options.readwrite_texture_fences = false;
}
CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
primary.texture_type = MTLTextureTypeTextureBuffer;
} break;
case spv::DimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case spv::DimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.