Apple: Use image atomic operations on supported Apple hardware
Co-authored-by: A Thousand Ships <96648715+AThousandShips@users.noreply.github.com>
@@ -12,7 +12,6 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
"spirv_cfg.cpp",
"spirv_cross_util.cpp",
"spirv_cross.cpp",
"spirv_parser.cpp",
"spirv_msl.cpp",
@@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
};

struct MetalLimits {
@@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
features.supports_image_atomic_32_bit = false;
features.supports_image_atomic_64_bit = false;
}

if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
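The detection above maps image-atomic support to Metal GPU families: Apple6 and newer report 32-bit image atomics, Apple8 and newer also report 64-bit, and setting GODOT_MTL_DISABLE_IMAGE_ATOMICS=1 forces both flags off for testing. A minimal standalone sketch of the same query, assuming only the public MTLDevice API (the helper name is illustrative, not part of this commit):

    #import <Metal/Metal.h>

    // Sketch: mirrors the GPU-family gates used by init_features() above.
    static void query_image_atomic_support(id<MTLDevice> p_device, bool &r_32_bit, bool &r_64_bit) {
        r_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6]; // 32-bit image atomics.
        r_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8]; // 64-bit image atomics.
    }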
@@ -309,9 +309,23 @@ public:

class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
#pragma mark - Common State

// From RenderingDevice
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;

RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
bool state_begin = false;

_FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
DEV_ASSERT(state_begin);
if (commandBuffer == nil) {
commandBuffer = queue.commandBuffer;
}
return commandBuffer;
}

void _end_compute_dispatch();
void _end_blit();
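With this change, begin() no longer creates the MTLCommandBuffer up front; the private command_buffer() accessor allocates it from the queue on first use, so a command buffer that never encodes work never touches Metal. A compact sketch of the same lazy-creation pattern, with illustrative names rather than the full class:

    #import <Metal/Metal.h>

    // Sketch (Objective-C++ with ARC): allocate the MTLCommandBuffer only when needed.
    struct LazyCommandBuffer {
        id<MTLCommandQueue> queue = nil;
        id<MTLCommandBuffer> command_buffer = nil;
        bool began = false;

        void begin() { began = true; } // No Metal allocation yet.

        id<MTLCommandBuffer> get() {
            // Allocate only when something is actually encoded.
            if (command_buffer == nil) {
                command_buffer = queue.commandBuffer;
            }
            return command_buffer;
        }
    };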
@@ -326,6 +340,11 @@ private:
void _end_render_pass();
void _render_clear_render_area();

#pragma mark - Compute

void _compute_set_dirty_state();
void _compute_bind_uniform_sets();

public:
MDCommandBufferStateType type = MDCommandBufferStateType::None;
@@ -349,18 +368,18 @@ public:
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
// clang-format off
enum DirtyFlag: uint8_t {
DIRTY_NONE = 0b0000'0000,
DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
DIRTY_BLEND = 0b0100'0000, //! blend state
DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode

DIRTY_ALL = 0xff,
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_DEPTH = 1 << 3, //! depth / stencil state
DIRTY_VERTEX = 1 << 4, //! vertex buffers
DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
DIRTY_BLEND = 1 << 7, //! blend state
DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
DIRTY_ALL = (1 << 9) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
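The render-state dirty mask widens from uint8_t to uint16_t because inserting DIRTY_PUSH pushes the highest flag (DIRTY_RASTER) up to bit 8, and the values switch from binary literals to 1 << n shifts. Two worked checks of that arithmetic (standalone, purely illustrative):

    // Sketch: why the underlying type widens. With DIRTY_PUSH inserted, the highest
    // flag becomes DIRTY_RASTER = 1 << 8 = 0x100, which no longer fits in uint8_t,
    // and DIRTY_ALL = (1 << 9) - 1 = 0x1FF covers all nine flags.
    static_assert((1 << 8) > 0xFF, "ninth flag exceeds uint8_t");
    static_assert(((1 << 9) - 1) == 0x1FF, "DIRTY_ALL covers flags 0..8");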
@@ -368,6 +387,9 @@ public:
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[2] = { 0 };

_FORCE_INLINE_ void reset();
void end_encoding();
@@ -422,6 +444,13 @@ public:
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}

_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}

_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
@@ -464,16 +493,46 @@ public:
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
_FORCE_INLINE_ void reset() {
pipeline = nil;
encoder = nil;
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_ALL = (1 << 3) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;

LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[1] = { 0 };

_FORCE_INLINE_ void reset();
void end_encoding();

_FORCE_INLINE_ void mark_uniforms_dirty(void) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
if (uniform_sets[i] != nullptr) {
uniform_set_mask |= 1 << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}

_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}

void end_encoding();
} compute;

// State specific to a blit pass.
@@ -496,6 +555,7 @@ public:
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);

void bind_pipeline(RDD::PipelineID p_pipeline);
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);

#pragma mark - Render Commands
@@ -661,8 +721,6 @@ public:
Vector<UniformSet> sets;
bool uses_argument_buffers = true;

virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;

MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
@@ -671,15 +729,13 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
uint32_t binding = -1;
int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};

MDLibrary *kernel;

void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;

MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
@@ -700,8 +756,6 @@ public:
MDLibrary *vert;
MDLibrary *frag;

void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;

MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
@@ -62,8 +62,8 @@
#undef MAX

void MDCommandBuffer::begin() {
DEV_ASSERT(commandBuffer == nil);
commandBuffer = queue.commandBuffer;
DEV_ASSERT(commandBuffer == nil && !state_begin);
state_begin = true;
}

void MDCommandBuffer::end() {
@@ -83,6 +83,7 @@ void MDCommandBuffer::commit() {
end();
[commandBuffer commit];
commandBuffer = nil;
state_begin = false;
}

void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
@@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
render.desc.colorAttachments[0].resolveTexture = res_tex;
}
#endif
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc];
}

if (render.pipeline != rp) {
@@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
DEV_ASSERT(type == MDCommandBufferStateType::None);
type = MDCommandBufferStateType::Compute;

compute.pipeline = (MDComputePipeline *)p;
compute.encoder = commandBuffer.computeCommandEncoder;
[compute.encoder setComputePipelineState:compute.pipeline->state];
if (compute.pipeline != p) {
compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
compute.mark_uniforms_dirty();
compute.pipeline = (MDComputePipeline *)p;
}
}
}

void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
switch (type) {
case MDCommandBufferStateType::Render: {
MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) {
return;
}
render.push_constant_bindings[0] = shader->push_constants.vert.binding;
render.push_constant_bindings[1] = shader->push_constants.frag.binding;
void const *ptr = p_data.ptr();
render.push_constant_data_len = p_data.size() * sizeof(uint32_t);
DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data));
memcpy(render.push_constant_data, ptr, render.push_constant_data_len);
render.mark_push_constants_dirty();
} break;
case MDCommandBufferStateType::Compute: {
MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
if (shader->push_constants.binding == -1) {
return;
}
compute.push_constant_bindings[0] = shader->push_constants.binding;
void const *ptr = p_data.ptr();
compute.push_constant_data_len = p_data.size() * sizeof(uint32_t);
DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data));
memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len);
compute.mark_push_constants_dirty();
} break;
case MDCommandBufferStateType::Blit:
case MDCommandBufferStateType::None:
return;
}
}
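Push constants are no longer written straight to an encoder by the shader object; encode_push_constant_data() copies the words into the command buffer's render or compute state and only marks DIRTY_PUSH, and the actual setVertexBytes/setFragmentBytes/setBytes calls happen when dirty state is flushed before the next draw or dispatch. A hedged sketch of the driver-side call (the helper and payload are illustrative; MDCommandBuffer, RDD::ShaderID, and VectorView are the types declared above, assumed available via this file's headers):

    // Sketch: cache the bytes now; the upload happens in _render_set_dirty_state() /
    // _compute_set_dirty_state() when the next draw or dispatch is encoded.
    static void push_example_params(MDCommandBuffer *p_cb, RDD::ShaderID p_shader) {
        uint32_t words[4] = { 0, 1, 2, 3 }; // Example payload, within MAX_PUSH_CONSTANT_SIZE bytes.
        p_cb->encode_push_constant_data(p_shader, VectorView<uint32_t>(words, 4));
    }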
@@ -181,7 +217,7 @@ id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
}

type = MDCommandBufferStateType::Blit;
blit.encoder = commandBuffer.blitCommandEncoder;
blit.encoder = command_buffer().blitCommandEncoder;
return blit.encoder;
}
@@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
break;
}

id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
id<MTLRenderCommandEncoder> enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc];
if (p_label != nil) {
[enc pushDebugGroup:p_label];
[enc popDebugGroup];
@@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
void MDCommandBuffer::_render_set_dirty_state() {
_render_bind_uniform_sets();

if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
if (render.push_constant_bindings[0] != (uint32_t)-1) {
[render.encoder setVertexBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[0]];
}
if (render.push_constant_bindings[1] != (uint32_t)-1) {
[render.encoder setFragmentBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[1]];
}
}

MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
uint32_t view_range[2] = { 0, subpass.view_count };
@@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
}

void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);
end();

MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
@@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() {
}

void MDCommandBuffer::render_next_subpass() {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);

if (render.current_subpass == UINT32_MAX) {
render.current_subpass = 0;
@@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() {
// the defaultRasterSampleCount from the pipeline's sample count.
render.desc = desc;
} else {
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc];

if (!render.is_rendering_entire_area) {
_render_clear_render_area();
@@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() {
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
push_constant_data_len = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
@@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() {

#pragma mark - Compute

void MDCommandBuffer::_compute_set_dirty_state() {
if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent];
[compute.encoder setComputePipelineState:compute.pipeline->state];
}

_compute_bind_uniform_sets();

if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
if (compute.push_constant_bindings[0] != (uint32_t)-1) {
[compute.encoder setBytes:compute.push_constant_data
length:compute.push_constant_data_len
atIndex:compute.push_constant_bindings[0]];
}
}

compute.dirty.clear();
}

void MDCommandBuffer::_compute_bind_uniform_sets() {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
return;
}

compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
uint64_t set_uniforms = compute.uniform_set_mask;
compute.uniform_set_mask = 0;

MDComputeShader *shader = compute.pipeline->shader;

while (set_uniforms != 0) {
// Find the index of the next set bit.
uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
// Clear the set bit.
set_uniforms &= (set_uniforms - 1);
MDUniformSet *set = compute.uniform_sets[index];
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
continue;
}
set->bind_uniforms(shader, compute, index);
}
}

void MDCommandBuffer::ComputeState::reset() {
pipeline = nil;
encoder = nil;
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
push_constant_data_len = 0;
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
}
}

void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

MDShader *shader = (MDShader *)(p_shader.id);
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
set->bind_uniforms(shader, compute, p_set_index);
if (compute.uniform_sets.size() <= p_set_index) {
uint32_t s = render.uniform_sets.size();
compute.uniform_sets.resize(p_set_index + 1);
// Set intermediate values to null.
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
}

if (compute.uniform_sets[p_set_index] != set) {
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
compute.uniform_set_mask |= 1ULL << p_set_index;
compute.uniform_sets[p_set_index] = set;
}
}

void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

MDShader *shader = (MDShader *)(p_shader.id);

// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
for (size_t i = 0u; i < p_set_count; ++i) {
for (size_t i = 0; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
set->bind_uniforms(shader, compute, p_first_set_index + i);

uint32_t index = p_first_set_index + i;
if (compute.uniform_sets.size() <= index) {
uint32_t s = compute.uniform_sets.size();
compute.uniform_sets.resize(index + 1);
// Set intermediate values to null.
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
}

if (compute.uniform_sets[index] != set) {
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
compute.uniform_set_mask |= 1ULL << index;
compute.uniform_sets[index] = set;
}
}
}

void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

_compute_set_dirty_state();

MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups);

id<MTLComputeCommandEncoder> enc = compute.encoder;
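_compute_bind_uniform_sets() above visits only the sets whose bits are set in uniform_set_mask, using count-trailing-zeros to find each index and x &= (x - 1) to clear it. The same idiom in isolation (standalone sketch, not driver code):

    #include <cstdint>

    // Visit the index of every set bit in p_mask, lowest first.
    template <typename F>
    static void for_each_set_bit(uint64_t p_mask, F p_visit) {
        while (p_mask != 0) {
            uint32_t index = (uint32_t)__builtin_ctzll(p_mask); // Index of the lowest set bit.
            p_mask &= (p_mask - 1); // Clear that bit.
            p_visit(index);
        }
    }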
@@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups,
void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);

_compute_set_dirty_state();

id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);

id<MTLComputeCommandEncoder> enc = compute.encoder;
@@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name,
MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}

void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
if (push_constants.binding == (uint32_t)-1) {
return;
}

id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;

void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);

[enc setBytes:ptr length:length atIndex:push_constants.binding];
}

MDRenderShader::MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
@@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name,
frag(p_frag) {
}

void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;

void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);

if (push_constants.vert.binding > -1) {
[enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding];
}

if (push_constants.frag.binding > -1) {
[enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding];
}
}

void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
@@ -58,6 +58,7 @@
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#include "drivers/apple/foundation_helpers.h"

#import <Metal/MTLTexture.h>
#import <Metal/Metal.h>
@@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderWrite;
}

if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
}
}

bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));

if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
@@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderRead;
}

if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported.");
}
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version.");
}
}

if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
}
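MTLTextureUsageShaderAtomic is now requested only for textures explicitly created with TEXTURE_USAGE_STORAGE_ATOMIC_BIT, and only when both the OS (macOS 14 / iOS 17 / tvOS 17) and the format's capability bits allow it; otherwise texture creation fails with an error. A small sketch of just that gate (the capability mask and bit mirror the driver's kMTLFmtCapsAtomic check; the helper and plain uint32_t parameters are illustrative):

    // Returns whether MTLTextureUsageShaderAtomic may be added for this format (Objective-C++).
    static bool can_use_shader_atomic_usage(uint32_t p_format_caps, uint32_t p_atomic_cap_bit) {
        if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
            return (p_format_caps & p_atomic_cap_bit) != 0;
        }
        return false; // Older OS versions do not expose MTLTextureUsageShaderAtomic.
    }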
@@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p

// Check if it is a linear format for atomic operations and therefore needs a buffer,
// as generally Metal does not support atomic operations on textures.
bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT));
bool needs_buffer = is_linear;

// Check for atomic requirements.
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) {
switch (p_format.format) {
case RenderingDeviceCommons::DATA_FORMAT_R32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: {
if (!device_properties->features.supports_image_atomic_32_bit) {
// We can emulate 32-bit atomic operations on textures.
needs_buffer = true;
}
} break;
case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: {
if (!device_properties->features.supports_image_atomic_64_bit) {
// No emulation for 64-bit atomics.
ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported.");
}
} break;
default:
break;
}
}

id<MTLTexture> obj = nil;
if (needs_buffer) {
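The single needs_buffer expression is split in two: linear formats still take the buffer-backed path, storage images in the R32 formats fall back to a buffer only when the device lacks native 32-bit image atomics, and the R32G32 formats (used to emulate 64-bit atomics) have no buffer fallback and fail texture creation outright. A condensed restatement of that policy for a 2D, single-mip, single-layer storage image (sketch only; the enum and helper stand in for the driver's own checks):

    enum class AtomicFormat { R32, R32G32, Other };

    // Sketch of the decision above. r_fail reports the unsupported 64-bit case.
    static bool needs_buffer_backing(bool p_is_linear, AtomicFormat p_fmt, bool p_has_32_bit, bool p_has_64_bit, bool &r_fail) {
        r_fail = (p_fmt == AtomicFormat::R32G32 && !p_has_64_bit); // No 64-bit emulation path.
        if (p_is_linear) {
            return true; // Linear layouts always take the buffer path.
        }
        return p_fmt == AtomicFormat::R32 && !p_has_32_bit; // Emulate 32-bit image atomics via a buffer.
    }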
@@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
Fence *fence = (Fence *)(p_cmd_fence.id);
if (fence != nullptr) {
[cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
if (cb == nil) {
// If there is nothing to do, signal the fence immediately.
dispatch_semaphore_signal(fence->semaphore);
}];
} else {
[cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(fence->semaphore);
}];
}
}

for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
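Because command buffers are now created lazily, a submission can reach this point without any MTLCommandBuffer having been allocated; in that case the fence is signaled immediately instead of from a completion handler. The same branch in isolation (sketch; the driver's Fence is reduced here to its dispatch semaphore):

    #import <Metal/Metal.h>
    #include <dispatch/dispatch.h>

    // Signal p_semaphore when p_cb finishes on the GPU, or right away if nothing was encoded.
    static void signal_when_done(id<MTLCommandBuffer> p_cb, dispatch_semaphore_t p_semaphore) {
        if (p_cb == nil) {
            dispatch_semaphore_signal(p_semaphore); // Empty submission: nothing to wait for.
        } else {
            [p_cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
                dispatch_semaphore_signal(p_semaphore); // Signal after GPU completion.
            }];
        }
    }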
@@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {

void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
MDShader *shader = (MDShader *)(p_shader.id);
shader->encode_push_constant_data(p_data, cb);
cb->encode_push_constant_data(p_shader, p_data);
}

// ----- CACHE -----
@@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s

MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
desc.computeFunction = function;
desc.label = conv::to_nsstring(shader->name);
if (archive) {
desc.binaryArchives = @[ archive ];
}
@@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
return device_properties->features.metal_fx_spatial;
case SUPPORTS_METALFX_TEMPORAL:
return device_properties->features.metal_fx_temporal;
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
return device_properties->features.supports_image_atomic_32_bit;
default:
return false;
}
@@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
set_indexes[set_indexes_size - 1] = offset;
}
CompilerMSL::Options msl_options{};
// MAJOR * 10000 + MINOR * 100
uint32_t msl_version = CompilerMSL::Options::make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
mtl_reflection_data.msl_version = msl_options.msl_version;
msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
@@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
}

bool disable_argument_buffers = false;
if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
disable_argument_buffers = true;
}
@@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
if (msl_version >= CompilerMSL::Options::make_msl_version(3, 2)) {
// All 3.2+ versions support device coherence, so we can disable texture fences.
msl_options.readwrite_texture_fences = false;
}

CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
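The cached msl_version uses SPIRV-Cross's MAJOR * 10000 + MINOR * 100 (+ PATCH) encoding, so the 3.2 threshold above compares against 30200; shaders compiled for MSL 3.2 or newer get readwrite_texture_fences disabled because, per the comment in the hunk, those versions support device coherence. A tiny sketch of the comparison (assumes SPIRV-Cross's spirv_msl.hpp, which this file already builds against; the helper name is illustrative):

    #include "spirv_msl.hpp"

    // MSL 3.2 encodes as 3 * 10000 + 2 * 100 = 30200.
    static bool msl_can_skip_texture_fences(uint32_t p_major, uint32_t p_minor) {
        using Options = spirv_cross::CompilerMSL::Options;
        return Options::make_msl_version(p_major, p_minor) >= Options::make_msl_version(3, 2);
    }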
@@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
primary.texture_type = MTLTextureTypeTextureBuffer;
} break;
case spv::DimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case spv::DimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.