2D: Switch to VBOs for instance data

- Add support for vertex bindings and UMA vertex buffers in D3D12.
- Simplify the 2D instance parameters and move more of them into per-batch
  data to save bandwidth.

Co-authored-by: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com>
Co-authored-by: Clay John <claynjohn@gmail.com>
Co-authored-by: A Thousand Ships <96648715+athousandships@users.noreply.github.com>
This commit is contained in:
Stuart Carnie
2025-11-07 05:50:02 +11:00
parent bad1287b62
commit 90c0e6acca
25 changed files with 893 additions and 256 deletions

View File

@ -663,7 +663,7 @@ public:
uint32_t p_instance_count,
uint32_t p_base_vertex,
uint32_t p_first_instance);
void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets);
void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets);
void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset);
void render_draw_indexed(uint32_t p_index_count,

View File

@ -783,10 +783,12 @@ void MDCommandBuffer::_render_set_dirty_state() {
if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) {
uint32_t p_binding_count = render.vertex_buffers.size();
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
offsets:render.vertex_offsets.ptr()
withRange:NSMakeRange(first, p_binding_count)];
if (p_binding_count > 0) {
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
offsets:render.vertex_offsets.ptr()
withRange:NSMakeRange(first, p_binding_count)];
}
}
render.resource_tracker.encode(render.encoder);
@ -1252,24 +1254,47 @@ void MDCommandBuffer::render_draw(uint32_t p_vertex_count,
baseInstance:p_first_instance];
}
void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets) {
void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
render.vertex_buffers.resize(p_binding_count);
render.vertex_offsets.resize(p_binding_count);
// Are the existing buffer bindings the same?
bool same = true;
// Reverse the buffers, as their bindings are assigned in descending order.
for (uint32_t i = 0; i < p_binding_count; i += 1) {
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id;
render.vertex_buffers[i] = buf_info->metal_buffer;
render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1];
NSUInteger dynamic_offset = 0;
if (buf_info->is_dynamic()) {
const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info;
uint64_t frame_idx = p_dynamic_offsets & 0x3;
p_dynamic_offsets >>= 2;
dynamic_offset = frame_idx * dyn_buf->size_bytes;
}
if (render.vertex_buffers[i] != buf_info->metal_buffer) {
render.vertex_buffers[i] = buf_info->metal_buffer;
same = false;
}
render.vertex_offsets[i] = dynamic_offset + p_offsets[p_binding_count - i - 1];
}
if (render.encoder) {
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
offsets:render.vertex_offsets.ptr()
withRange:NSMakeRange(first, p_binding_count)];
if (same) {
NSUInteger *offset_ptr = render.vertex_offsets.ptr();
for (uint32_t i = first; i < first + p_binding_count; i++) {
[render.encoder setVertexBufferOffset:*offset_ptr atIndex:i];
offset_ptr++;
}
} else {
[render.encoder setVertexBuffers:render.vertex_buffers.ptr()
offsets:render.vertex_offsets.ptr()
withRange:NSMakeRange(first, p_binding_count)];
}
render.dirty.clear_flag(RenderState::DIRTY_VERTEX);
} else {
render.dirty.set_flag(RenderState::DIRTY_VERTEX);
@ -1394,7 +1419,9 @@ void MDCommandBuffer::RenderState::reset() {
viewports.clear();
scissors.clear();
blend_constants.reset();
bzero(vertex_buffers.ptr(), sizeof(id<MTLBuffer> __unsafe_unretained) * vertex_buffers.size());
vertex_buffers.clear();
bzero(vertex_offsets.ptr(), sizeof(NSUInteger) * vertex_offsets.size());
vertex_offsets.clear();
resource_tracker.reset();
}

View File

@ -130,6 +130,7 @@ public:
virtual uint8_t *buffer_map(BufferID p_buffer) override final;
virtual void buffer_unmap(BufferID p_buffer) override final;
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final;
virtual uint64_t buffer_get_dynamic_offsets(Span<BufferID> p_buffers) override final;
virtual void buffer_flush(BufferID p_buffer) override final;
virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final;
@ -164,7 +165,7 @@ public:
private:
public:
virtual VertexFormatID vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) override final;
virtual VertexFormatID vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final;
virtual void vertex_format_free(VertexFormatID p_vertex_format) override final;
#pragma mark - Barriers
@ -403,7 +404,7 @@ public:
virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
// Buffer binding.
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final;
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final;
virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final;
// Dynamic state.

View File

@ -186,6 +186,23 @@ uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_bu
return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes;
}
// Packs the current frame index of every dynamic buffer in p_buffers into one
// 64-bit value, two bits per dynamic buffer, in iteration order: the first
// dynamic buffer occupies the lowest two bits, the next one the two bits above
// it, and so on. Buffers that are not dynamic contribute no bits.
//
// Returns the packed value; it is 0 when p_buffers contains no dynamic buffers.
uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span<BufferID> p_buffers) {
	uint64_t mask = 0u;
	uint64_t shift = 0u;

	for (const BufferID &buf : p_buffers) {
		const BufferInfo *buf_info = (const BufferInfo *)buf.id;
		if (!buf_info->is_dynamic()) {
			continue;
		}
		// Widen to 64 bits before shifting. Without the cast the shift is
		// evaluated in the (possibly narrower) type returned by frame_index(),
		// so any index placed at bit 32 or above would be lost or undefined.
		mask |= static_cast<uint64_t>(buf_info->frame_index()) << shift;
		// We can encode the frame index in 2 bits since frame_count won't be > 4.
		shift += 2UL;
	}

	return mask;
}
// Flush hook for p_buffer. This implementation intentionally performs no work
// and never reads p_buffer.
// NOTE(review): presumably no explicit flush is required for the buffers this
// driver creates — confirm against the buffer creation code.
void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) {
// Nothing to do.
}
@ -809,27 +826,33 @@ bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataForm
#pragma mark - Vertex Array
RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) {
RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) {
MTLVertexDescriptor *desc = MTLVertexDescriptor.vertexDescriptor;
for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {
VertexAttribute const &vf = p_vertex_attribs[i];
ERR_FAIL_COND_V_MSG(get_format_vertex_size(vf.format) == 0, VertexFormatID(),
"Data format for attachment (" + itos(i) + "), '" + FORMAT_NAMES[vf.format] + "', is not valid for a vertex array.");
for (const VertexAttributeBindingsMap::KV &kv : p_vertex_bindings) {
uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(kv.key);
MTLVertexBufferLayoutDescriptor *ld = desc.layouts[idx];
if (kv.value.stride != 0) {
ld.stepFunction = kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance;
ld.stepRate = 1;
ld.stride = kv.value.stride;
} else {
ld.stepFunction = MTLVertexStepFunctionConstant;
ld.stepRate = 0;
ld.stride = 0;
}
DEV_ASSERT(ld.stride == desc.layouts[idx].stride);
}
for (const VertexAttribute &vf : p_vertex_attribs) {
desc.attributes[vf.location].format = pixel_formats->getMTLVertexFormat(vf.format);
desc.attributes[vf.location].offset = vf.offset;
uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(i);
uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(vf.binding);
desc.attributes[vf.location].bufferIndex = idx;
if (vf.stride == 0) {
desc.layouts[idx].stepFunction = MTLVertexStepFunctionConstant;
desc.layouts[idx].stepRate = 0;
desc.layouts[idx].stride = pixel_formats->getBytesPerBlock(vf.format);
} else {
desc.layouts[idx].stepFunction = vf.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance;
desc.layouts[idx].stepRate = 1;
desc.layouts[idx].stride = vf.stride;
// Constant attribute, so we must determine the stride to satisfy Metal API.
uint32_t stride = desc.layouts[idx].stride;
desc.layouts[idx].stride = std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format));
}
}
@ -1768,9 +1791,9 @@ void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBuffe
cb->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
}
// Thin forwarding wrapper: reinterprets p_cmd_buffer.id as an MDCommandBuffer
// and passes the vertex buffer bindings on to its render_bind_vertex_buffers().
// NOTE(review): two signature lines and two forwarding calls are shown here,
// one without and one with p_dynamic_offsets. This looks like the before/after
// lines of a diff displayed together — confirm which pair is the current code.
void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets);
cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets, p_dynamic_offsets);
}
void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {