From b471d3043c7a2548691a9613f0ec7140a76bc949 Mon Sep 17 00:00:00 2001
From: copygirl
Date: Mon, 9 Dec 2024 03:56:45 +0100
Subject: [PATCH] Build our own custom sync packet data

Instead of relying on Godot packing up dictionaries with variants in a
compact way (which it doesn't), we're building our own packet from scratch
with a new StreamBuffer type which helps with writing and reading data to
and from a PackedByteArray.

To cut down on data, we don't send a full Transform3D per bone. We send
position, rotation and scale separately as 16-bit floats, instead of the
twelve 32-bit floats of a Transform3D. We also skip anything that's
approximately default, like position and scale might often be.

This helped cut down the size of packets to about 1500 bytes, or 800 with
compression, since we're still sending strings.
---
 copyMultiplayer.gd |   4 +-
 stream_buffer.gd   | 202 +++++++++++++++++++++++++++++++++++++++++++++
 sync_controller.gd |  63 ++++++++++----
 3 files changed, 252 insertions(+), 17 deletions(-)
 create mode 100644 stream_buffer.gd

diff --git a/copyMultiplayer.gd b/copyMultiplayer.gd
index fb34c66..dcc539c 100644
--- a/copyMultiplayer.gd
+++ b/copyMultiplayer.gd
@@ -242,7 +242,7 @@ func change_model(filename: String) -> void:
 	if controller: controller.change_model(filename)
 
 @rpc("any_peer", "unreliable_ordered")
-func sync_model_animation(model_transform: Transform3D, shape_dict: Dictionary, bone_poses: Dictionary) -> void:
+func sync_model_animation(uncompressed_length: int, buffer: PackedByteArray) -> void:
 	var peer_id := multiplayer.get_remote_sender_id()
 	var controller := get_sync_controller(peer_id)
-	if controller: controller.sync_model_animation(model_transform, shape_dict, bone_poses)
+	if controller: controller.sync_model_animation(uncompressed_length, buffer)

diff --git a/stream_buffer.gd b/stream_buffer.gd
new file mode 100644
index 0000000..46acf1a
--- /dev/null
+++ b/stream_buffer.gd
@@ -0,0 +1,202 @@
+class_name StreamBuffer
+extends Resource
+
+# 
This maximum capacity is just to ensure we're not doing something wrong,
+# should be enough for our purpose: Encoding and decoding packets.
+const MAXIMUM_CAPACITY = 256 * 1024 # 256 KiB
+
+var buffer: PackedByteArray
+var capacity: int
+var size: int
+var cursor: int
+## Bit index (0 to 7) for writing / reading the next bit.
+var bit: int
+
+func _init(_buffer: PackedByteArray) -> void:
+	buffer = _buffer
+	capacity = buffer.size()
+
+## Creates a new StreamBuffer with the specified capacity.
+## This is intended for writing / encoding data.
+static func with_capacity(initial_capacity: int) -> StreamBuffer:
+	var _buffer = PackedByteArray()
+	_buffer.resize(initial_capacity)
+	return StreamBuffer.new(_buffer)
+
+## Creates a new StreamBuffer from the specified buffer, pre-initializing "size".
+## This is intended for reading / decoding data.
+static func from_buffer(_buffer: PackedByteArray) -> StreamBuffer:
+	var stream := StreamBuffer.new(_buffer)
+	stream.size = stream.capacity
+	return stream
+
+
+## Returns the remaining capacity before the buffer needs to be resized to fit more data.
+func remaining_capacity() -> int:
+	return capacity - size
+
+## Returns the remaining number of bytes to read.
+func remaining_bytes() -> int:
+	return size - cursor
+
+
+## Returns a slice of this StreamBuffer.
+## By default returns a slice of the currently written bytes.
+func slice(begin: int = 0, end: int = -1) -> PackedByteArray:
+	if end < 0: end = size
+	return buffer.slice(begin, end)
+
+## Clears the buffer and resets the cursor, ready to encode new data.
+## For performance, does not clear the existing data in the underlying buffer.
+func clear() -> void:
+	size = 0
+	cursor = 0
+	bit = 0
+
+## Ensures that the capacity for this buffer is large enough for the specified number of bytes to be written.
+## For the sake of not resizing too often, this simply doubles the current capacity.
+func ensure_capacity(required_bytes: int) -> void:
+	var total_required_capacity := size + required_bytes
+	if capacity < total_required_capacity:
+		while capacity < total_required_capacity: capacity = maxi(capacity * 2, 1) # maxi: avoid infinite loop when capacity == 0
+		assert(capacity <= MAXIMUM_CAPACITY)
+		buffer.resize(capacity)
+
+func write_int8(value: int) -> void: ensure_capacity(1); buffer.encode_s8(size, value); size += 1; bit = 0
+func write_int16(value: int) -> void: ensure_capacity(2); buffer.encode_s16(size, value); size += 2; bit = 0
+func write_int32(value: int) -> void: ensure_capacity(4); buffer.encode_s32(size, value); size += 4; bit = 0
+func write_int64(value: int) -> void: ensure_capacity(8); buffer.encode_s64(size, value); size += 8; bit = 0
+
+func write_byte(value: int) -> void: write_uint8(value)
+func write_uint8(value: int) -> void: ensure_capacity(1); buffer.encode_u8(size, value); size += 1; bit = 0
+func write_uint16(value: int) -> void: ensure_capacity(2); buffer.encode_u16(size, value); size += 2; bit = 0
+func write_uint32(value: int) -> void: ensure_capacity(4); buffer.encode_u32(size, value); size += 4; bit = 0
+func write_uint64(value: int) -> void: ensure_capacity(8); buffer.encode_u64(size, value); size += 8; bit = 0
+
+func write_float16(value: float) -> void: ensure_capacity(2); buffer.encode_half(size, value); size += 2; bit = 0
+func write_float32(value: float) -> void: ensure_capacity(4); buffer.encode_float(size, value); size += 4; bit = 0
+func write_float64(value: float) -> void: ensure_capacity(8); buffer.encode_double(size, value); size += 8; bit = 0
+
+
+func read_int8() -> int: assert(remaining_bytes() >= 1); var result := buffer.decode_s8(cursor); cursor += 1; bit = 0; return result
+func read_int16() -> int: assert(remaining_bytes() >= 2); var result := buffer.decode_s16(cursor); cursor += 2; bit = 0; return result
+func read_int32() -> int: assert(remaining_bytes() >= 4); var result := buffer.decode_s32(cursor); cursor += 4; bit = 0; return result
+func read_int64() -> int: assert(remaining_bytes() >= 8); var result := buffer.decode_s64(cursor); cursor += 8; bit = 0; return result
+
+func read_byte() -> int: return read_uint8()
+func read_uint8() -> int: assert(remaining_bytes() >= 1); var result := buffer.decode_u8(cursor); cursor += 1; bit = 0; return result
+func read_uint16() -> int: assert(remaining_bytes() >= 2); var result := buffer.decode_u16(cursor); cursor += 2; bit = 0; return result
+func read_uint32() -> int: assert(remaining_bytes() >= 4); var result := buffer.decode_u32(cursor); cursor += 4; bit = 0; return result
+func read_uint64() -> int: assert(remaining_bytes() >= 8); var result := buffer.decode_u64(cursor); cursor += 8; bit = 0; return result
+
+func read_float16() -> float: assert(remaining_bytes() >= 2); var result := buffer.decode_half(cursor); cursor += 2; bit = 0; return result
+func read_float32() -> float: assert(remaining_bytes() >= 4); var result := buffer.decode_float(cursor); cursor += 4; bit = 0; return result
+func read_float64() -> float: assert(remaining_bytes() >= 8); var result := buffer.decode_double(cursor); cursor += 8; bit = 0; return result
+
+
+func write_bit(value: bool) -> void:
+	if bit == 0:
+		ensure_capacity(1)
+		buffer[size] = 0
+		size += 1
+	buffer[size - 1] = buffer[size - 1] | (int(value) << bit)
+	bit = (bit + 1) % 8
+
+func read_bit() -> bool:
+	if bit == 0:
+		assert(remaining_bytes() >= 1)
+		cursor += 1
+	var result := bool((buffer[cursor - 1] >> bit) & 1)
+	bit = (bit + 1) % 8
+	return result
+
+
+func write_raw_buffer(value: PackedByteArray) -> void:
+	ensure_capacity(value.size())
+	for i in value.size():
+		buffer[size] = value[i]
+		size += 1
+
+func read_raw_buffer(length: int) -> PackedByteArray:
+	assert(remaining_bytes() >= length)
+	var result := PackedByteArray()
+	result.resize(length)
+	for i in length:
+		result[i] = buffer[cursor]
+		cursor += 1
+	return result
+
+
+func write_string(value: String) -> void:
+	var bytes := value.to_utf8_buffer()
+	
write_uint16(bytes.size())
+	write_raw_buffer(bytes)
+
+func read_string() -> String:
+	var length := read_uint16()
+	var bytes := read_raw_buffer(length)
+	return bytes.get_string_from_utf8()
+
+
+func write_transform32(value: Transform3D) -> void:
+	write_float32(value.basis.x.x); write_float32(value.basis.x.y); write_float32(value.basis.x.z)
+	write_float32(value.basis.y.x); write_float32(value.basis.y.y); write_float32(value.basis.y.z)
+	write_float32(value.basis.z.x); write_float32(value.basis.z.y); write_float32(value.basis.z.z)
+	write_float32(value.origin.x); write_float32(value.origin.y); write_float32(value.origin.z)
+
+func read_transform32() -> Transform3D:
+	var result := Transform3D.IDENTITY
+	result.basis.x = Vector3(read_float32(), read_float32(), read_float32())
+	result.basis.y = Vector3(read_float32(), read_float32(), read_float32())
+	result.basis.z = Vector3(read_float32(), read_float32(), read_float32())
+	result.origin = Vector3(read_float32(), read_float32(), read_float32())
+	return result
+
+
+# Optimized way to write a bone transform, since bones are likely to not contain offset or scale.
+
+func write_bone_pose(value: Transform3D) -> void:
+	var pos := value.origin
+	var rot := value.basis.get_euler()
+	var scale := value.basis.get_scale()
+	var has_pos := !pos.is_zero_approx()
+	var has_rot := !rot.is_zero_approx()
+	var has_scale := !scale.is_equal_approx(Vector3.ONE)
+	var has_scale3 := !is_equal_approx(scale.x, scale.y) or !is_equal_approx(scale.x, scale.z) # FIX: was (x, y) twice, missing non-uniform z
+	write_bit(has_pos)
+	write_bit(has_rot)
+	write_bit(has_scale)
+	write_bit(has_scale3)
+	if has_pos:
+		write_float16(pos.x)
+		write_float16(pos.y)
+		write_float16(pos.z)
+	if has_rot:
+		# TODO: Could optimize this by using 16-bit fixed-point values,
+		# since Euler angle components can only be in the range -PI to PI.
+		write_float16(rot.x)
+		write_float16(rot.y)
+		write_float16(rot.z)
+	if has_scale3:
+		write_float16(scale.x)
+		write_float16(scale.y)
+		write_float16(scale.z)
+	elif has_scale:
+		write_float16((scale.x + scale.y + scale.z) / 3)
+
+func read_bone_pose() -> Transform3D:
+	var pos := Vector3.ZERO
+	var rot := Vector3.ZERO
+	var scale := Vector3.ONE
+	var has_pos := read_bit()
+	var has_rot := read_bit()
+	var has_scale := read_bit()
+	var has_scale3 := read_bit()
+	if has_pos: pos = Vector3(read_float16(), read_float16(), read_float16())
+	if has_rot: rot = Vector3(read_float16(), read_float16(), read_float16())
+	if has_scale3: scale = Vector3(read_float16(), read_float16(), read_float16())
+	elif has_scale:
+		var s := read_float16()
+		scale = Vector3(s, s, s)
+	var basis := Basis.from_euler(rot) * Basis.from_scale(scale) # FIX: rotation * scale, matching get_euler()/get_scale() decomposition
+	return Transform3D(basis, pos)
diff --git a/sync_controller.gd b/sync_controller.gd
index 18a83d0..90de462 100644
--- a/sync_controller.gd
+++ b/sync_controller.gd
@@ -10,8 +10,11 @@ var model_name: String
 var model: Node
 var skeleton: Skeleton3D
 
+# Reusable buffer to write data for synchronizing models.
+static var write_stream: StreamBuffer = StreamBuffer.with_capacity(2048)
+
 # Allows us to use the "apply_animations" function to apply blendshapes to a model.
-static var _functions_blendshapes: Script = load("res://Mods/MediaPipe/MediaPipeController_BlendShapes.gd")
+static var BlendShapes: Script = load("res://Mods/MediaPipe/MediaPipeController_BlendShapes.gd")
 
 func _ready() -> void:
 	module = get_parent().get_parent()
@@ -45,18 +48,28 @@ func change_model(filename: String) -> void:
 	model = model_controller.get_node_or_null("Model")
 	skeleton = model_controller._get_model_skeleton()
 
-func sync_model_animation(
-	model_transform: Transform3D,
-	shape_dict: Dictionary, # Dictionary[String, float]
-	bone_poses: Dictionary, # Dictionary[String, Transform3D]
-) -> void:
+func sync_model_animation(uncompressed_length: int, buffer: PackedByteArray) -> void:
 	if (not model) or (not skeleton): return
-	model.transform = model_transform
-	_functions_blendshapes.apply_animations(model, shape_dict)
-	for bone_name in bone_poses:
-		var pose: Transform3D = bone_poses[bone_name]
-		var idx := skeleton.find_bone(bone_name)
-		if idx != -1: skeleton.set_bone_pose(idx, pose)
+
+	var uncompressed_buffer := buffer.decompress(uncompressed_length, FileAccess.COMPRESSION_ZSTD)
+	var stream := StreamBuffer.from_buffer(uncompressed_buffer)
+	model.transform = stream.read_transform32()
+
+	var shape_dict := {}
+	# 256 blendshapes (and bones) should be enough, right?
+	var num_shapes := stream.read_uint8()
+	for i in num_shapes:
+		var shape_name := stream.read_string()
+		var shape_alpha := stream.read_float16()
+		shape_dict[shape_name] = shape_alpha
+	BlendShapes.apply_animations(model, shape_dict)
+
+	var num_bones := stream.read_uint8()
+	for i in num_bones:
+		var bone_name := stream.read_string()
+		var bone_pose := stream.read_bone_pose()
+		var bone_idx := skeleton.find_bone(bone_name)
+		if bone_idx != -1: skeleton.set_bone_pose(bone_idx, bone_pose)
 
 @warning_ignore("shadowed_variable")
 static func send_model_animation(module: copyMultiplayer) -> void:
@@ -68,7 +81,17 @@
 	var media_pipe = module.get_node("../MediaPipeController")
 	if (not model) or (not skeleton) or (not media_pipe): return
 
-	var shape_dict = media_pipe.blend_shape_last_values
+	write_stream.write_transform32(model.transform)
+
+	# TODO: Do not write full strings. Use a lookup table!
+	# TODO: Only write non-default blendshapes. Anything missing = default.
+
+	var shape_dict: Dictionary = media_pipe.blend_shape_last_values
+	write_stream.write_uint8(shape_dict.size()) # NOTE: encode_u8 silently truncates counts above 255
+	for shape_name in shape_dict:
+		var shape_alpha: float = shape_dict[shape_name]
+		write_stream.write_string(shape_name)
+		write_stream.write_float16(shape_alpha)
 
 	var bone_poses = {}
 	for bone_name in module.tracked_bones:
@@ -77,5 +100,15 @@
 		var bone_pose = skeleton.get_bone_pose(bone_idx)
 		bone_poses[bone_name] = bone_pose
 
-	# FIXME: This sends way more information than necessary, but works as a proof-of-concept!
-	module.sync_model_animation.rpc(model.transform, shape_dict, bone_poses)
+	write_stream.write_uint8(bone_poses.size()) # NOTE: encode_u8 silently truncates counts above 255
+	for bone_name in bone_poses:
+		var bone_pose: Transform3D = bone_poses[bone_name]
+		write_stream.write_string(bone_name)
+		write_stream.write_bone_pose(bone_pose)
+
+	# TODO: Ideally, compression won't be needed once we remove strings.
+	var compressed_buffer := write_stream.slice().compress(FileAccess.COMPRESSION_ZSTD)
+	# DEBUG: Uncomment this to see packet size (ideally < 1024).
+	# module.set_status("Packet size: %d (%d uncompressed)" % [compressed_buffer.size(), write_stream.size])
+	module.sync_model_animation.rpc(write_stream.size, compressed_buffer)
+	write_stream.clear()