|
|
|
class_name copyMediaPipe
|
|
|
|
extends Mod_Base
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Potentially configurable variables.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
enum BlendshapeMode { NONE, MEDIA_PIPE, VRM_STANDARD }
|
|
|
|
var blendshape_mode := BlendshapeMode.VRM_STANDARD
|
|
|
|
|
|
|
|
var arm_rest_angle := 65
|
|
|
|
var interpolation_factor := 0.000000001 # Yes this value needs to be THAT small.
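# With fi_slerp's per-frame weight of (1.0 - factor ** delta), even this tiny
# factor only covers ~29% of the remaining distance per frame at 60 FPS:
# 1.0 - 0.000000001 ** (1.0 / 60.0) ≈ 0.29.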
|
|
|
|
var rest_interpolation_factor := 0.2 # "Lerp about 80% of the way in one second."
|
|
|
|
var min_confidence_threshold := 0.85
|
|
|
|
var time_to_rest := 0.1 # Time without tracking data before returning to the rest pose.
|
|
|
|
|
|
|
|
# TODO: Change this via calibration!
|
|
|
|
var camera_transform := Transform3D(Basis(), Vector3(0.0, 0.0, 0.3))
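# Applied to the head's rest pose to position TrackingRoot (see initialize_rest_pose).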
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
# FIXME: Best to get this from the tracker process (if possible).
|
|
|
|
var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default?
|
|
|
|
|
|
|
|
# TODO: Ensure that this works with the model offset from the world origin.
|
|
|
|
var ik_chains: Array[copyMediaPipe_IKChain] = []
|
|
|
|
|
|
|
|
@onready var tracking_root: Node3D = $TrackingRoot
|
|
|
|
@onready var head := {
|
|
|
|
last_data = null, # Most recent tracking data received.
|
|
|
|
last_received = INF, # How long ago it was received (in seconds).
|
|
|
|
tracker = $TrackingRoot/Head, # Node for visualizing tracking data.
|
|
|
|
rest_pose = Transform3D.IDENTITY, # Rest position of the head.
|
|
|
|
}
|
|
|
|
@onready var hands := {
|
|
|
|
left = {
|
|
|
|
last_data = null,
|
|
|
|
last_received = INF,
|
|
|
|
tracker = $TrackingRoot/LeftHand,
|
|
|
|
rest_pose = Transform3D.IDENTITY,
|
|
|
|
landmarks = [],
|
|
|
|
},
|
|
|
|
right = {
|
|
|
|
last_data = null,
|
|
|
|
last_received = INF,
|
|
|
|
tracker = $TrackingRoot/RightHand,
|
|
|
|
rest_pose = Transform3D.IDENTITY,
|
|
|
|
landmarks = [],
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
func _ready() -> void:
|
|
|
|
setup_hand_landmarks()
|
|
|
|
|
|
|
|
var dir = get_script().get_path().get_base_dir()
|
|
|
|
var path = dir.path_join("_tracker/Project/new_new_tracker.py")
|
|
|
|
python_process = KiriPythonWrapperInstance.new(path)
|
|
|
|
if not python_process.setup_python(false):
|
|
|
|
		OS.alert("Failed to set up tracker dependencies!")
		return
|
|
|
|
|
|
|
|
start_process()
|
|
|
|
	# FIXME: Don't hardcode the video device.
	var devices := get_video_devices()
	if not devices.is_empty():
		set_video_device(devices[0])
|
|
|
|
start_tracker()
|
|
|
|
|
|
|
|
func _exit_tree() -> void:
|
|
|
|
stop_tracker()
|
|
|
|
stop_process()
|
|
|
|
|
|
|
|
# Called after mod is initialized or model is changed.
|
|
|
|
func scene_init() -> void:
|
|
|
|
initialize_rest_pose()
|
|
|
|
initialize_ik_chains()
|
|
|
|
|
|
|
|
# Called before mod is removed, model is changed or application is shut down.
|
|
|
|
func scene_shutdown() -> void:
|
|
|
|
ik_chains = []
|
|
|
|
|
|
|
|
func _process(delta: float) -> void:
|
|
|
|
increase_last_received(delta)
|
|
|
|
if is_tracker_running():
|
|
|
|
receive_tracker_packets()
|
|
|
|
update_visual_trackers(delta)
|
|
|
|
update_ik_chains()
|
|
|
|
update_blendshapes()
|
|
|
|
|
|
|
|
## Sets up the 21 landmark nodes per hand that make up hand/finger tracking.
|
|
|
|
func setup_hand_landmarks() -> void:
|
|
|
|
const landmark_scene := preload("Resources/debug_landmark.tscn")
|
|
|
|
for side in hands:
|
|
|
|
var hand = hands[side]
|
|
|
|
for i in 21:
|
|
|
|
var landmark := landmark_scene.instantiate()
|
|
|
|
hand.tracker.add_child(landmark)
|
|
|
|
hand.landmarks.append(landmark)
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Initialization functions that are called when a new model is loaded.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
## Initializes the stored rest poses for the head and hands.
|
|
|
|
## Also applies a rotation to the arms so they're not T-posing.
|
|
|
|
func initialize_rest_pose() -> void:
|
|
|
|
var skel := get_skeleton()
|
|
|
|
if not skel: return
|
|
|
|
|
|
|
|
var head_idx := skel.find_bone("Head")
|
|
|
|
var head_rest := skel.get_bone_global_rest(head_idx)
|
|
|
|
|
|
|
|
	# Place the tracking root at the head's rest pose, offset by the camera transform.
|
|
|
|
tracking_root.transform = camera_transform * head_rest
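	# Rest poses are stored in TrackingRoot-local space so they can be assigned
	# directly to the tracker nodes (which are children of TrackingRoot).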
|
|
|
|
|
|
|
|
head.rest_pose = tracking_root.transform.inverse() * head_rest
|
|
|
|
|
|
|
|
for side in hands:
|
|
|
|
var shoulder_idx := skel.find_bone(side.capitalize() + "Shoulder")
|
|
|
|
var hand_idx := skel.find_bone(side.capitalize() + "Hand")
|
|
|
|
var shoulder_rest := skel.get_bone_global_rest(shoulder_idx)
|
|
|
|
var hand_rest := skel.get_bone_global_rest(hand_idx)
|
|
|
|
|
|
|
|
# First, get relative transform of hand to shoulder.
|
|
|
|
var hand_to_shoulder := shoulder_rest.inverse() * hand_rest
|
|
|
|
# Next, rotate this relative transform by arm_rest_angle.
|
|
|
|
hand_to_shoulder = hand_to_shoulder.rotated(Vector3.LEFT, deg_to_rad(arm_rest_angle))
|
|
|
|
# Finally, put the relative transform back into skeleton-relative coordinates.
|
|
|
|
var new_hand_transform := shoulder_rest * hand_to_shoulder
|
|
|
|
|
|
|
|
hands[side].rest_pose = tracking_root.transform.inverse() * new_hand_transform
|
|
|
|
|
|
|
|
## Sets up the inverse kinematics chains to move the model depending on the location of the visual trackers.
|
|
|
|
func initialize_ik_chains() -> void:
|
|
|
|
ik_chains = []
|
|
|
|
|
|
|
|
var chain_spine := copyMediaPipe_IKChain.new()
|
|
|
|
chain_spine.skeleton = get_skeleton()
|
|
|
|
chain_spine.base_bone = "Hips"
|
|
|
|
chain_spine.tip_bone = "Head"
|
|
|
|
chain_spine.rotation_low = 0.0 * TAU
|
|
|
|
chain_spine.rotation_high = 1.0 * TAU
|
|
|
|
chain_spine.do_yaw = true
|
|
|
|
chain_spine.main_axis_of_rotation = Vector3.RIGHT
|
|
|
|
chain_spine.secondary_axis_of_rotation = Vector3.UP
|
|
|
|
chain_spine.pole_direction_target = Vector3.ZERO # No pole target.
|
|
|
|
chain_spine.tracker_object = head.tracker
|
|
|
|
chain_spine.yaw_scale = 0.25 # chest_yaw_scale (Unsure what this does.)
|
|
|
|
ik_chains.append(chain_spine)
|
|
|
|
|
|
|
|
var x_pole_dist = 10.0
|
|
|
|
var y_pole_dist = 5.0
|
|
|
|
var z_pole_dist = 10.0
|
|
|
|
var arm_rotation_axis = Vector3.UP
|
|
|
|
|
|
|
|
for side in hands:
|
|
|
|
var hand = hands[side]
|
|
|
|
|
|
|
|
var chain_hand := copyMediaPipe_IKChain.new()
|
|
|
|
chain_hand.skeleton = get_skeleton()
|
|
|
|
chain_hand.base_bone = side.capitalize() + "UpperArm"
|
|
|
|
chain_hand.tip_bone = side.capitalize() + "Hand"
|
|
|
|
chain_hand.rotation_low = 0.025 * TAU
|
|
|
|
chain_hand.rotation_high = 0.990 * TAU
|
|
|
|
chain_hand.do_yaw = false
|
|
|
|
chain_hand.do_bone_roll = true
|
|
|
|
chain_hand.secondary_axis_of_rotation = Vector3.UP
|
|
|
|
|
|
|
|
		if side == "left":
			chain_hand.main_axis_of_rotation = -arm_rotation_axis
			chain_hand.pole_direction_target = Vector3(x_pole_dist, -y_pole_dist, -z_pole_dist)
		else:
			chain_hand.main_axis_of_rotation = arm_rotation_axis
			chain_hand.pole_direction_target = Vector3(-x_pole_dist, -y_pole_dist, -z_pole_dist)
		chain_hand.tracker_object = hand.tracker
|
|
|
|
|
|
|
|
ik_chains.append(chain_hand)
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
var python_process: KiriPythonWrapperInstance
|
|
|
|
|
|
|
|
func start_process() -> void:
|
|
|
|
python_process.start_process(false)
|
|
|
|
|
|
|
|
func stop_process() -> void:
|
|
|
|
python_process.stop_process()
|
|
|
|
|
|
|
|
func is_process_running() -> bool:
|
|
|
|
return python_process.get_status() == KiriPythonWrapperInstance.KiriPythonWrapperStatus.STATUS_RUNNING
|
|
|
|
|
|
|
|
## Returns: [{ name: String, backend: String, path: String, index: int }]
|
|
|
|
func get_video_devices() -> Array:
|
|
|
|
assert(is_process_running())
|
|
|
|
var devices = python_process.call_rpc_sync("enumerate_camera_devices", [])
|
|
|
|
return devices if devices is Array else []
|
|
|
|
|
|
|
|
func set_video_device(device) -> void:
|
|
|
|
assert(is_process_running())
|
|
|
|
var index: int = device.index if device else -1
|
|
|
|
python_process.call_rpc_sync("set_video_device_number", [ index ])
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Functions to start/stop the TRACKER and receive packets coming from it.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
var base_port := 7098
|
|
|
|
var udp_server: PacketPeerUDP
|
|
|
|
var udp_server_port: int
|
|
|
|
|
|
|
|
func start_tracker() -> void:
|
|
|
|
assert(!is_tracker_running())
|
|
|
|
|
|
|
|
udp_server = PacketPeerUDP.new()
|
|
|
|
# Find a port number that's open to use.
|
|
|
|
udp_server_port = base_port
|
|
|
|
while udp_server.bind(udp_server_port, "127.0.0.1") != OK:
|
|
|
|
udp_server_port += 1
|
|
|
|
|
|
|
|
python_process.call_rpc_sync("set_udp_port_number", [ udp_server_port ])
|
|
|
|
python_process.call_rpc_sync("start_tracker", [])
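	# From here on the tracker streams JSON packets to udp_server; they get
	# drained once per frame in receive_tracker_packets().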
|
|
|
|
|
|
|
|
func stop_tracker() -> void:
|
|
|
|
if !is_tracker_running(): return # Do nothing if tracker isn't running.
|
|
|
|
python_process.call_rpc_sync("stop_tracker", [])
|
|
|
|
udp_server.close()
|
|
|
|
udp_server = null
|
|
|
|
|
|
|
|
func is_tracker_running() -> bool:
|
|
|
|
return udp_server != null
|
|
|
|
|
|
|
|
func receive_tracker_packets() -> void:
|
|
|
|
assert(is_tracker_running())
|
|
|
|
while true:
|
|
|
|
var bytes := udp_server.get_packet()
|
|
|
|
if bytes.size() == 0: break
|
|
|
|
var data = JSON.parse_string(bytes.get_string_from_utf8())
|
|
|
|
if data is Dictionary: process_tracker_data(data)
|
|
|
|
# FIXME: Find out why we appear to always be processing 2 packets a frame.
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Functions to PROCESS and CONVERT the incoming TRACKER DATA.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
func increase_last_received(delta: float) -> void:
|
|
|
|
head.last_received += delta
|
|
|
|
hands.left.last_received += delta
|
|
|
|
hands.right.last_received += delta
|
|
|
|
|
|
|
|
func process_tracker_data(data: Dictionary) -> void:
|
|
|
|
if "error" in data: on_tracker_error(data.error); return
|
|
|
|
if "status" in data: on_tracker_status(data.status); return
|
|
|
|
|
|
|
|
# Convert the arrays inside data to known data types like Vector3 and Transform3D.
|
|
|
|
data["face"]["transform"] = to_transform(data["face"]["transform"])
|
|
|
|
for side in data["hands"]:
|
|
|
|
var hand = data["hands"][side]
|
|
|
|
# Convert untyped array of arrays to typed Array[Vector3].
|
|
|
|
var image_landmarks = hand["image_landmarks"].map(to_vector)
|
|
|
|
var world_landmarks = hand["world_landmarks"].map(to_vector)
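		# Array(base, type, class_name, script) makes a typed copy of an array;
		# the class name and script stay empty/null for a built-in type like Vector3.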
|
|
|
|
hand["image_landmarks"] = Array(image_landmarks, TYPE_VECTOR3, "", null)
|
|
|
|
hand["world_landmarks"] = Array(world_landmarks, TYPE_VECTOR3, "", null)
|
|
|
|
|
|
|
|
# Face matrix is in centimeters, convert to meters.
|
|
|
|
data["face"]["transform"].origin /= 100
|
|
|
|
|
|
|
|
# NOTE: Face confidence currently either 0.0 or 1.0.
|
|
|
|
if data["face"]["confidence"] > min_confidence_threshold:
|
|
|
|
head.last_data = data["face"]
|
|
|
|
head.last_received = 0.0
|
|
|
|
|
|
|
|
for side in hands:
|
|
|
|
var hand = hands[side]
|
|
|
|
var hand_data = data["hands"][side]
|
|
|
|
if hand_data["confidence"] > min_confidence_threshold:
|
|
|
|
var image_landmarks: Array[Vector3] = hand_data["image_landmarks"]
|
|
|
|
var world_landmarks: Array[Vector3] = hand_data["world_landmarks"]
|
|
|
|
|
|
|
|
# Mirror position on the X axis, since image landmarks are in view space.
|
|
|
|
for i in image_landmarks.size(): image_landmarks[i].x = (1 - image_landmarks[i].x)
|
|
|
|
			# Unsure why, but the world landmarks seem to arrive in a different coordinate system than expected.
|
|
|
|
var rotation_fix := Basis(Vector3.RIGHT, TAU / 2)
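			# (A half-turn around the X axis negates the Y and Z components.)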
|
|
|
|
for i in world_landmarks.size(): world_landmarks[i] = rotation_fix * world_landmarks[i]
|
|
|
|
|
|
|
|
hand.last_data = hand_data
|
|
|
|
hand.last_received = 0.0
|
|
|
|
|
|
|
|
func on_tracker_status(status: String) -> void:
|
|
|
|
set_status(status)
|
|
|
|
|
|
|
|
func on_tracker_error(error: String) -> void:
|
|
|
|
print_log("Error: " + error)
|
|
|
|
|
|
|
|
func to_vector(array) -> Vector3:
|
|
|
|
return Vector3(array[0], array[1], array[2])
|
|
|
|
|
|
|
|
func to_transform(matrix) -> Transform3D:
|
|
|
|
return Transform3D(
|
|
|
|
Basis(Vector3(matrix[0][0], matrix[1][0], matrix[2][0]),
|
|
|
|
Vector3(matrix[0][1], matrix[1][1], matrix[2][1]),
|
|
|
|
Vector3(matrix[0][2], matrix[1][2], matrix[2][2])),
|
|
|
|
Vector3(matrix[0][3], matrix[1][3], matrix[2][3]))
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Functions for updating VISUAL TRACKERS and THE MODEL itself.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
func update_visual_trackers(delta: float) -> void:
|
|
|
|
if head.last_received >= time_to_rest:
|
|
|
|
# Reset to rest pose transform.
|
|
|
|
head.tracker.transform = fi_slerp(head.tracker.transform,
|
|
|
|
head.rest_pose, rest_interpolation_factor, delta)
|
|
|
|
else:
|
|
|
|
head.tracker.transform = fi_slerp(head.tracker.transform,
|
|
|
|
head.last_data["transform"], interpolation_factor, delta)
|
|
|
|
|
|
|
|
# TODO: Don't automatically trust the handedness of the input data.
|
|
|
|
for side in hands:
|
|
|
|
var hand = hands[side]
|
|
|
|
if hand.last_received >= time_to_rest:
|
|
|
|
# Reset to rest pose transform.
|
|
|
|
hand.tracker.transform = fi_slerp(hand.tracker.transform,
|
|
|
|
hand.rest_pose, rest_interpolation_factor, delta)
|
|
|
|
else:
|
|
|
|
var image_landmarks: Array[Vector3] = hand.last_data["image_landmarks"]
|
|
|
|
var world_landmarks: Array[Vector3] = hand.last_data["world_landmarks"]
|
|
|
|
|
|
|
|
var hand_rotation := get_hand_rotation(side, world_landmarks)
|
|
|
|
var hand_origin := get_hand_viewspace_origin(image_landmarks, world_landmarks, 2.0) \
|
|
|
|
* Vector3(7.0, 7.0, 3.5) # FIXME: Fudge factor to match better with world space.
|
|
|
|
|
|
|
|
var target_transform := Transform3D(hand_rotation, hand_origin)
|
|
|
|
hand.tracker.transform = fi_slerp(hand.tracker.transform,
|
|
|
|
target_transform, interpolation_factor, delta)
|
|
|
|
|
|
|
|
# Translate landmarks so the origin is at the wrist.
|
|
|
|
var wrist_position := world_landmarks[0]
|
|
|
|
			# World landmarks carry the hand's world orientation, but the landmark
			# nodes are children of the tracker (which already has hand_rotation
			# applied), so remove that rotation here to avoid applying it twice.
|
|
|
|
for i in world_landmarks.size():
|
|
|
|
var pos := world_landmarks[i] - wrist_position
|
|
|
|
hand.landmarks[i].position = hand_rotation.inverse() * pos
|
|
|
|
|
|
|
|
func update_ik_chains() -> void:
|
|
|
|
for chain in ik_chains:
|
|
|
|
chain.do_ik_chain()
|
|
|
|
|
|
|
|
func update_blendshapes() -> void:
|
|
|
|
const Blendshapes := preload("res://Mods/MediaPipe/MediaPipeController_BlendShapes.gd")
|
|
|
|
|
|
|
|
var model := get_model()
|
|
|
|
if (not model) or (not head.last_data): return
|
|
|
|
var data: Dictionary = head.last_data.blendshapes
|
|
|
|
|
|
|
|
var shape_dict: Dictionary
|
|
|
|
match blendshape_mode:
|
|
|
|
BlendshapeMode.MEDIA_PIPE: shape_dict = data
|
|
|
|
BlendshapeMode.VRM_STANDARD: shape_dict = \
|
|
|
|
Blendshapes.convert_mediapipe_shapes_to_vrm_standard(data)
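	# (BlendshapeMode.NONE leaves shape_dict empty, so effectively nothing is applied.)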
|
|
|
|
|
|
|
|
# TODO: Blendshapes.apply_smoothing(...)
|
|
|
|
Blendshapes.fixup_eyes(shape_dict)
|
|
|
|
Blendshapes.apply_animations(model, shape_dict)
|
|
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Utility functions, currently only relating to update_visual_trackers.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
# Indices of MediaPipe's 21 hand landmarks.
|
|
|
|
const WRIST := 0
|
|
|
|
const THUMB_CMC := 1
|
|
|
|
const THUMB_MCP := 2
|
|
|
|
const THUMB_IP := 3
|
|
|
|
const THUMB_TIP := 4
|
|
|
|
const INDEX_FINGER_MCP := 5
|
|
|
|
const INDEX_FINGER_PIP := 6
|
|
|
|
const INDEX_FINGER_DIP := 7
|
|
|
|
const INDEX_FINGER_TIP := 8
|
|
|
|
const MIDDLE_FINGER_MCP := 9
|
|
|
|
const MIDDLE_FINGER_PIP := 10
|
|
|
|
const MIDDLE_FINGER_DIP := 11
|
|
|
|
const MIDDLE_FINGER_TIP := 12
|
|
|
|
const RING_FINGER_MCP := 13
|
|
|
|
const RING_FINGER_PIP := 14
|
|
|
|
const RING_FINGER_DIP := 15
|
|
|
|
const RING_FINGER_TIP := 16
|
|
|
|
const PINKY_MCP := 17
|
|
|
|
const PINKY_PIP := 18
|
|
|
|
const PINKY_DIP := 19
|
|
|
|
const PINKY_TIP := 20
|
|
|
|
|
|
|
|
## Calculate the hand rotation from the hand tracking's world landmarks.
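## The wrist-to-knuckles direction serves as the basis' up vector, and crossing
## it with the direction toward the thumb side yields the palm normal; the
## cross order flips per side because the thumb sits on opposite sides of the
## two hands.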
|
|
|
|
func get_hand_rotation(side: String, landmarks: Array[Vector3]) -> Basis:
|
|
|
|
var knuckles_center := (landmarks[INDEX_FINGER_MCP] + landmarks[RING_FINGER_TIP]) / 2
|
|
|
|
var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center)
|
|
|
|
var towards_thumb := landmarks[RING_FINGER_TIP].direction_to(landmarks[INDEX_FINGER_MCP])
|
|
|
|
|
|
|
|
var palm_forward: Vector3
|
|
|
|
	if side == "left": palm_forward = towards_thumb.cross(wrist_to_knuckles)
	else: palm_forward = wrist_to_knuckles.cross(towards_thumb) # "right"
|
|
|
|
|
|
|
|
return Basis.looking_at(palm_forward, wrist_to_knuckles)
|
|
|
|
|
|
|
|
## Attempt to figure out the hand origin in viewspace.
|
|
|
|
## `hand_to_head_scale` is a fudge value so that we can attempt
|
|
|
|
## to force the hand and head into the same scale range, roughly.
|
|
|
|
func get_hand_viewspace_origin(
|
|
|
|
image_landmarks: Array[Vector3],
|
|
|
|
_world_landmarks: Array[Vector3], # unused
|
|
|
|
hand_to_head_scale: float,
|
|
|
|
) -> Vector3:
|
|
|
|
# Values found through experimentation.
|
|
|
|
var known_distances := [
|
|
|
|
[ WRIST , THUMB_CMC , 0.053861 ],
|
|
|
|
[ THUMB_CMC , THUMB_MCP , 0.057096 ],
|
|
|
|
[ THUMB_MCP , THUMB_IP , 0.048795 ],
|
|
|
|
[ THUMB_IP , THUMB_TIP , 0.039851 ],
|
|
|
|
[ WRIST , INDEX_FINGER_MCP , 0.152538 ],
|
|
|
|
[ WRIST , RING_FINGER_TIP , 0.138711 ],
|
|
|
|
[ INDEX_FINGER_MCP , MIDDLE_FINGER_MCP , 0.029368 ],
|
|
|
|
[ MIDDLE_FINGER_MCP , MIDDLE_FINGER_TIP , 0.027699 ],
|
|
|
|
[ MIDDLE_FINGER_TIP , RING_FINGER_TIP , 0.032673 ],
|
|
|
|
]
|
|
|
|
# FIXME: Hardcoded fudge-factor
|
|
|
|
for d in known_distances: d[2] *= 0.25
|
|
|
|
|
|
|
|
# Iterate through known distances and add up the weighted average.
|
|
|
|
var fake_z_avg := 0.0
|
|
|
|
var total_avg_weight := 0.0
|
|
|
|
for d in known_distances:
|
|
|
|
var pt0 := image_landmarks[d[0]]
|
|
|
|
var pt1 := image_landmarks[d[1]]
|
|
|
|
|
|
|
|
# Figure out a weighted average based on how much the vector
|
|
|
|
# is facing the camera Z axis. Stuff facing into the camera
|
|
|
|
# has less accurate results, so weight it lower.
|
|
|
|
var normvec := (pt0 - pt1).normalized()
|
|
|
|
var weight := clampf(1.0 - 2.0 * abs(normvec[2]), 0.0, 1.0)
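		# E.g. a segment lying flat in the image plane (normvec z of 0) gets full
		# weight, while one tilted so that |z| >= 0.5 contributes nothing.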
|
|
|
|
|
|
|
|
# Add to the average.
|
|
|
|
fake_z_avg += guess_depth_from_known_distance(
|
|
|
|
pt0, pt1, d[2] / hand_to_head_scale) * weight
|
|
|
|
total_avg_weight += weight
|
|
|
|
|
|
|
|
if abs(total_avg_weight) < 0.000001:
|
|
|
|
		push_warning("Hand depth estimate got a near-zero total weight: ", total_avg_weight)
|
|
|
|
# FIXME: Fudge value because I'm tired of this thing throwing
|
|
|
|
# exceptions all the time. Do an actual fix later.
|
|
|
|
total_avg_weight = 0.01
|
|
|
|
|
|
|
|
# Finish the average.
|
|
|
|
	fake_z_avg /= total_avg_weight
|
|
|
|
|
|
|
|
	return ndc_to_viewspace(image_landmarks[WRIST], -fake_z_avg)
|
|
|
|
|
|
|
|
## Figure out a depth value based on the distance between known
|
|
|
|
## normalized (clip-space) coordinates of landmarks, compared to what
|
|
|
|
## we would expect the average distance between those points to be.
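## Under a pinhole model the apparent size of a segment scales inversely with
## its depth, so depth ≈ expected_size / apparent_size, which is exactly what
## the return value computes.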
|
|
|
|
func guess_depth_from_known_distance(left: Vector3, right: Vector3, distance: float) -> float:
|
|
|
|
var dist_clip := left - right
|
|
|
|
	dist_clip.x *= camera_aspect_ratio # FIXME: Fudge factor (presumably correcting normalized x for the aspect ratio).
|
|
|
|
return 1.0 / (dist_clip.length() / distance)
|
|
|
|
|
|
|
|
func ndc_to_viewspace(v: Vector3, z_offset: float) -> Vector3:
|
|
|
|
	# (px, py) is the principal point in normalized image coordinates. It's
	# pretty important, and Google's documentation didn't give much useful info
	# about it, but the image center (0.5, 0.5) seems to work.
|
|
|
|
var px := 0.5
|
|
|
|
var py := 0.5
|
|
|
|
|
|
|
|
	# The focal lengths default to 1.0, 1.0 according to Google's docs, which is
	# probably fine for default camera stuff; fy gets scaled by the aspect ratio
	# so x and y come out evenly scaled.
|
|
|
|
var fx := 1.0
|
|
|
|
var fy := camera_aspect_ratio
|
|
|
|
|
|
|
|
# Inverse equation from the section on NDC space here
|
|
|
|
# https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems
|
|
|
|
# https://web.archive.org/web/20220727063132/https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems
|
|
|
|
# which describes going from camera coordinates to NDC space. It's kinda
|
|
|
|
# ambiguous on terms, but this seems to work to get view space coordinates.
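	# Roughly inverting the docs' forward mapping (x_ndc ≈ fx * X / Z + px,
	# y_ndc ≈ fy * Y / Z + py, z_ndc ≈ 1 / Z, up to sign conventions):
	# Z = 1 / z_ndc, X = (x_ndc - px) * Z / fx, Y = (y_ndc - py) * Z / fy.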
|
|
|
|
|
|
|
|
# With this, coordinates seem to be evenly scaled (between x/y and z) and in view space.
|
|
|
|
var z_scale := 1.0
|
|
|
|
var z := 1.0 / (-v[2] + (1.0 / z_offset) * z_scale)
|
|
|
|
var x := (v[0] - px) * z / fx
|
|
|
|
var y := (v[1] - py) * z / fy
|
|
|
|
return Vector3(x, y, z)
|
|
|
|
|
|
|
|
## Smoothly interpolates transforms in a framerate-independent way.
|
|
|
|
## For example, a factor of 0.2 moves roughly 80% of the remaining distance each second.
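## Derivation: treat factor as the fraction of distance remaining after one
## second; after delta seconds the remaining fraction is factor ** delta, so the
## per-frame weight is 1.0 - factor ** delta. Successive frames compose to
## factor ** (d1 + d2), no matter how the time is sliced. At factor = 0.2 and
## delta = 1.0 / 60.0 this gives a per-frame weight of about 0.027.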
|
|
|
|
func fi_slerp(value: Transform3D, target: Transform3D, factor: float, delta: float) -> Transform3D:
|
|
|
|
return value.interpolate_with(target, 1 - factor ** delta)
|