Implement rest pose and fixed hand rotations

main
copygirl 3 weeks ago
parent ed9a63596f
commit f49865666b
  1. 217
      copyMediaPipe.gd
  2. 6
      copyMediaPipe.tscn

@ -1,24 +1,41 @@
class_name copyMediaPipe class_name copyMediaPipe
extends Mod_Base extends Mod_Base
var arm_rest_angle := 65
var time_to_rest := 0.1 # Time without tracking data before returning to the rest pose.
var interpolation_factor := 0.000000001 # Yes this value needs to be THAT small.
var rest_interpolation_factor := 0.2 # "Lerp about 80% in one second."
# TODO: Change this via calibration!
var camera_transform := Transform3D(Basis(), Vector3(0.0, 0.0, 0.3))
# FIXME: Best to get this from the tracker process (if possible). # FIXME: Best to get this from the tracker process (if possible).
var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default? var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default?
@onready var tracker_head : Node3D = $TrackingRoot/Head @onready var tracking_root: Node3D = $TrackingRoot
@onready var tracker_hand_left : Node3D = $TrackingRoot/LeftHand
@onready var tracker_hand_right : Node3D = $TrackingRoot/RightHand
@onready var landmark_template: MeshInstance3D = $TrackingRoot/LandmarkTemplate @onready var landmark_template: MeshInstance3D = $TrackingRoot/LandmarkTemplate
@onready var landmarks_hand_left : Array[MeshInstance3D] = []
@onready var landmarks_hand_right : Array[MeshInstance3D] = [] @onready var head := {
last_data = null, # Most recent tracking data received.
last_received = INF, # How long ago it was received (in seconds).
tracker = $TrackingRoot/Head, # Node for visualizing tracking data.
rest_pose = Transform3D.IDENTITY, # Rest position of the head (from 0,0,0).
}
@onready var hands := { @onready var hands := {
left = { left = {
tracker = tracker_hand_left, last_data = null,
landmarks = landmarks_hand_left, last_received = INF,
tracker = $TrackingRoot/LeftHand,
rest_pose = Transform3D.IDENTITY,
landmarks = [],
}, },
right = { right = {
tracker = tracker_hand_right, last_data = null,
landmarks = landmarks_hand_right, last_received = INF,
tracker = $TrackingRoot/RightHand,
rest_pose = Transform3D.IDENTITY,
landmarks = [],
}, },
} }
@ -41,17 +58,20 @@ func _exit_tree() -> void:
stop_process() stop_process()
# Called after mod is initialized or model is changed. # Called after mod is initialized or model is changed.
func scene_init(): func scene_init() -> void:
pass initialize_rest_pose()
# Called before mod is removed, model is changed or application is shut down. # Called before mod is removed, model is changed or application is shut down.
func scene_shutdown(): func scene_shutdown() -> void:
pass pass
func _process(_delta: float) -> void: func _process(delta: float) -> void:
increase_last_received(delta)
if is_tracker_running(): if is_tracker_running():
receive_tracker_packets() receive_tracker_packets()
update_visual_trackers(delta)
## Sets up 21 nodes for the landmarks that make up hand/finger tracking.
func setup_hand_landmarks() -> void: func setup_hand_landmarks() -> void:
for side in hands: for side in hands:
var hand = hands[side] var hand = hands[side]
@ -62,6 +82,33 @@ func setup_hand_landmarks() -> void:
hand.tracker.add_child(landmark) hand.tracker.add_child(landmark)
hand.landmarks.append(landmark) hand.landmarks.append(landmark)
## Initializes the stored rest poses for the head and hands.
## Also applies a rotation to the arms so they're not T-posing.
##
## Does nothing if no skeleton is available or the "Head" bone is missing.
## A hand whose "<Side>Shoulder" or "<Side>Hand" bone is missing is skipped
## and keeps its previous rest pose.
func initialize_rest_pose() -> void:
	var skel := get_skeleton()
	if not skel: return

	# find_bone() returns -1 for unknown bone names; bail out instead of
	# handing an invalid index to get_bone_global_rest().
	var head_idx := skel.find_bone("Head")
	if head_idx < 0: return
	var head_origin := skel.get_bone_global_rest(head_idx).origin

	# Place the tracking root at the head's rest position, offset by the camera.
	tracking_root.transform = camera_transform * Transform3D(Basis(), head_origin)
	head.rest_pose = camera_transform.inverse()

	# Loop-invariant: converts skeleton-relative transforms to tracking-root space.
	var skeleton_to_root := tracking_root.transform.inverse()

	for side in hands:
		var bone_prefix: String = side.capitalize()
		var shoulder_idx := skel.find_bone(bone_prefix + "Shoulder")
		var hand_idx := skel.find_bone(bone_prefix + "Hand")
		if shoulder_idx < 0 or hand_idx < 0: continue

		var shoulder_transform := skel.get_bone_global_rest(shoulder_idx)
		var hand_transform := skel.get_bone_global_rest(hand_idx)

		# First, get relative transform of hand to shoulder.
		var hand_to_shoulder := shoulder_transform.inverse() * hand_transform
		# Next, rotate this relative transform by arm_rest_angle.
		hand_to_shoulder = hand_to_shoulder.rotated(Vector3.LEFT, deg_to_rad(arm_rest_angle))
		# Finally, put the relative transform back into skeleton-relative coordinates.
		var hand_rest_transform := shoulder_transform * hand_to_shoulder

		hands[side].rest_pose = skeleton_to_root * hand_rest_transform
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it. # Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it.
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -124,66 +171,58 @@ func receive_tracker_packets() -> void:
if bytes.size() == 0: break if bytes.size() == 0: break
var data = JSON.parse_string(bytes.get_string_from_utf8()) var data = JSON.parse_string(bytes.get_string_from_utf8())
if data is Dictionary: process_tracker_data(data) if data is Dictionary: process_tracker_data(data)
# FIXME: Find out why we appear to always be processing 2 packets a frame.
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Functions to PROCESS the incoming TRACKER DATA, and update tracker objects. # Functions to PROCESS and CONVERT the incoming TRACKER DATA.
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
## Advances the "time since last tracking data" counters of the head and
## both hands by `delta` seconds.
func increase_last_received(delta: float) -> void:
	# Dictionaries are passed by reference, so mutating `state` updates the originals.
	for state in [head, hands.left, hands.right]:
		state.last_received += delta
func process_tracker_data(data: Dictionary) -> void: func process_tracker_data(data: Dictionary) -> void:
if "error" in data: on_tracker_error(data.error); return if "error" in data: on_tracker_error(data.error); return
if "status" in data: on_tracker_status(data.status); return if "status" in data: on_tracker_status(data.status); return
convert_tracker_data(data)
# MediaPipe reports hands from a viewer's perspective, not the # Convert the arrays inside data to known data types like Vector3 and Transform3D.
# person's own actual left and right hand, so swap them out here. data["face"]["transform"] = to_transform(data["face"]["transform"])
var left = data["hands"]["left"] for side in data["hands"]:
var right = data["hands"]["right"] var hand = data["hands"][side]
data["hands"]["left"] = right # Convert untyped array of arrays to typed Array[Vector3].
data["hands"]["right"] = left var image_landmarks = hand["image_landmarks"].map(to_vector)
var world_landmarks = hand["world_landmarks"].map(to_vector)
hand["image_landmarks"] = Array(image_landmarks, TYPE_VECTOR3, "", null)
hand["world_landmarks"] = Array(world_landmarks, TYPE_VECTOR3, "", null)
# Face matrix is in centimeters, convert to meters.
data["face"]["transform"].origin /= 100
tracker_head.transform = data["face"]["transform"] # TODO: Make this configurable.
tracker_head.position /= 100 # Centimeters to meters. var min_confidence_threshold := 0.85
# NOTE: Face confidence currently either 0.0 or 1.0.
if data["face"]["confidence"] > min_confidence_threshold:
head.last_data = data["face"]
head.last_received = 0.0
# TODO: Actually use this.
var num_hands_detected := 0
for side in hands: for side in hands:
var hand = hands[side] var hand = hands[side]
var tracker: Node3D = hand.tracker
# TODO: Don't automatically trust the handedness of the input data.
var hand_data = data["hands"][side] var hand_data = data["hands"][side]
if hand_data["confidence"] > min_confidence_threshold:
var image_landmarks: Array[Vector3] = hand_data["image_landmarks"] var image_landmarks: Array[Vector3] = hand_data["image_landmarks"]
var world_landmarks: Array[Vector3] = hand_data["world_landmarks"] var world_landmarks: Array[Vector3] = hand_data["world_landmarks"]
# FIXME: Make this configurable.
var min_confidence_threshold := 0.85
if hand_data["confidence"] < min_confidence_threshold: continue
num_hands_detected += 1
# Mirror position on the X axis, since image landmarks are in view space. # Mirror position on the X axis, since image landmarks are in view space.
for i in image_landmarks.size(): image_landmarks[i].x = (1 - image_landmarks[i].x) for i in image_landmarks.size(): image_landmarks[i].x = (1 - image_landmarks[i].x)
# Unsure why, but world landmarks might be in a different coordinate system than expected?
var rotation_fix := Basis(Vector3.RIGHT, TAU / 2)
for i in world_landmarks.size(): world_landmarks[i] = rotation_fix * world_landmarks[i]
tracker.basis = get_hand_rotation(world_landmarks) hand.last_data = hand_data
tracker.position = get_hand_viewspace_origin(image_landmarks, world_landmarks, 2.0) \ hand.last_received = 0.0
* Vector3(7.0, 7.0, 3.5) # FIXME: Fudge factor to match better with world space.
# Translate landmarks so the origin is at the wrist.
var wrist_position := world_landmarks[0]
# World landmarks are in world space, so we have to "subtract" the hand rotation.
# Also, the rotation is all wrong, so apply that here as well.
var hand_rotation := tracker.basis.inverse() * Basis.from_euler(Vector3(TAU / 2, 0, 0))
for i in world_landmarks.size():
var pos := world_landmarks[i] - wrist_position
hand.landmarks[i].position = hand_rotation * pos
# TODO: Interpolation needs to be done outside of this function,
# as it could be called multiple times a frame, or not at all.
# Smoothly interpolate tracker transforms (in a framerate-independent way).
# var f := 0.0000000001 # Yes this value needs to be THAT small.
# tracker_head .transform = tracker_head .transform.interpolate_with(head_transform , 1 - f ** delta)
# tracker_hand_left .transform = tracker_hand_left .transform.interpolate_with(hand_left_transform , 1 - f ** delta)
# tracker_hand_right.transform = tracker_hand_right.transform.interpolate_with(hand_right_transform, 1 - f ** delta)
func on_tracker_status(status: String) -> void: func on_tracker_status(status: String) -> void:
set_status(status) set_status(status)
@ -191,21 +230,6 @@ func on_tracker_status(status: String) -> void:
func on_tracker_error(error: String) -> void: func on_tracker_error(error: String) -> void:
print_log("Error: " + error) print_log("Error: " + error)
# -----------------------------------------------------------------------------
# Functions that deal with CONVERTING the TRACKER DATA to Godot types.
# -----------------------------------------------------------------------------
## Converts the arrays inside data to known data types like Vector3 and Transform3D.
func convert_tracker_data(data: Dictionary) -> void:
data["face"]["transform"] = to_transform(data["face"]["transform"])
for side in data["hands"]:
var hand = data["hands"][side]
# Convert untyped array of arrays to typed Array[Vector3].
var image_landmarks = hand["image_landmarks"].map(to_vector)
var world_landmarks = hand["world_landmarks"].map(to_vector)
hand["image_landmarks"] = Array(image_landmarks, TYPE_VECTOR3, "", null)
hand["world_landmarks"] = Array(world_landmarks, TYPE_VECTOR3, "", null)
func to_vector(array) -> Vector3: func to_vector(array) -> Vector3:
return Vector3(array[0], array[1], array[2]) return Vector3(array[0], array[1], array[2])
@ -216,6 +240,50 @@ func to_transform(matrix) -> Transform3D:
Vector3(matrix[0][2], matrix[1][2], matrix[2][2])), Vector3(matrix[0][2], matrix[1][2], matrix[2][2])),
Vector3(matrix[0][3], matrix[1][3], matrix[2][3])) Vector3(matrix[0][3], matrix[1][3], matrix[2][3]))
# -----------------------------------------------------------------------------
# Functions that take the CONVERTED DATA and update the VISUAL TRACKER nodes.
# -----------------------------------------------------------------------------
## Moves the head and hand tracker nodes towards their current targets.
##
## A tracker whose `last_received` has reached `time_to_rest` is eased back
## to its stored rest pose. Otherwise it is eased towards the transform
## derived from its most recent tracking data. For hands, the landmark nodes
## are also positioned relative to the wrist.
func update_visual_trackers(delta: float) -> void:
	if head.last_received >= time_to_rest:
		# Reset to rest pose transform.
		head.tracker.transform = fi_slerp(head.tracker.transform,
			head.rest_pose, rest_interpolation_factor, delta)
	else:
		head.tracker.transform = fi_slerp(head.tracker.transform,
			head.last_data["transform"], interpolation_factor, delta)

	# TODO: Don't automatically trust the handedness of the input data.
	for side in hands:
		var hand = hands[side]

		if hand.last_received >= time_to_rest:
			# Reset to rest pose transform.
			hand.tracker.transform = fi_slerp(hand.tracker.transform,
				hand.rest_pose, rest_interpolation_factor, delta)
			continue

		var image_landmarks: Array[Vector3] = hand.last_data["image_landmarks"]
		var world_landmarks: Array[Vector3] = hand.last_data["world_landmarks"]

		var hand_rotation := get_hand_rotation(side, world_landmarks)
		var hand_origin := get_hand_viewspace_origin(image_landmarks, world_landmarks, 2.0) \
			* Vector3(7.0, 7.0, 3.5) # FIXME: Fudge factor to match better with world space.

		var target_transform := Transform3D(hand_rotation, hand_origin)
		hand.tracker.transform = fi_slerp(hand.tracker.transform,
			target_transform, interpolation_factor, delta)

		# Translate landmarks so the origin is at the wrist.
		var wrist_position := world_landmarks[0]
		# World landmarks are in world space, so we have to "subtract" the hand rotation.
		# The inverse is the same for every landmark, so compute it once.
		var inverse_rotation := hand_rotation.inverse()
		# Never index past the landmark nodes that actually exist for this hand.
		var landmark_count := mini(world_landmarks.size(), hand.landmarks.size())
		for i in landmark_count:
			var pos := world_landmarks[i] - wrist_position
			hand.landmarks[i].position = inverse_rotation * pos
## Framerate-independent smoothing of `value` towards `target`.
## `factor` is the fraction of the remaining distance that is still left after
## one second; e.g. a factor of 0.2 covers roughly 80% of the distance per second.
func fi_slerp(value: Transform3D, target: Transform3D, factor: float, delta: float) -> Transform3D:
	# Portion of the remaining distance to cover during this frame.
	var weight := 1 - pow(factor, delta)
	return value.interpolate_with(target, weight)
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -241,14 +309,17 @@ const PINKY_PIP := 19
const PINKY_DIP := 20 const PINKY_DIP := 20
const PINKY_TIP := 21 const PINKY_TIP := 21
# FIXME: I changed the way this was calculated and it doesn't quite fit the data right? ## Calculate the hand rotation from the hand tracking's world landmarks.
func get_hand_rotation(landmarks: Array[Vector3]) -> Basis: func get_hand_rotation(side: String, landmarks: Array[Vector3]) -> Basis:
var knuckles_center := (landmarks[INDEX_FINGER_MCP] + landmarks[RING_FINGER_TIP]) / 2 var knuckles_center := (landmarks[INDEX_FINGER_MCP] + landmarks[RING_FINGER_TIP]) / 2
var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center) var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center)
var towards_thumb := landmarks[RING_FINGER_TIP].direction_to(landmarks[INDEX_FINGER_MCP]) var towards_thumb := landmarks[RING_FINGER_TIP].direction_to(landmarks[INDEX_FINGER_MCP])
var up := wrist_to_knuckles.cross(towards_thumb) var palm_forward: Vector3
return Basis.looking_at(wrist_to_knuckles, up, true) if side == "left": palm_forward = towards_thumb.cross(wrist_to_knuckles)
if side == "right": palm_forward = wrist_to_knuckles.cross(towards_thumb)
return Basis.looking_at(palm_forward, wrist_to_knuckles)
## Attempt to figure out the hand origin in viewspace. ## Attempt to figure out the hand origin in viewspace.
## `hand_to_head_scale` is a fudge value so that we can attempt ## `hand_to_head_scale` is a fudge value so that we can attempt

@ -21,23 +21,21 @@ rings = 3
script = ExtResource("1_0kpr8") script = ExtResource("1_0kpr8")
[node name="TrackingRoot" type="Node3D" parent="."] [node name="TrackingRoot" type="Node3D" parent="."]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1.5, 0.5)
[node name="DebugVisuals" parent="TrackingRoot" instance=ExtResource("2_8wmot")] [node name="DebugVisuals" parent="TrackingRoot" instance=ExtResource("2_8wmot")]
[node name="Head" type="MeshInstance3D" parent="TrackingRoot"] [node name="Head" type="MeshInstance3D" parent="TrackingRoot"]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -0.3)
mesh = SubResource("BoxMesh_wtdv4") mesh = SubResource("BoxMesh_wtdv4")
[node name="DebugVisuals" parent="TrackingRoot/Head" instance=ExtResource("2_8wmot")] [node name="DebugVisuals" parent="TrackingRoot/Head" instance=ExtResource("2_8wmot")]
[node name="LeftHand" type="Node3D" parent="TrackingRoot"] [node name="LeftHand" type="Node3D" parent="TrackingRoot"]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, -0.5, 0, -0.3) transform = Transform3D(-4.37114e-08, 1, -4.37114e-08, 0, -4.37114e-08, -1, -1, -4.37114e-08, 1.91069e-15, 0.5, 0, 0)
[node name="DebugVisuals" parent="TrackingRoot/LeftHand" instance=ExtResource("2_8wmot")] [node name="DebugVisuals" parent="TrackingRoot/LeftHand" instance=ExtResource("2_8wmot")]
[node name="RightHand" type="Node3D" parent="TrackingRoot"] [node name="RightHand" type="Node3D" parent="TrackingRoot"]
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0.5, 0, -0.3) transform = Transform3D(1.91069e-15, -1, 4.37114e-08, -4.37114e-08, -4.37114e-08, -1, 1, 0, -4.37114e-08, -0.5, 0, 0)
[node name="DebugVisuals" parent="TrackingRoot/RightHand" instance=ExtResource("2_8wmot")] [node name="DebugVisuals" parent="TrackingRoot/RightHand" instance=ExtResource("2_8wmot")]

Loading…
Cancel
Save