|
|
|
@ -1,24 +1,41 @@ |
|
|
|
|
class_name copyMediaPipe |
|
|
|
|
extends Mod_Base |
|
|
|
|
|
|
|
|
|
## Angle in degrees (converted with deg_to_rad) used by initialize_rest_pose() to rotate the arms out of the T-pose. |
var arm_rest_angle := 65 |
|
|
|
|
var time_to_rest := 0.1 # Time without tracking data before returning to the rest pose. |
|
|
|
|
## Factor handed to fi_slerp() while tracking data is fresh; a smaller factor closes the gap to the target faster. |
var interpolation_factor := 0.000000001 # Yes this value needs to be THAT small. |
|
|
|
|
## Factor handed to fi_slerp() once data is older than time_to_rest and trackers ease back to their rest pose. |
var rest_interpolation_factor := 0.2 # "Lerp about 80% in one second." |
|
|
|
|
|
|
|
|
|
# TODO: Change this via calibration! |
|
|
|
|
## Multiplied with the head bone's rest origin to place TrackingRoot; its inverse is stored as the head rest pose. |
var camera_transform := Transform3D(Basis(), Vector3(0.0, 0.0, 0.3)) |
|
|
|
|
|
|
|
|
|
# FIXME: Best to get this from the tracker process (if possible). |
|
|
|
|
var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default? |
|
|
|
|
|
|
|
|
|
# Node references, resolved once this node has entered the scene tree.
@onready var tracker_head: Node3D = $TrackingRoot/Head
@onready var tracker_hand_left: Node3D = $TrackingRoot/LeftHand
@onready var tracker_hand_right: Node3D = $TrackingRoot/RightHand
# Each member is declared exactly once; GDScript rejects a repeated member name.
@onready var landmark_template: MeshInstance3D = $TrackingRoot/LandmarkTemplate
@onready var landmarks_hand_left: Array[MeshInstance3D] = []
@onready var landmarks_hand_right: Array[MeshInstance3D] = []
# Parent of the tracker nodes; positioned by initialize_rest_pose().
@onready var tracking_root: Node3D = $TrackingRoot
|
|
|
|
|
|
|
|
|
## Tracking state for the head.
@onready var head := {
	# Most recent tracking data received.
	"last_data": null,
	# How long ago it was received (in seconds).
	"last_received": INF,
	# Node for visualizing tracking data.
	"tracker": $TrackingRoot/Head,
	# Rest position of the head (from 0,0,0).
	"rest_pose": Transform3D.IDENTITY,
}
|
|
|
|
|
|
|
|
|
## Tracking state for each hand, keyed by side ("left" / "right").
## Every key appears exactly once per hand.
@onready var hands := {
	"left": {
		# Most recent tracking data stored for this hand.
		"last_data": null,
		# Seconds since that data was stored; INF until the first packet arrives.
		"last_received": INF,
		# Node for visualizing tracking data.
		"tracker": $TrackingRoot/LeftHand,
		# Rest transform, filled in by initialize_rest_pose().
		"rest_pose": Transform3D.IDENTITY,
		# Landmark nodes, appended by setup_hand_landmarks().
		"landmarks": [],
	},
	"right": {
		"last_data": null,
		"last_received": INF,
		"tracker": $TrackingRoot/RightHand,
		"rest_pose": Transform3D.IDENTITY,
		"landmarks": [],
	},
}
|
|
|
|
|
|
|
|
@ -41,17 +58,20 @@ func _exit_tree() -> void: |
|
|
|
|
stop_process() |
|
|
|
|
|
|
|
|
|
# Called after mod is initialized or model is changed.
func scene_init() -> void:
	# The rest poses are derived from the skeleton, so rebuild them here.
	initialize_rest_pose()
|
|
|
|
|
|
|
|
|
# Called before mod is removed, model is changed or application is shut down.
func scene_shutdown() -> void:
	# Nothing to release here.
	pass
|
|
|
|
|
|
|
|
|
## Per-frame update: ages the stored tracking data, reads pending tracker
## packets while the tracker is running, then moves the visual trackers.
func _process(delta: float) -> void:
	increase_last_received(delta)
	if is_tracker_running():
		receive_tracker_packets()
	# NOTE(review): assumed to run every frame rather than only while the tracker
	# is running, so trackers can ease back to the rest pose - confirm nesting.
	update_visual_trackers(delta)
|
|
|
|
|
|
|
|
|
## Sets up 21 nodes for the landmarks that make up hand/finger tracking. |
|
|
|
|
func setup_hand_landmarks() -> void: |
|
|
|
|
for side in hands: |
|
|
|
|
var hand = hands[side] |
|
|
|
@ -62,6 +82,33 @@ func setup_hand_landmarks() -> void: |
|
|
|
|
hand.tracker.add_child(landmark) |
|
|
|
|
hand.landmarks.append(landmark) |
|
|
|
|
|
|
|
|
|
## Initializes the stored rest poses for the head and hands.
## Also applies a rotation to the arms so they're not T-posing.
## Does nothing without a skeleton or a "Head" bone, and skips a hand whose
## shoulder or hand bone cannot be found.
func initialize_rest_pose() -> void:
	var skel := get_skeleton()
	if not skel: return

	# find_bone() returns -1 for an unknown name; never pass that on as an index.
	var head_idx := skel.find_bone("Head")
	if head_idx < 0:
		push_warning("initialize_rest_pose: skeleton has no 'Head' bone.")
		return
	var head_origin := skel.get_bone_global_rest(head_idx).origin

	tracking_root.transform = camera_transform * Transform3D(Basis(), head_origin)
	head.rest_pose = camera_transform.inverse()

	# Loop-invariant values, computed once.
	var arm_rotation := deg_to_rad(arm_rest_angle)
	var root_inverse: Transform3D = tracking_root.transform.inverse()

	for side in hands:
		var bone_prefix: String = side.capitalize()
		var shoulder_idx := skel.find_bone(bone_prefix + "Shoulder")
		var hand_idx := skel.find_bone(bone_prefix + "Hand")
		if shoulder_idx < 0 or hand_idx < 0:
			push_warning("initialize_rest_pose: missing shoulder or hand bone for side '%s'." % side)
			continue

		var shoulder_transform := skel.get_bone_global_rest(shoulder_idx)
		var hand_transform := skel.get_bone_global_rest(hand_idx)

		# First, get relative transform of hand to shoulder.
		var hand_to_shoulder := shoulder_transform.inverse() * hand_transform
		# Next, rotate this relative transform by arm_rest_angle.
		hand_to_shoulder = hand_to_shoulder.rotated(Vector3.LEFT, arm_rotation)
		# Finally, put the relative transform back into skeleton-relative coordinates.
		var hand_rest_transform := shoulder_transform * hand_to_shoulder

		# Stored relative to the tracking root.
		hands[side].rest_pose = root_inverse * hand_rest_transform
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
# Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it. |
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
@ -124,66 +171,58 @@ func receive_tracker_packets() -> void: |
|
|
|
|
if bytes.size() == 0: break |
|
|
|
|
var data = JSON.parse_string(bytes.get_string_from_utf8()) |
|
|
|
|
if data is Dictionary: process_tracker_data(data) |
|
|
|
|
# FIXME: Find out why we appear to always be processing 2 packets a frame. |
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
# Functions to PROCESS the incoming TRACKER DATA, and update tracker objects. |
|
|
|
|
# Functions to PROCESS and CONVERT the incoming TRACKER DATA. |
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
|
## Adds `delta` seconds to the "time since last data" counter of the head and of both hands.
func increase_last_received(delta: float) -> void:
	# Dictionaries are shared by reference, so updating through the list updates the originals.
	for tracked in [head, hands.left, hands.right]:
		tracked.last_received += delta
|
|
|
|
|
|
|
|
|
## Handles one decoded tracker packet.
## Packets with an "error" or "status" key are forwarded to their handlers.
## Anything else is treated as tracking data: it is converted to Godot types,
## adjusted, and stored in `head` / `hands` when its confidence is high enough.
## The visual tracker nodes are not touched here; update_visual_trackers()
## interpolates towards the stored data.
func process_tracker_data(data: Dictionary) -> void:
	if "error" in data:
		on_tracker_error(data.error)
		return
	if "status" in data:
		on_tracker_status(data.status)
		return

	# Convert the arrays inside data to known data types like Vector3 and Transform3D.
	convert_tracker_data(data)

	# MediaPipe reports hands from a viewer's perspective, not the
	# person's own actual left and right hand, so swap them out here.
	var left = data["hands"]["left"]
	var right = data["hands"]["right"]
	data["hands"]["left"] = right
	data["hands"]["right"] = left

	# Face matrix is in centimeters, convert to meters.
	data["face"]["transform"].origin /= 100

	# TODO: Make this configurable.
	var min_confidence_threshold := 0.85

	# NOTE: Face confidence currently either 0.0 or 1.0.
	if data["face"]["confidence"] > min_confidence_threshold:
		head.last_data = data["face"]
		head.last_received = 0.0

	for side in hands:
		var hand = hands[side]
		var hand_data = data["hands"][side]
		if hand_data["confidence"] > min_confidence_threshold:
			# Arrays are shared by reference, so the edits below change hand_data too.
			var image_landmarks: Array[Vector3] = hand_data["image_landmarks"]
			var world_landmarks: Array[Vector3] = hand_data["world_landmarks"]

			# Mirror position on the X axis, since image landmarks are in view space.
			for i in image_landmarks.size():
				image_landmarks[i].x = 1 - image_landmarks[i].x
			# Unsure why, but world landmarks might be in a different coordinate system than expected?
			var rotation_fix := Basis(Vector3.RIGHT, TAU / 2)
			for i in world_landmarks.size():
				world_landmarks[i] = rotation_fix * world_landmarks[i]

			hand.last_data = hand_data
			hand.last_received = 0.0
|
|
|
|
|
|
|
|
|
## Called by process_tracker_data() for packets that carry a "status" entry; passes the text to set_status(). |
func on_tracker_status(status: String) -> void: |
|
|
|
|
set_status(status) |
|
|
|
@ -191,31 +230,60 @@ func on_tracker_status(status: String) -> void: |
|
|
|
|
## Called by process_tracker_data() for packets that carry an "error" entry; logs the text with an "Error: " prefix. |
func on_tracker_error(error: String) -> void: |
|
|
|
|
print_log("Error: " + error) |
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
# Functions that deal with CONVERTING the TRACKER DATA to Godot types. |
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
|
## Rewrites `data` in place so it holds Godot types instead of nested arrays:
## the face matrix becomes a Transform3D, each hand's landmark lists become Array[Vector3].
func convert_tracker_data(data: Dictionary) -> void:
	var face = data["face"]
	face["transform"] = to_transform(face["transform"])

	for hand in data["hands"].values():
		# map() yields an untyped Array, so wrap each result in a typed Array[Vector3].
		var image_points = hand["image_landmarks"].map(to_vector)
		var world_points = hand["world_landmarks"].map(to_vector)
		hand["image_landmarks"] = Array(image_points, TYPE_VECTOR3, "", null)
		hand["world_landmarks"] = Array(world_points, TYPE_VECTOR3, "", null)
|
|
|
|
|
|
|
|
|
## Builds a Vector3 from the first three elements of `array`, in x, y, z order.
func to_vector(array) -> Vector3:
	var x = array[0]
	var y = array[1]
	var z = array[2]
	return Vector3(x, y, z)
|
|
|
|
|
|
|
|
|
## Builds a Transform3D from `matrix`, a nested array indexed as matrix[row][column].
## Columns 0-2 of rows 0-2 become the basis axes and column 3 becomes the origin.
## Rows past the third are not read.
func to_transform(matrix) -> Transform3D:
	# Basis() takes its three axes as column vectors, so read the matrix column by column.
	var x_axis := Vector3(matrix[0][0], matrix[1][0], matrix[2][0])
	var y_axis := Vector3(matrix[0][1], matrix[1][1], matrix[2][1])
	var z_axis := Vector3(matrix[0][2], matrix[1][2], matrix[2][2])
	var origin := Vector3(matrix[0][3], matrix[1][3], matrix[2][3])
	return Transform3D(Basis(x_axis, y_axis, z_axis), origin)
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
# Functions that take the CONVERTED DATA and update the VISUAL TRACKER nodes. |
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
|
## Moves the head and hand tracker nodes towards their current targets.
## A part whose data is at least `time_to_rest` seconds old eases back to its
## rest pose; otherwise it follows the most recently stored tracking data.
## Hand landmark nodes are positioned relative to the wrist.
func update_visual_trackers(delta: float) -> void:
	var head_is_stale: bool = head.last_received >= time_to_rest
	if head_is_stale:
		# Reset to rest pose transform.
		head.tracker.transform = fi_slerp(
				head.tracker.transform, head.rest_pose, rest_interpolation_factor, delta)
	else:
		head.tracker.transform = fi_slerp(
				head.tracker.transform, head.last_data["transform"], interpolation_factor, delta)

	# TODO: Don't automatically trust the handedness of the input data.
	for side in hands:
		var hand = hands[side]

		if hand.last_received >= time_to_rest:
			# Reset to rest pose transform.
			hand.tracker.transform = fi_slerp(
					hand.tracker.transform, hand.rest_pose, rest_interpolation_factor, delta)
			continue

		var image_points: Array[Vector3] = hand.last_data["image_landmarks"]
		var world_points: Array[Vector3] = hand.last_data["world_landmarks"]

		var hand_basis := get_hand_rotation(side, world_points)
		var hand_origin := get_hand_viewspace_origin(image_points, world_points, 2.0) \
				* Vector3(7.0, 7.0, 3.5) # FIXME: Fudge factor to match better with world space.

		hand.tracker.transform = fi_slerp(
				hand.tracker.transform, Transform3D(hand_basis, hand_origin),
				interpolation_factor, delta)

		# Translate landmarks so the origin is at the wrist.
		var wrist := world_points[0]
		# World landmarks are in world space, so we have to "subtract" the hand rotation.
		var world_to_hand := hand_basis.inverse()
		for i in world_points.size():
			hand.landmarks[i].position = world_to_hand * (world_points[i] - wrist)
|
|
|
|
|
|
|
|
|
## Framerate-independent smoothing between two transforms.
## `factor` is the share of the remaining distance still left after one second:
## a factor of 0.2 covers roughly 80% of the gap per second.
func fi_slerp(value: Transform3D, target: Transform3D, factor: float, delta: float) -> Transform3D:
	# Share of the gap that is still left after `delta` seconds.
	var remaining := pow(factor, delta)
	return value.interpolate_with(target, 1.0 - remaining)
|
|
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
# ----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
@ -241,14 +309,17 @@ const PINKY_PIP := 19 |
|
|
|
|
# NOTE(review): setup_hand_landmarks() documents 21 landmark nodes per hand, i.e. indices 0..20, |
# so 21 cannot index such an array - confirm this constant list is not off by one. |
const PINKY_DIP := 20 |
|
|
|
|
const PINKY_TIP := 21 |
|
|
|
|
|
|
|
|
|
## Calculate the hand rotation from the hand tracking's world landmarks.
## `side` must be "left" or "right"; it decides which way the palm normal points.
## Returns a basis that looks along the palm normal with the wrist-to-knuckles
## direction as up. An unknown `side` logs an error and returns the identity basis.
func get_hand_rotation(side: String, landmarks: Array[Vector3]) -> Basis:
	var knuckles_center := (landmarks[INDEX_FINGER_MCP] + landmarks[RING_FINGER_TIP]) / 2
	var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center)
	var towards_thumb := landmarks[RING_FINGER_TIP].direction_to(landmarks[INDEX_FINGER_MCP])

	# The palm normal is the cross product of the two in-palm directions.
	# Swapping the operands flips its sign, which is the only difference between the sides.
	var palm_forward: Vector3
	match side:
		"left":
			palm_forward = towards_thumb.cross(wrist_to_knuckles)
		"right":
			palm_forward = wrist_to_knuckles.cross(towards_thumb)
		_:
			# Never hand a zero-length direction to looking_at().
			push_error("get_hand_rotation: unknown side '%s'." % side)
			return Basis.IDENTITY

	return Basis.looking_at(palm_forward, wrist_to_knuckles)
|
|
|
|
|
|
|
|
|
## Attempt to figure out the hand origin in viewspace. |
|
|
|
|
## `hand_to_head_scale` is a fudge value so that we can attempt |
|
|
|
|