copyMediaPipe/copyMediaPipe.gd

class_name copyMediaPipe
extends Mod_Base

# FIXME: Best to get this from the tracker process (if possible).
## Width / height ratio assumed for the camera image. Read by
## guess_depth_from_known_distance() and ndc_to_viewspace().
var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default?

# Scene nodes that are driven by the incoming tracking data.
@onready var tracker_head       : Node3D = $TrackingRoot/Head
@onready var tracker_hand_left  : Node3D = $TrackingRoot/LeftHand
@onready var tracker_hand_right : Node3D = $TrackingRoot/RightHand
## Mesh that setup_hand_landmarks() duplicates once per hand landmark.
@onready var landmark_template  : MeshInstance3D = $TrackingRoot/LandmarkTemplate
# Per-hand landmark meshes; populated by setup_hand_landmarks() through `hands`.
@onready var landmarks_hand_left  : Array[MeshInstance3D] = []
@onready var landmarks_hand_right : Array[MeshInstance3D] = []

## Per-side lookup ("left" / "right") of a hand's tracker node and its landmark
## meshes. process_tracker_data() uses the same keys to index the "hands"
## entry of incoming packets.
@onready var hands := {
	left = {
		tracker   = tracker_hand_left,
		landmarks = landmarks_hand_left,
	},
	right = {
		tracker   = tracker_hand_right,
		landmarks = landmarks_hand_right,
	},
}

## Creates the hand landmark meshes, prepares and launches the Python tracker
## process, selects a video device and starts tracking.
##
## If the Python environment cannot be set up, an alert is shown and nothing
## further is started. If no video device is reported, "no device" is passed
## to set_video_device() instead of indexing an empty list.
func _ready() -> void:
	setup_hand_landmarks()

	var dir  = get_script().get_path().get_base_dir()
	var path = dir.path_join("_tracker/Project/new_new_tracker.py")
	python_process = KiriPythonWrapperInstance.new(path)
	if not python_process.setup_python(false):
		OS.alert("Failed to setup tracker dependencies!")
		# Without its dependencies the tracker cannot run, so do not try to
		# start the process or send RPCs to it.
		return

	start_process()
	# FIXME: Don't hardcode the video device.
	var devices := get_video_devices()
	# set_video_device() maps a null device to index -1, which avoids an
	# out-of-bounds access when no camera was enumerated.
	set_video_device(devices[0] if not devices.is_empty() else null)
	start_tracker()

## Tears tracking down when this node leaves the scene tree: the tracker is
## stopped first (stop_tracker() does nothing if it isn't running), then the
## Python process.
func _exit_tree() -> void:
	stop_tracker()
	stop_process()

## Called after mod is initialized or model is changed.
## Currently a no-op in this mod.
func scene_init():
	pass

## Called before mod is removed, model is changed or application is shut down.
## Currently a no-op in this mod.
func scene_shutdown():
	pass

## Per-frame hook: drains pending tracker packets, but only while the tracker
## is running.
func _process(_delta: float) -> void:
	if not is_tracker_running():
		return
	receive_tracker_packets()

## Creates 21 visible copies of the landmark template for each hand, parents
## them to that hand's tracker node and records them in that hand's landmark
## list.
func setup_hand_landmarks() -> void:
	for hand in hands.values():
		for _index in range(21):
			var marker: MeshInstance3D = landmark_template.duplicate(0)
			marker.position = Vector3.ZERO
			marker.visible  = true
			hand.tracker.add_child(marker)
			hand.landmarks.append(marker)

# -----------------------------------------------------------------------------
# Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it.
# -----------------------------------------------------------------------------

## Wrapper around the external Python tracker process. Assigned in _ready().
var python_process: KiriPythonWrapperInstance

## Launches the Python tracker process through the wrapper.
# NOTE(review): the meaning of the `false` argument is defined by
# KiriPythonWrapperInstance and is not visible here — confirm.
func start_process() -> void:
	python_process.start_process(false)

## Stops the Python tracker process through the wrapper.
func stop_process() -> void:
	python_process.stop_process()

## Returns true while the wrapper reports the Python process as running.
func is_process_running() -> bool:
	var running_status = KiriPythonWrapperInstance.KiriPythonWrapperStatus.STATUS_RUNNING
	return python_process.get_status() == running_status

## Asks the tracker process for the available camera devices. Requires the
## process to be running. Falls back to an empty array when the RPC result is
## not an array.
## Expected entry shape: { name: String, backend: String, path: String, index: int }
func get_video_devices() -> Array:
	assert(is_process_running())
	var rpc_result = python_process.call_rpc_sync("enumerate_camera_devices", [])
	if rpc_result is Array:
		return rpc_result
	return []

## Tells the tracker process which camera to use. `device` is an entry as
## returned by get_video_devices(); a falsy value (e.g. null) sends index -1.
func set_video_device(device) -> void:
	assert(is_process_running())
	var index: int = -1
	if device:
		index = device.index
	python_process.call_rpc_sync("set_video_device_number", [ index ])

# -----------------------------------------------------------------------------
# Functions to start/stop the TRACKER and receive packets coming from it.
# -----------------------------------------------------------------------------

## First UDP port that start_tracker() tries to bind.
var base_port := 7098
## Socket receiving tracker packets. It being non-null is what
## is_tracker_running() reports; stop_tracker() resets it to null.
var udp_server: PacketPeerUDP
## Port chosen by start_tracker() and sent to the tracker process.
var udp_server_port: int

## Binds a local UDP socket for incoming tracker packets, then tells the
## Python process which port to send to and starts the tracker.
##
## Ports are probed upwards from `base_port`. The search is bounded so that a
## bind failure unrelated to port availability cannot loop forever, and so the
## port number never exceeds the valid range. If no port can be bound, the
## failure is logged and the tracker stays stopped.
func start_tracker() -> void:
	assert(!is_tracker_running())

	# Upper bound on how many consecutive ports are probed.
	const MAX_PORT_ATTEMPTS := 100
	# Highest valid UDP port number.
	const MAX_PORT := 65535

	var server := PacketPeerUDP.new()
	var port   := base_port
	var bound  := false
	# Find a port number that's open to use.
	for _attempt in MAX_PORT_ATTEMPTS:
		if port > MAX_PORT: break
		if server.bind(port, "127.0.0.1") == OK:
			bound = true
			break
		port += 1

	if not bound:
		print_log("Error: Could not bind a UDP port for the tracker.")
		return

	# Publish the socket only once it is bound, so is_tracker_running()
	# never reports true for an unusable socket.
	udp_server      = server
	udp_server_port = port

	python_process.call_rpc_sync("set_udp_port_number", [ udp_server_port ])
	python_process.call_rpc_sync("start_tracker", [])

## Stops the tracker in the Python process and releases the UDP socket.
## Safe to call when the tracker is not running.
func stop_tracker() -> void:
	# Nothing to tear down unless the tracker is running.
	if is_tracker_running():
		python_process.call_rpc_sync("stop_tracker", [])
		udp_server.close()
		udp_server = null

## Returns true while a UDP socket exists, i.e. between start_tracker()
## assigning `udp_server` and stop_tracker() resetting it to null.
func is_tracker_running() -> bool:
	return udp_server != null

## Drains every packet currently queued on the UDP socket, decodes each one
## as JSON and hands dictionaries to process_tracker_data().
##
## The loop is driven by the socket's pending-packet count rather than by
## "got an empty packet", so a zero-length datagram no longer ends the drain
## early and delays the packets queued behind it.
func receive_tracker_packets() -> void:
	assert(is_tracker_running())
	while udp_server.get_available_packet_count() > 0:
		var bytes := udp_server.get_packet()
		# Stop on a receive error rather than risk spinning on a bad socket.
		if udp_server.get_packet_error() != OK: break
		# Skip empty datagrams; there is nothing to parse in them.
		if bytes.is_empty(): continue
		var data = JSON.parse_string(bytes.get_string_from_utf8())
		# Anything that is not a JSON object is ignored.
		if data is Dictionary: process_tracker_data(data)

# -----------------------------------------------------------------------------
# Functions to PROCESS the incoming TRACKER DATA, and update tracker objects.
# -----------------------------------------------------------------------------

## Handles one decoded tracker packet.
##
## Packets carrying an "error" or "status" key are forwarded to the matching
## handler and nothing else happens. Any other packet is treated as tracking
## data: it is converted to Godot types, then applied to the head tracker, the
## hand trackers and the per-hand landmark meshes.
func process_tracker_data(data: Dictionary) -> void:
	if "error" in data:
		on_tracker_error(data.error)
		return
	if "status" in data:
		on_tracker_status(data.status)
		return
	convert_tracker_data(data)

	# MediaPipe labels hands as seen by the viewer rather than by the tracked
	# person, so exchange the two entries before using them.
	var packet_hands = data["hands"]
	var viewer_left  = packet_hands["left"]
	var viewer_right = packet_hands["right"]
	packet_hands["left"]  = viewer_right
	packet_hands["right"] = viewer_left

	tracker_head.transform = data["face"]["transform"]
	tracker_head.position  = tracker_head.position / 100 # Centimeters to meters.

	# FIXME: Make this configurable.
	var min_confidence_threshold := 0.85
	# Half-turn about the X axis, applied to the landmark offsets below.
	var half_turn_x := Basis.from_euler(Vector3(TAU / 2, 0, 0))

	# TODO: Actually use this.
	var num_hands_detected := 0
	for side in hands:
		var hand = hands[side]
		var hand_node: Node3D = hand.tracker

		# TODO: Don't automatically trust the handedness of the input data.
		var hand_data = packet_hands[side]
		var image_points: Array[Vector3] = hand_data["image_landmarks"]
		var world_points: Array[Vector3] = hand_data["world_landmarks"]

		# Leave this hand untouched when the detection is not confident enough.
		if hand_data["confidence"] < min_confidence_threshold:
			continue
		num_hands_detected += 1

		# Mirror position on the X axis, since image landmarks are in view space.
		for i in image_points.size():
			image_points[i].x = 1 - image_points[i].x

		hand_node.basis = get_hand_rotation(world_points)
		var origin := get_hand_viewspace_origin(image_points, world_points, 2.0)
		# FIXME: Fudge factor to match better with world space.
		hand_node.position = origin * Vector3(7.0, 7.0, 3.5)

		# Express every landmark relative to the wrist. World landmarks are in
		# world space, so the hand rotation is undone first; the extra half
		# turn corrects the otherwise wrong orientation.
		var wrist := world_points[0]
		var to_hand_space := hand_node.basis.inverse() * half_turn_x
		for i in world_points.size():
			hand.landmarks[i].position = to_hand_space * (world_points[i] - wrist)

	# TODO: Interpolation needs to be done outside of this function,
	#       as it could be called multiple times a frame, or not at all.

	# Smoothly interpolate tracker transforms (in a framerate-independent way).
	# var f := 0.0000000001 # Yes this value needs to be THAT small.
	# tracker_head      .transform = tracker_head      .transform.interpolate_with(head_transform      , 1 - f ** delta)
	# tracker_hand_left .transform = tracker_hand_left .transform.interpolate_with(hand_left_transform , 1 - f ** delta)
	# tracker_hand_right.transform = tracker_hand_right.transform.interpolate_with(hand_right_transform, 1 - f ** delta)

## Handles a "status" packet from the tracker by passing the text to
## set_status().
func on_tracker_status(status: String) -> void:
	set_status(status)

## Handles an "error" packet from the tracker by writing it to the log with
## an "Error: " prefix.
func on_tracker_error(error: String) -> void:
	print_log("Error: " + error)

# -----------------------------------------------------------------------------
# Functions that deal with CONVERTING the TRACKER DATA to Godot types.
# -----------------------------------------------------------------------------

## Replaces the raw nested arrays inside `data` with Godot types, in place:
## the face transform becomes a Transform3D, and each hand's image and world
## landmark lists become Array[Vector3].
func convert_tracker_data(data: Dictionary) -> void:
	var face = data["face"]
	face["transform"] = to_transform(face["transform"])
	for hand in data["hands"].values():
		# map() yields untyped arrays, so rebuild each as a typed Array[Vector3].
		var image_points = hand["image_landmarks"].map(to_vector)
		var world_points = hand["world_landmarks"].map(to_vector)
		hand["image_landmarks"] = Array(image_points, TYPE_VECTOR3, "", null)
		hand["world_landmarks"] = Array(world_points, TYPE_VECTOR3, "", null)

## Builds a Vector3 from the first three elements of `array`.
func to_vector(array) -> Vector3:
	var x = array[0]
	var y = array[1]
	var z = array[2]
	return Vector3(x, y, z)

## Builds a Transform3D from a row-major matrix given as nested arrays.
## Columns 0..2 of the first three rows become the basis axes; column 3
## becomes the origin.
func to_transform(matrix) -> Transform3D:
	var row0 = matrix[0]
	var row1 = matrix[1]
	var row2 = matrix[2]
	var axes := Basis(
		Vector3(row0[0], row1[0], row2[0]),
		Vector3(row0[1], row1[1], row2[1]),
		Vector3(row0[2], row1[2], row2[2]))
	var offset := Vector3(row0[3], row1[3], row2[3])
	return Transform3D(axes, offset)

# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

# Indices into MediaPipe's 21-point hand landmark list (valid range 0..20).
# The previous table skipped 11, which shifted every name from
# MIDDLE_FINGER_DIP onward by one and put PINKY_TIP out of range.
const WRIST             :=  0
const THUMB_CMC         :=  1
const THUMB_MCP         :=  2
const THUMB_IP          :=  3
const THUMB_TIP         :=  4
const INDEX_FINGER_MCP  :=  5
const INDEX_FINGER_PIP  :=  6
const INDEX_FINGER_DIP  :=  7
const INDEX_FINGER_TIP  :=  8
const MIDDLE_FINGER_MCP :=  9
const MIDDLE_FINGER_PIP := 10
const MIDDLE_FINGER_DIP := 11
const MIDDLE_FINGER_TIP := 12
const RING_FINGER_MCP   := 13
const RING_FINGER_PIP   := 14
const RING_FINGER_DIP   := 15
const RING_FINGER_TIP   := 16
const PINKY_MCP         := 17
const PINKY_PIP         := 18
const PINKY_DIP         := 19
const PINKY_TIP         := 20

# FIXME: I changed the way this was calculated and it doesn't quite fit the data right?
## Derives the hand orientation from three landmarks: the wrist and the index
## and pinky knuckles (indices 0, 5 and 17). The forward direction runs from
## the wrist to the midpoint between the two knuckles; the up vector is the
## cross product of that direction and the pinky-to-index direction.
func get_hand_rotation(landmarks: Array[Vector3]) -> Basis:
	var knuckles_center   := (landmarks[INDEX_FINGER_MCP] + landmarks[PINKY_MCP]) / 2
	var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center)
	var towards_thumb     := landmarks[PINKY_MCP].direction_to(landmarks[INDEX_FINGER_MCP])

	var up := wrist_to_knuckles.cross(towards_thumb)
	return Basis.looking_at(wrist_to_knuckles, up, true)

## Attempt to figure out the hand origin in viewspace.
## `hand_to_head_scale` is a fudge value so that we can attempt
## to force the hand and head into the same scale range, roughly.
##
## A depth guess is made for each landmark pair in `known_distances` and the
## guesses are combined as a weighted average. The wrist image landmark is
## then converted to view space at that (negated) depth.
## `_world_landmarks` is currently unused.
func get_hand_viewspace_origin(
	image_landmarks: Array[Vector3],
	_world_landmarks: Array[Vector3],
	hand_to_head_scale: float,
) -> Vector3:
	# Values found through experimentation.
	# Each row is [ landmark index, landmark index, expected distance ].
	# The rows cover the thumb chain, wrist to the outer knuckles, and the
	# knuckle-to-knuckle row (indices 5-9, 9-13, 13-17).
	var known_distances := [
		[ WRIST             , THUMB_CMC         , 0.053861 ],
		[ THUMB_CMC         , THUMB_MCP         , 0.057096 ],
		[ THUMB_MCP         , THUMB_IP          , 0.048795 ],
		[ THUMB_IP          , THUMB_TIP         , 0.039851 ],
		[ WRIST             , INDEX_FINGER_MCP  , 0.152538 ],
		[ WRIST             , PINKY_MCP         , 0.138711 ],
		[ INDEX_FINGER_MCP  , MIDDLE_FINGER_MCP , 0.029368 ],
		[ MIDDLE_FINGER_MCP , RING_FINGER_MCP   , 0.027699 ],
		[ RING_FINGER_MCP   , PINKY_MCP         , 0.032673 ],
	]
	# FIXME: Hardcoded fudge-factor
	for d in known_distances: d[2] *= 0.25

	# Iterate through known distances and add up the weighted average.
	var fake_z_avg       := 0.0
	var total_avg_weight := 0.0
	for d in known_distances:
		var pt0 := image_landmarks[d[0]]
		var pt1 := image_landmarks[d[1]]

		# Figure out a weighted average based on how much the vector
		# is facing the camera Z axis. Stuff facing into the camera
		# has less accurate results, so weight it lower.
		var normvec := (pt0 - pt1).normalized()
		var weight  := clampf(1.0 - 2.0 * abs(normvec[2]), 0.0, 1.0)

		# Add to the average.
		fake_z_avg += guess_depth_from_known_distance(
			pt0, pt1, d[2] / hand_to_head_scale) * weight
		total_avg_weight += weight

	# Every pair was weighted (almost) zero, so avoid dividing by ~0 below.
	if abs(total_avg_weight) < 0.000001:
		print("HEY THE THING HAPPENED", total_avg_weight)
		# FIXME: Fudge value because I'm tired of this thing throwing
		#   exceptions all the time. Do an actual fix later.
		total_avg_weight = 0.01

	# Finish the average.
	fake_z_avg = fake_z_avg / total_avg_weight

	return ndc_to_viewspace(image_landmarks[0], -fake_z_avg)

## Estimates a depth value from how far apart two landmarks appear in
## normalized (clip-space) coordinates compared to `distance`, the separation
## expected between them. The X difference is scaled by the camera aspect
## ratio before measuring. The result is the reciprocal of
## (apparent length / distance).
func guess_depth_from_known_distance(left: Vector3, right: Vector3, distance: float) -> float:
	var delta := left - right
	delta.x = delta.x * camera_aspect_ratio # FIXME: Fudge factor
	var apparent_ratio := delta.length() / distance
	return 1.0 / apparent_ratio

## Converts a normalized image-space point `v` to view-space coordinates,
## using `z_offset` as the depth estimate for the point.
func ndc_to_viewspace(v: Vector3, z_offset: float) -> Vector3:
	# The principal point (center_x, center_y) matters a lot here, and
	# Google's documentation says little that is useful about it.
	var center_x := 0.5
	var center_y := 0.5

	# Google's docs give 1.0, 1.0 as the defaults for the focal lengths,
	# which is presumably fine for ordinary cameras.
	var focal_x := 1.0
	var focal_y := camera_aspect_ratio

	# This inverts the equation from the section on NDC space here
	# https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems
	# https://web.archive.org/web/20220727063132/https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems
	# which describes going from camera coordinates to NDC space. The terms
	# are somewhat ambiguous, but this seems to yield view space coordinates.

	# With this, coordinates seem to be evenly scaled (between x/y and z) and in view space.
	var z_scale := 1.0
	var depth := 1.0 / (-v.z + (1.0 / z_offset) * z_scale)
	return Vector3(
		(v.x - center_x) * depth / focal_x,
		(v.y - center_y) * depth / focal_y,
		depth)
Initial commit 4 weeks ago			`class_name copyMediaPipe`
			`extends Mod_Base`

			`# FIXME: Best to get this from the tracker process (if possible).`
			`var camera_aspect_ratio := 4.0 / 3.0 # Logitech C920 default?`

			`@onready var tracker_head : Node3D = $TrackingRoot/Head`
			`@onready var tracker_hand_left : Node3D = $TrackingRoot/LeftHand`
			`@onready var tracker_hand_right : Node3D = $TrackingRoot/RightHand`
			`@onready var landmark_template : MeshInstance3D = $TrackingRoot/LandmarkTemplate`
			`@onready var landmarks_hand_left : Array[MeshInstance3D] = []`
			`@onready var landmarks_hand_right : Array[MeshInstance3D] = []`

			`@onready var hands := {`
			`left = {`
			`tracker = tracker_hand_left,`
			`landmarks = landmarks_hand_left,`
			`},`
			`right = {`
			`tracker = tracker_hand_right,`
			`landmarks = landmarks_hand_right,`
			`},`
			`}`

			`func _ready() -> void:`
			`setup_hand_landmarks()`

			`var dir = get_script().get_path().get_base_dir()`
			`var path = dir.path_join("_tracker/Project/new_new_tracker.py")`
			`python_process = KiriPythonWrapperInstance.new(path)`
			`if not python_process.setup_python(false):`
			`OS.alert("Failed to setup tracker dependencies!")`

			`start_process()`
			`# FIXME: Don't hardcode the video device.`
			`set_video_device(get_video_devices()[0])`
			`start_tracker()`

			`func _exit_tree() -> void:`
			`stop_tracker()`
			`stop_process()`

			`# Called after mod is initialized or model is changed.`
			`func scene_init():`
			`pass`

			`# Called before mod is removed, model is changed or application is shut down.`
			`func scene_shutdown():`
			`pass`

			`func _process(_delta: float) -> void:`
			`if is_tracker_running():`
			`receive_tracker_packets()`

			`func setup_hand_landmarks() -> void:`
			`for side in hands:`
			`var hand = hands[side]`
			`for i in 21:`
			`var landmark: MeshInstance3D = landmark_template.duplicate(0)`
			`landmark.position = Vector3.ZERO`
			`landmark.visible = true`
			`hand.tracker.add_child(landmark)`
			`hand.landmarks.append(landmark)`

			`# -----------------------------------------------------------------------------`
			`# Functions to start/stop the PYTHON TRACKER PROCESS and communicate with it.`
			`# -----------------------------------------------------------------------------`

			`var python_process: KiriPythonWrapperInstance`

			`func start_process() -> void:`
			`python_process.start_process(false)`

			`func stop_process() -> void:`
			`python_process.stop_process()`

			`func is_process_running() -> bool:`
			`return python_process.get_status() == KiriPythonWrapperInstance.KiriPythonWrapperStatus.STATUS_RUNNING`

			`# [{ name: String, backend: String, path: String, index: int }]`
			`func get_video_devices() -> Array:`
			`assert(is_process_running())`
			`var devices = python_process.call_rpc_sync("enumerate_camera_devices", [])`
			`return devices if devices is Array else []`

			`func set_video_device(device) -> void:`
			`assert(is_process_running())`
			`var index: int = device.index if device else -1`
			`python_process.call_rpc_sync("set_video_device_number", [ index ])`

			`# -----------------------------------------------------------------------------`
			`# Functions to start/stop the TRACKER and receive packets coming from it.`
			`# -----------------------------------------------------------------------------`

			`var base_port := 7098`
			`var udp_server: PacketPeerUDP`
			`var udp_server_port: int`

			`func start_tracker() -> void:`
			`assert(!is_tracker_running())`

			`udp_server = PacketPeerUDP.new()`
			`# Find a port number that's open to use.`
			`udp_server_port = base_port`
			`while udp_server.bind(udp_server_port, "127.0.0.1") != OK:`
			`udp_server_port += 1`

			`python_process.call_rpc_sync("set_udp_port_number", [ udp_server_port ])`
			`python_process.call_rpc_sync("start_tracker", [])`

			`func stop_tracker() -> void:`
			`if !is_tracker_running(): return # Do nothing if tracker isn't running.`
			`python_process.call_rpc_sync("stop_tracker", [])`
			`udp_server.close()`
			`udp_server = null`

			`func is_tracker_running() -> bool:`
			`return udp_server != null`

			`func receive_tracker_packets() -> void:`
			`assert(is_tracker_running())`
			`while true:`
			`var bytes := udp_server.get_packet()`
			`if bytes.size() == 0: break`
			`var data = JSON.parse_string(bytes.get_string_from_utf8())`
			`if data is Dictionary: process_tracker_data(data)`

			`# -----------------------------------------------------------------------------`
			`# Functions to PROCESS the incoming TRACKER DATA, and update tracker objects.`
			`# -----------------------------------------------------------------------------`

			`func process_tracker_data(data: Dictionary) -> void:`
			`if "error" in data: on_tracker_error(data.error); return`
			`if "status" in data: on_tracker_status(data.status); return`
			`convert_tracker_data(data)`

			`# MediaPipe reports hands from a viewer's perspective, not the`
			`# person's own actual left and right hand, so swap them out here.`
			`var left = data["hands"]["left"]`
			`var right = data["hands"]["right"]`
			`data["hands"]["left"] = right`
			`data["hands"]["right"] = left`

			`tracker_head.transform = data["face"]["transform"]`
			`tracker_head.position /= 100 # Centimeters to meters.`

			`# TODO: Actually use this.`
			`var num_hands_detected := 0`
			`for side in hands:`
			`var hand = hands[side]`
			`var tracker: Node3D = hand.tracker`

			`# TODO: Don't automatically trust the handedness of the input data.`
			`var hand_data = data["hands"][side]`
			`var image_landmarks: Array[Vector3] = hand_data["image_landmarks"]`
			`var world_landmarks: Array[Vector3] = hand_data["world_landmarks"]`

			`# FIXME: Make this configurable.`
			`var min_confidence_threshold := 0.85`
			`if hand_data["confidence"] < min_confidence_threshold: continue`
			`num_hands_detected += 1`

			`# Mirror position on the X axis, since image landmarks are in view space.`
			`for i in image_landmarks.size(): image_landmarks[i].x = (1 - image_landmarks[i].x)`

			`tracker.basis = get_hand_rotation(world_landmarks)`
			`tracker.position = get_hand_viewspace_origin(image_landmarks, world_landmarks, 2.0) \`
			`* Vector3(7.0, 7.0, 3.5) # FIXME: Fudge factor to match better with world space.`

			`# Translate landmarks so the origin is at the wrist.`
			`var wrist_position := world_landmarks[0]`
			`# World landmarks are in world space, so we have to "subtract" the hand rotation.`
			`# Also, the rotation is all wrong, so apply that here as well.`
			`var hand_rotation := tracker.basis.inverse() * Basis.from_euler(Vector3(TAU / 2, 0, 0))`
			`for i in world_landmarks.size():`
			`var pos := world_landmarks[i] - wrist_position`
			`hand.landmarks[i].position = hand_rotation * pos`

			`# TODO: Interpolation needs to be done outside of this function,`
			`# as it could be called multiple times a frame, or not at all.`

			`# Smoothly interpolate tracker transforms (in a framerate-independent way).`
			`# var f := 0.0000000001 # Yes this value needs to be THAT small.`
			`# tracker_head .transform = tracker_head .transform.interpolate_with(head_transform , 1 - f ** delta)`
			`# tracker_hand_left .transform = tracker_hand_left .transform.interpolate_with(hand_left_transform , 1 - f ** delta)`
			`# tracker_hand_right.transform = tracker_hand_right.transform.interpolate_with(hand_right_transform, 1 - f ** delta)`

			`func on_tracker_status(status: String) -> void:`
			`set_status(status)`

			`func on_tracker_error(error: String) -> void:`
			`print_log("Error: " + error)`

			`# -----------------------------------------------------------------------------`
			`# Functions that deal with CONVERTING the TRACKER DATA to Godot types.`
			`# -----------------------------------------------------------------------------`

			`## Converts the arrays inside data to known data types like Vector3 and Transform3D.`
			`func convert_tracker_data(data: Dictionary) -> void:`
			`data["face"]["transform"] = to_transform(data["face"]["transform"])`
			`for side in data["hands"]:`
			`var hand = data["hands"][side]`
			`# Convert untyped array of arrays to typed Array[Vector3].`
			`var image_landmarks = hand["image_landmarks"].map(to_vector)`
			`var world_landmarks = hand["world_landmarks"].map(to_vector)`
			`hand["image_landmarks"] = Array(image_landmarks, TYPE_VECTOR3, "", null)`
			`hand["world_landmarks"] = Array(world_landmarks, TYPE_VECTOR3, "", null)`

			`func to_vector(array) -> Vector3:`
			`return Vector3(array[0], array[1], array[2])`

			`func to_transform(matrix) -> Transform3D:`
			`return Transform3D(`
			`Basis(Vector3(matrix[0][0], matrix[1][0], matrix[2][0]),`
			`Vector3(matrix[0][1], matrix[1][1], matrix[2][1]),`
			`Vector3(matrix[0][2], matrix[1][2], matrix[2][2])),`
			`Vector3(matrix[0][3], matrix[1][3], matrix[2][3]))`

			`# -----------------------------------------------------------------------------`
			`# -----------------------------------------------------------------------------`

			`const WRIST := 0`
			`const THUMB_CMC := 1`
			`const THUMB_MCP := 2`
			`const THUMB_IP := 3`
			`const THUMB_TIP := 4`
			`const INDEX_FINGER_MCP := 5`
			`const INDEX_FINGER_PIP := 6`
			`const INDEX_FINGER_DIP := 7`
			`const INDEX_FINGER_TIP := 8`
			`const MIDDLE_FINGER_MCP := 9`
			`const MIDDLE_FINGER_PIP := 10`
			`const MIDDLE_FINGER_DIP := 12`
			`const MIDDLE_FINGER_TIP := 13`
			`const RING_FINGER_MCP := 14`
			`const RING_FINGER_PIP := 15`
			`const RING_FINGER_DIP := 16`
			`const RING_FINGER_TIP := 17`
			`const PINKY_MCP := 18`
			`const PINKY_PIP := 19`
			`const PINKY_DIP := 20`
			`const PINKY_TIP := 21`

			`# FIXME: I changed the way this was calculated and it doesn't quite fit the data right?`
			`func get_hand_rotation(landmarks: Array[Vector3]) -> Basis:`
			`var knuckles_center := (landmarks[INDEX_FINGER_MCP] + landmarks[RING_FINGER_TIP]) / 2`
			`var wrist_to_knuckles := landmarks[WRIST].direction_to(knuckles_center)`
			`var towards_thumb := landmarks[RING_FINGER_TIP].direction_to(landmarks[INDEX_FINGER_MCP])`

			`var up := wrist_to_knuckles.cross(towards_thumb)`
			`return Basis.looking_at(wrist_to_knuckles, up, true)`

			`## Attempt to figure out the hand origin in viewspace.`
			## `hand_to_head_scale` is a fudge value so that we can attempt
			`## to force the hand and head into the same scale range, roughly.`
			`func get_hand_viewspace_origin(`
			`image_landmarks: Array[Vector3],`
			`_world_landmarks: Array[Vector3],`
			`hand_to_head_scale: float,`
			`) -> Vector3:`
			`# Values found through experimentation.`
			`var known_distances := [`
			`[ WRIST , THUMB_CMC , 0.053861 ],`
			`[ THUMB_CMC , THUMB_MCP , 0.057096 ],`
			`[ THUMB_MCP , THUMB_IP , 0.048795 ],`
			`[ THUMB_IP , THUMB_TIP , 0.039851 ],`
			`[ WRIST , INDEX_FINGER_MCP , 0.152538 ],`
			`[ WRIST , RING_FINGER_TIP , 0.138711 ],`
			`[ INDEX_FINGER_MCP , MIDDLE_FINGER_MCP , 0.029368 ],`
			`[ MIDDLE_FINGER_MCP , MIDDLE_FINGER_TIP , 0.027699 ],`
			`[ MIDDLE_FINGER_TIP , RING_FINGER_TIP , 0.032673 ],`
			`]`
			`# FIXME: Hardcoded fudge-factor`
			`for d in known_distances: d[2] *= 0.25`

			`# Iterate through known distances and add up the weighted average.`
			`var fake_z_avg := 0.0`
			`var total_avg_weight := 0.0`
			`for d in known_distances:`
			`var pt0 := image_landmarks[d[0]]`
			`var pt1 := image_landmarks[d[1]]`

			`# Figure out a weighted average based on how much the vector`
			`# is facing the camera Z axis. Stuff facing into the camera`
			`# has less accurate results, so weight it lower.`
			`var normvec := (pt0 - pt1).normalized()`
			`var weight := clampf(1.0 - 2.0 * abs(normvec[2]), 0.0, 1.0)`

			`# Add to the average.`
			`fake_z_avg += guess_depth_from_known_distance(`
			`pt0, pt1, d[2] / hand_to_head_scale) * weight`
			`total_avg_weight += weight`

			`if abs(total_avg_weight) < 0.000001:`
			`print("HEY THE THING HAPPENED", total_avg_weight)`
			`# FIXME: Fudge value because I'm tired of this thing throwing`
			`# exceptions all the time. Do an actual fix later.`
			`total_avg_weight = 0.01`

			`# Finish the average.`
			`fake_z_avg = fake_z_avg / total_avg_weight`

			`return ndc_to_viewspace(image_landmarks[0], -fake_z_avg)`

			`## Figure out a depth value based on the distance between known`
			`## normalized (clip-space) coordinates of landmarks, compared to what`
			`## we would expect the average distance between those points to be.`
			`func guess_depth_from_known_distance(left: Vector3, right: Vector3, distance: float) -> float:`
			`var dist_clip := left - right`
			`dist_clip.x *= camera_aspect_ratio # FIXME: Fudge factor`
			`return 1.0 / (dist_clip.length() / distance)`

			`func ndc_to_viewspace(v: Vector3, z_offset: float) -> Vector3:`
			`# This (px, py) is pretty important and Google's`
			`# documentation didn't give much useful info about it.`
			`var px := 0.5`
			`var py := 0.5`

			`# These default to 1.0, 1.0 according to Google's docs.`
			`# I guess that's probably fine for default camera stuff.`
			`var fx := 1.0`
			`var fy := camera_aspect_ratio`

			`# Inverse equation from the section on NDC space here`
			`# https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems`
			`# https://web.archive.org/web/20220727063132/https://google.github.io/mediapipe/solutions/objectron.html#coordinate-systems`
			`# which describes going from camera coordinates to NDC space. It's kinda`
			`# ambiguous on terms, but this seems to work to get view space coordinates.`

			`# With this, coordinates seem to be evenly scaled (between x/y and z) and in view space.`
			`var z_scale := 1.0`
			`var z := 1.0 / (-v[2] + (1.0 / z_offset) * z_scale)`
			`var x := (v[0] - px) * z / fx`
			`var y := (v[1] - py) * z / fy`
			`return Vector3(x, y, z)`