Files
mimicrig/apply_pose.py
Bryce 6feb83c9e3 Add pose retargeting pipeline
Single command that runs RTMPose3D inference on a reference image and
retargets the keypoints onto a Mixamo-named armature in a .blend file:

    ./pose.sh <reference.png> <input.blend> <output.blend>

Components:
- pose.sh: shell wrapper that runs RTMPose3D in the rtmpose3d conda env
  then invokes Blender headless with apply_pose.py.
- apply_pose.py: opens the input .blend, finds the character armature,
  applies pure FK direction-matching for arms and legs, captures head
  yaw via orient_bone with the horizontal ear axis, lands the rig on
  the floor, saves the posed scene, and renders a front-view preview.
- inputs/: test references (basepose, basepose2, 0102, pose-test, squat)
  and the rosella-hunyuan-online rig blend.
- README.md: usage, design notes, and the list of bones intentionally
  left at rest (head, feet, spine, clavicles).

mmpose is treated as an external library at $MMPOSE_DIR; this repo is
only the retargeting glue.
2026-05-10 19:51:25 -07:00

300 lines
11 KiB
Python

"""
Blender script: open a .blend, retarget keypoints to its Mixamo armature,
save, and render a front-view preview.
Invoked by Blender:

    blender --background INPUT.blend --python apply_pose.py \
        -- JSON_PATH OUTPUT.blend [PREVIEW.png]

Picks the first armature whose bone names start with "mixamorig:" and which
has a mesh child (the character mesh, as opposed to debug rigs).
"""
import json
import sys
import bpy
from mathutils import Matrix, Vector
# ---- args after '--' ----
# The script's own arguments follow a bare "--" on the Blender command line
# (see the module docstring), so everything before it is ignored here.
argv = sys.argv
if "--" not in argv:
    raise SystemExit("Pass keypoint JSON and output blend path after '--'")
extra = argv[argv.index("--") + 1:]
if len(extra) < 2:
    # Both paths are required; exit with the usage message rather than an
    # IndexError traceback when one of them is missing.
    raise SystemExit("Pass keypoint JSON and output blend path after '--'")
JSON_PATH = extra[0]  # keypoint JSON read by load_keypoints()
OUTPUT_BLEND = extra[1]  # destination of the posed .blend
PREVIEW_PNG = extra[2] if len(extra) > 2 else None  # optional preview image
# ---- keypoint indices (COCO-WholeBody) ----
KP = {
    # head
    "left_ear": 3,
    "right_ear": 4,
    # arms
    "left_shoulder": 5,
    "right_shoulder": 6,
    "left_elbow": 7,
    "right_elbow": 8,
    "left_wrist": 9,
    "right_wrist": 10,
    # legs
    "left_hip": 11,
    "right_hip": 12,
    "left_knee": 13,
    "right_knee": 14,
    "left_ankle": 15,
    "right_ankle": 16,
    # hands
    "left_hand_root": 91,
    "right_hand_root": 112,
}
# Bone -> (head keypoint, tail keypoint). Special tokens: "shoulders" =
# midpoint of L/R shoulders.
#
# Hips, Spine bones, Shoulder (clavicle), Head, and Feet stay at rest - the
# rest pose already encodes sensible defaults (forward-facing torso, feet
# flat) that RTMPose3D's noisy nose/toe depth would otherwise distort.
#
# The Neck bone is handled separately by apply_neck() (called from
# apply_pose()) using orient_bone(). Only head yaw is captured: the neck's
# Y axis is kept vertical and its X axis follows the horizontal projection
# of the ear axis (right_ear -> left_ear). point_bone alone can't
# represent rotation around the bone's own axis.
BONE_TARGETS = [
    # Left arm chain, parent first.
    ("mixamorig:LeftArm", "left_shoulder", "left_elbow"),
    ("mixamorig:LeftForeArm", "left_elbow", "left_wrist"),
    ("mixamorig:LeftHand", "left_wrist", "left_hand_root"),
    # Right arm chain, parent first.
    ("mixamorig:RightArm", "right_shoulder", "right_elbow"),
    ("mixamorig:RightForeArm", "right_elbow", "right_wrist"),
    ("mixamorig:RightHand", "right_wrist", "right_hand_root"),
    # Left leg chain, parent first.
    ("mixamorig:LeftUpLeg", "left_hip", "left_knee"),
    ("mixamorig:LeftLeg", "left_knee", "left_ankle"),
    # Right leg chain, parent first.
    ("mixamorig:RightUpLeg", "right_hip", "right_knee"),
    ("mixamorig:RightLeg", "right_knee", "right_ankle"),
]
def load_keypoints(path):
    """Read the first instance's keypoints from a pose JSON file.

    Args:
        path: Path to a JSON file whose top level has an "instance_info"
            list; each entry carries a "keypoints" value.

    Returns:
        The "keypoints" value of the first "instance_info" entry, exactly as
        decoded by ``json``.

    Raises:
        RuntimeError: If "instance_info" is empty, i.e. there is no instance
            to retarget.
        KeyError: If "instance_info" or "keypoints" is missing.
    """
    # Explicit encoding so decoding doesn't depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    instances = data["instance_info"]
    if not instances:
        raise RuntimeError(f"No pose instances found in {path}")
    return instances[0]["keypoints"]
def to_world(pos):
    """Convert one RTMPose3D camera-frame point to a Blender world Vector.

    X and Y are negated (a 180-degree rotation around Z) so the character
    faces -Y, as in the Mixamo convention; Z is passed through unchanged.
    """
    cam_x = pos[0]
    cam_y = pos[1]
    cam_z = pos[2]
    return Vector((-cam_x, -cam_y, cam_z))
def enforce_forward_arms(positions):
    """Mirror Y across the parent joint to undo depth flips in the arms.

    For each side, an elbow whose Y is greater than its shoulder's Y is
    reflected across the shoulder's Y. The wrist is then compared against the
    (possibly corrected) elbow and reflected across the elbow's Y the same
    way. ``positions`` is modified in place; nothing is returned.
    """
    for side in ("left", "right"):
        elbow_idx = KP[f"{side}_elbow"]
        wrist_idx = KP[f"{side}_wrist"]
        shoulder = positions[KP[f"{side}_shoulder"]]
        elbow = positions[elbow_idx]
        wrist = positions[wrist_idx]

        if elbow.y > shoulder.y:
            elbow = Vector((elbow.x, 2.0 * shoulder.y - elbow.y, elbow.z))
            positions[elbow_idx] = elbow

        # Uses the corrected elbow when the branch above fired.
        if wrist.y > elbow.y:
            positions[wrist_idx] = Vector(
                (wrist.x, 2.0 * elbow.y - wrist.y, wrist.z)
            )
def find_character_armature():
    """Pick the Mixamo armature that most likely drives the character mesh.

    An armature qualifies when at least one of its pose bones has a name
    starting with "mixamorig:". Among qualifying armatures, one with a MESH
    object parented to it wins; ties are broken by the larger bone count,
    then by first occurrence in ``bpy.data.objects``.

    Raises:
        RuntimeError: If no qualifying armature exists in the file.
    """
    all_objects = bpy.data.objects
    ranked = []
    for obj in all_objects:
        if obj.type == "ARMATURE":
            pose_bones = list(obj.pose.bones)
            if any(pb.name.startswith("mixamorig:") for pb in pose_bones):
                parents_mesh = any(
                    child.type == "MESH"
                    for child in all_objects
                    if child.parent == obj
                )
                ranked.append((obj, parents_mesh, len(pose_bones)))

    if not ranked:
        raise RuntimeError("No Mixamo armature found in this blend file")

    # max() returns the first best entry, the same winner a stable
    # descending sort would put at index 0.
    best = max(ranked, key=lambda entry: (int(entry[1]), entry[2]))
    return best[0]
def reset_to_rest(arm):
    """Strip animation from ``arm`` and return all pose bones to rest.

    Side effects: clears the armature's animation data (if any), makes it the
    active object, switches to Pose mode, and updates the view layer.
    """
    if arm.animation_data:
        arm.animation_data_clear()
    bpy.context.view_layer.objects.active = arm
    bpy.ops.object.mode_set(mode="POSE")
    # bpy.ops.pose.transforms_clear() only touches *selected* bones, and the
    # selection stored in the opened file is arbitrary. Reset every bone's
    # local transform directly so no bone keeps a stale pose.
    for pbone in arm.pose.bones:
        pbone.matrix_basis = Matrix.Identity(4)
    bpy.context.view_layer.update()
def _world_to_local_dir(arm, v_w):
    """Express the world-space direction ``v_w`` in ``arm``'s local frame.

    Only the rotation part of ``arm.matrix_world`` is used, so the
    armature's scale and translation do not affect the result.
    """
    world_rotation = arm.matrix_world.to_quaternion()
    return world_rotation.inverted() @ v_w
def point_bone(pbone, direction_w):
    """Swing ``pbone`` so its Y axis points along ``direction_w``.

    ``direction_w`` is a world-space direction. It is converted to armature
    space with the rotation-only part of the armature's world matrix, so
    non-uniform armature scale doesn't distort it. The bone keeps its current
    head position. A near-zero direction leaves the bone untouched.
    """
    armature = pbone.id_data
    target_dir = _world_to_local_dir(armature, direction_w)
    if target_dir.length < 1e-6:
        return
    target_dir.normalize()

    # Rotation that carries the bone's current Y axis onto the target.
    current_y = pbone.y_axis.normalized()
    swing = current_y.rotation_difference(target_dir)

    pose_matrix = pbone.matrix
    origin = pose_matrix.to_translation()
    rotated_basis = swing.to_matrix() @ pose_matrix.to_3x3()
    pbone.matrix = Matrix.Translation(origin) @ rotated_basis.to_4x4()
def orient_bone(pbone, y_dir_w, x_dir_w):
    """Give ``pbone`` a full orientation from two world-space directions.

    The bone's Y axis is aligned with ``y_dir_w``. ``x_dir_w`` is made
    orthogonal to that Y axis and becomes the bone's X axis; Z is their cross
    product. Unlike point_bone, this also fixes the rotation AROUND the
    bone's Y axis (e.g. head yaw turning left/right). The head position is
    kept. Nothing happens if either direction is near zero or if the two are
    near-parallel.
    """
    armature = pbone.id_data
    y_axis = _world_to_local_dir(armature, y_dir_w)
    x_hint = _world_to_local_dir(armature, x_dir_w)
    if y_axis.length < 1e-6 or x_hint.length < 1e-6:
        return
    y_axis.normalize()

    # Remove the component of the X hint that lies along Y.
    x_axis = x_hint - x_hint.dot(y_axis) * y_axis
    if x_axis.length < 1e-6:
        return
    x_axis.normalize()

    z_axis = x_axis.cross(y_axis).normalized()
    # Rows are the axes; transposing makes them the matrix columns.
    basis = Matrix((x_axis, y_axis, z_axis)).transposed()
    origin = pbone.matrix.to_translation()
    pbone.matrix = Matrix.Translation(origin) @ basis.to_4x4()
def joint_pos(positions, name):
    """Return the position for keypoint ``name``.

    ``name`` is normally a key of KP. The special token "shoulders" returns
    the midpoint of the left and right shoulder positions instead.
    """
    if name != "shoulders":
        return positions[KP[name]]
    left = positions[KP["left_shoulder"]]
    right = positions[KP["right_shoulder"]]
    return (left + right) / 2
def apply_neck(positions, arm):
    """Turn the neck to match the source head YAW (left/right turn) only.

    The neck's Y axis is set to world up, and its X axis to the HORIZONTAL
    projection of the ear axis (right_ear -> left_ear). That rotates the head
    around the vertical axis to face the way it does in the source pose.

    Pitch is deliberately not derived from shoulders -> ears: the ear
    midpoint sits ~5 cm forward of the shoulder midpoint anatomically, which
    would be read as a chin-down tilt and look wrong on a standing pose. Yaw
    is cleanly recoverable from a 2D horizontal projection; pitch would need
    a more reliable reference.

    Does nothing if the armature has no "mixamorig:Neck" bone or if the
    projected ear axis is near zero length.
    """
    pose_bones = arm.pose.bones
    if "mixamorig:Neck" not in pose_bones:
        return

    left_ear = positions[KP["left_ear"]]
    right_ear = positions[KP["right_ear"]]
    ear_delta = left_ear - right_ear
    # Drop Z so only the horizontal heading of the ear axis remains.
    flat_ear_axis = Vector((ear_delta.x, ear_delta.y, 0.0))
    if flat_ear_axis.length < 1e-5:
        return

    world_up = Vector((0.0, 0.0, 1.0))
    orient_bone(pose_bones["mixamorig:Neck"], world_up, flat_ear_axis)
    bpy.context.view_layer.update()
def settle_on_floor(arm):
    """Shift the armature along Z so its lowest foot point sits at Z=0.

    point_bone only rotates bones, so the Hips are never translated; poses
    where the source's hips are lower than standing (squat, kneel, sit) would
    leave the rig floating. Moving the whole armature fixes that and
    preserves all relative bone rotations.

    The lowest point is taken over the world-space heads and tails of the
    toe-base and foot bones that exist on the rig. If none exist, the
    armature is left where it is.
    """
    bpy.context.view_layer.update()

    world_mat = arm.matrix_world
    pose_bones = arm.pose.bones
    heights = []
    for bone_name in (
        "mixamorig:LeftToeBase",
        "mixamorig:RightToeBase",
        "mixamorig:LeftFoot",
        "mixamorig:RightFoot",
    ):
        if bone_name not in pose_bones:
            continue
        bone = pose_bones[bone_name]
        heights.append((world_mat @ bone.head).z)
        heights.append((world_mat @ bone.tail).z)

    if not heights:
        return
    arm.location.z -= min(heights)
def apply_pose(positions, arm):
    """Retarget the keypoint ``positions`` onto ``arm``.

    Steps, in order: reset the rig to rest, orient the neck, point each bone
    listed in BONE_TARGETS along its head -> tail keypoint direction, then
    drop the rig onto the floor. Bones missing from the armature are skipped.
    """
    reset_to_rest(arm)
    apply_neck(positions, arm)

    for bone_name, start_kp, end_kp in BONE_TARGETS:
        if bone_name in arm.pose.bones:
            start = joint_pos(positions, start_kp)
            end = joint_pos(positions, end_kp)
            point_bone(arm.pose.bones[bone_name], end - start)
            # Propagate so child y_axis reflects the new parent pose.
            bpy.context.view_layer.update()

    settle_on_floor(arm)
def render_preview(arm, out_png):
    """Render a quick front-view preview of the posed character.

    Hides every object that is not the armature, one of its direct children,
    or a camera/light/empty. Adds a camera and three area lights, sets a flat
    world background colour, and renders a 720x960 still to ``out_png``.

    Args:
        arm: The posed armature object.
        out_png: File path the rendered image is written to.

    Raises:
        RuntimeError: If the armature lacks "mixamorig:Head" or both
            "mixamorig:LeftToeBase" and "mixamorig:LeftFoot", which are
            needed to frame the shot.
    """
    scene = bpy.context.scene

    # Hide everything except the character so the preview is clean.
    keep = {arm.name} | {c.name for c in bpy.data.objects if c.parent == arm}
    for o in bpy.data.objects:
        if o.type in {"CAMERA", "LIGHT", "EMPTY"} or o.name in keep:
            continue
        o.hide_viewport = True
        o.hide_render = True
    bpy.context.view_layer.update()

    # Frame to character bounds (toe tail to head tail, plus a bit of margin).
    pose_bones = arm.pose.bones
    head_bone = pose_bones.get("mixamorig:Head")
    toe_bone = pose_bones.get("mixamorig:LeftToeBase")
    if toe_bone is None:
        toe_bone = pose_bones.get("mixamorig:LeftFoot")
    if head_bone is None or toe_bone is None:
        raise RuntimeError(
            "Cannot frame preview: armature needs mixamorig:Head and "
            "mixamorig:LeftToeBase or mixamorig:LeftFoot"
        )
    head = arm.matrix_world @ head_bone.tail
    toe = arm.matrix_world @ toe_bone.tail
    target = (head + toe) * 0.5
    target.x = 0.0
    height = abs(head.z - toe.z) * 1.15
    cam_dist = max(2.0, height * 2.5)

    bpy.ops.object.camera_add(location=(0.0, -cam_dist, target.z))
    cam = bpy.context.active_object
    cam.rotation_euler = (target - cam.location).to_track_quat("-Z", "Y").to_euler()
    cam.data.lens = 85
    scene.camera = cam

    for loc, energy, sz in [((2, -3, 4), 600, 4), ((-3, -1, 3), 200, 6), ((0, 4, 4), 200, 4)]:
        bpy.ops.object.light_add(type="AREA", location=loc)
        light = bpy.context.active_object
        light.data.energy = energy
        light.data.size = sz

    # Use the world the scene actually renders with instead of assuming a
    # datablock literally named "World" exists; create one if there is none.
    world = scene.world
    if world is None:
        world = bpy.data.worlds.new("World")
        scene.world = world
    world.use_nodes = True
    # Find the background shader by node type rather than by display name.
    background = next(
        (n for n in world.node_tree.nodes if n.type == "BACKGROUND"), None
    )
    if background is not None:
        background.inputs["Color"].default_value = (0.65, 0.65, 0.7, 1.0)

    # The EEVEE engine identifier differs between Blender versions; try the
    # original one first, then the alternative. Assigning an unknown enum
    # value raises TypeError. If neither is accepted, keep the scene's engine.
    for engine in ("BLENDER_EEVEE_NEXT", "BLENDER_EEVEE"):
        try:
            scene.render.engine = engine
        except TypeError:
            continue
        break

    scene.render.resolution_x = 720
    scene.render.resolution_y = 960
    scene.render.filepath = out_png
    bpy.ops.render.render(write_still=True)
    print(f"Preview: {out_png}")
def main():
    """Pose the character from JSON_PATH, save it, and optionally preview.

    The posed scene is written to OUTPUT_BLEND before the preview is
    rendered, so the camera, lights and hidden objects set up by
    render_preview are not part of the saved file.
    """
    armature = find_character_armature()
    print(f"Posing armature: {armature.name}")

    raw_keypoints = load_keypoints(JSON_PATH)
    world_positions = [to_world(point) for point in raw_keypoints]
    enforce_forward_arms(world_positions)
    apply_pose(world_positions, armature)

    # apply_pose leaves the rig in Pose mode; return to Object mode first.
    bpy.ops.object.mode_set(mode="OBJECT")
    bpy.ops.wm.save_as_mainfile(filepath=OUTPUT_BLEND)
    print(f"Saved: {OUTPUT_BLEND}")

    if not PREVIEW_PNG:
        return
    render_preview(armature, PREVIEW_PNG)


if __name__ == "__main__":
    main()