diff --git a/.gitignore b/.gitignore index 36b13f1..dd8bc75 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,6 @@ cython_debug/ # PyPI configuration file .pypirc + +# mimicrig outputs +out/ diff --git a/README.md b/README.md index 6ae40dd..1466754 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,65 @@ # mimicrig -Mapping 2D photo poses to Mixamo Blender rigs \ No newline at end of file +Mapping 2D photo poses to Mixamo Blender rigs. + +Single command line: takes a reference image and a `.blend` containing a Mixamo-rigged character, and produces a posed `.blend` plus a front-view preview PNG. + +```bash +./pose.sh IMAGE INPUT.blend OUTPUT.blend +``` + +Outputs: +- `OUTPUT.blend` — the posed scene +- `OUTPUT.png` — front-view preview render (same basename) + +## Example + +```bash +./pose.sh inputs/pose-test.png inputs/rosella-hunyuan-online.blend out/pose-test_posed.blend +``` + +## How it works + +1. **RTMPose3D inference** on the reference image, in the `rtmpose3d` conda env, produces 133 3D keypoint coordinates (COCO-WholeBody). +2. **`apply_pose.py`** runs inside Blender: + - Picks the first armature whose bone names start with `mixamorig:` and which has a mesh child (skips OpenPose debug rigs etc.). + - Clears any animation and resets to rest pose. + - **Pure FK direction matching**: for each major bone, rotates so its Y axis points from one keypoint to the next, processed parent-first. + - **Head yaw via `orient_bone()`**: the neck uses a full two-axis orientation (vertical Y + horizontal ear axis projection) to capture head turn around the vertical axis. `point_bone` alone can't represent rotation around the bone's own axis. + - **Forward-monotone arm chain**: mirrors arm joints whose Y is behind their parent — a safety net for monocular depth flips that put wrists behind the body. + - **Floor settle**: after rotations, translates the whole armature in Z so the lowest foot point sits at world Z=0. Fixes the "floating squat" problem for low-stance poses. +3. 
**Preview render**: front-view, framed to the posed character, lit and saved alongside the output `.blend`. + +### Bones the script does NOT rotate + +- **Hips, Spine, Spine1, Spine2**: keeping the spine straight avoids unnatural torso lean from monocular depth noise. +- **Head**: rest pose has the face pointing forward; rotating it from a single nose keypoint causes chin-down tilt because RTMPose3D places the nose well in front of the shoulders. Yaw is captured via the neck instead. +- **Feet/toes**: RTMPose3D's toe Z is consistently too low, which would make the feet point downward like ballet pointe. Rest pose (feet flat) is more accurate for standing/sitting poses. +- **Clavicle (Shoulder)**: doubly-rotating clavicle + upper arm causes the shoulder joint to over-extend. + +## Best results + +- Use a **head-on full-body reference image**. RTMPose3D's monocular depth estimation is much cleaner from a frontal view; angled views get the wrong front/back for individual joints. +- The character mesh and rig should be in **Mixamo bone-name convention** (`mixamorig:LeftArm` etc.). +- Rest pose should have the character **facing -Y** in world (Mixamo default). + +## Layout + +``` +mimicrig/ +├── pose.sh # CLI wrapper +├── apply_pose.py # Blender script (retargeting + preview render) +├── inputs/ # Test reference images and the rig blend +│ ├── *.png +│ └── rosella-hunyuan-online.blend +└── out/ # Outputs (created on first run) +``` + +## Dependencies + +- **Blender 4.x** at `/opt/blender-4.3.2-linux-x64/blender` (override via `$BLENDER`) +- **Conda env `rtmpose3d`** with mmpose + mmdet + mmcv installed +- **mmpose checkout** with the `projects/rtmpose3d` directory, defaulting to `~/dev/playground/mmpose` (override via `$MMPOSE_DIR`) +- **RTMPose3D model weights** cached at `~/.cache/torch/hub/checkpoints/` — auto-downloaded the first time the mmpose demo script is run directly; `pose.sh` itself requires both checkpoint files to exist already and exits if either is missing (override the paths via `$DET_CKPT` / `$POSE_CKPT`) + +The mmpose checkout is treated as an external **library** here; this repo only holds the retargeting glue. 
diff --git a/apply_pose.py b/apply_pose.py
new file mode 100644
index 0000000..a3318ce
--- /dev/null
+++ b/apply_pose.py
@@ -0,0 +1,442 @@
+"""
+Blender script: open a .blend, retarget keypoints to its Mixamo armature,
+save, and render a front-view preview.
+
+Invoked by Blender:
+    blender --background INPUT.blend --python apply_pose.py \
+        -- JSON_PATH OUTPUT.blend [PREVIEW.png]
+
+Prefers a Mixamo armature (bone names starting with "mixamorig:") that has
+a mesh child (the character mesh, as opposed to debug rigs); ties go to the
+armature with the most bones.
+"""
+import json
+import sys
+
+import bpy
+from mathutils import Matrix, Vector
+
+# ---- args after '--' ----
+USAGE = (
+    "Usage: blender --background INPUT.blend --python apply_pose.py "
+    "-- JSON_PATH OUTPUT.blend [PREVIEW.png]"
+)
+argv = sys.argv
+if "--" not in argv:
+    raise SystemExit("Pass keypoint JSON and output blend path after '--'")
+extra = argv[argv.index("--") + 1:]
+if len(extra) < 2:
+    # Fail with a usage line instead of a bare IndexError on extra[1].
+    raise SystemExit(USAGE)
+JSON_PATH = extra[0]
+OUTPUT_BLEND = extra[1]
+PREVIEW_PNG = extra[2] if len(extra) > 2 else None
+
+
+# ---- keypoint indices (COCO-WholeBody) ----
+# NOTE(review): *_hand_root presumably marks the base of each hand's keypoint
+# block, close to the body wrist - confirm that wrist -> hand_root is long
+# enough to give the Hand bones a stable direction.
+KP = {
+    "left_ear": 3, "right_ear": 4,
+    "left_shoulder": 5, "right_shoulder": 6,
+    "left_elbow": 7, "right_elbow": 8,
+    "left_wrist": 9, "right_wrist": 10,
+    "left_hip": 11, "right_hip": 12,
+    "left_knee": 13, "right_knee": 14,
+    "left_ankle": 15, "right_ankle": 16,
+    "left_hand_root": 91, "right_hand_root": 112,
+}
+
+# Bone -> (head keypoint, tail keypoint). joint_pos() also understands the
+# special token "shoulders" (midpoint of L/R shoulders); no entry below
+# uses it at present.
+#
+# Hips, Spine bones, Shoulder (clavicle), Head, and Feet stay at rest - the
+# rest pose already encodes sensible defaults (forward-facing torso, feet
+# flat) that RTMPose3D's noisy nose/toe depth would otherwise distort.
+#
+# The Neck bone is handled separately in apply_neck() with orient_bone():
+# its Y axis is kept vertical and its X axis follows the horizontal
+# projection of right_ear -> left_ear, so only head yaw is captured.
+# point_bone alone can't represent rotation around the bone's own axis.
+BONE_TARGETS = [
+    ("mixamorig:LeftArm", "left_shoulder", "left_elbow"),
+    ("mixamorig:LeftForeArm", "left_elbow", "left_wrist"),
+    ("mixamorig:LeftHand", "left_wrist", "left_hand_root"),
+    ("mixamorig:RightArm", "right_shoulder", "right_elbow"),
+    ("mixamorig:RightForeArm", "right_elbow", "right_wrist"),
+    ("mixamorig:RightHand", "right_wrist", "right_hand_root"),
+    ("mixamorig:LeftUpLeg", "left_hip", "left_knee"),
+    ("mixamorig:LeftLeg", "left_knee", "left_ankle"),
+    ("mixamorig:RightUpLeg", "right_hip", "right_knee"),
+    ("mixamorig:RightLeg", "right_knee", "right_ankle"),
+]
+
+
+def load_keypoints(path):
+    """Return the keypoint list of the first detected instance.
+
+    Args:
+        path: JSON file with an ``instance_info`` list whose entries carry
+            a ``keypoints`` list.
+
+    Returns:
+        The ``keypoints`` list of ``instance_info[0]``.
+
+    Raises:
+        RuntimeError: if no instance is present, or the list is too short
+            for the indices in ``KP``.
+    """
+    with open(path, encoding="utf-8") as f:
+        instances = json.load(f).get("instance_info") or []
+    if not instances:
+        raise RuntimeError(f"No detected person in keypoint file: {path}")
+    keypoints = instances[0]["keypoints"]
+    needed = max(KP.values()) + 1
+    if len(keypoints) < needed:
+        raise RuntimeError(
+            f"Expected at least {needed} keypoints, got {len(keypoints)}: {path}"
+        )
+    return keypoints
+
+
+def to_world(pos):
+    """Convert one RTMPose3D keypoint to a Blender world-space Vector.
+
+    Negates X and Y (a 180-degree rotation around Z) so the character
+    faces -Y in Mixamo convention; Z is passed through.
+    """
+    return Vector((-pos[0], -pos[1], pos[2]))
+
+
+def _mirror_y(point, plane_y):
+    """Return point reflected across the plane Y = plane_y."""
+    return Vector((point.x, 2.0 * plane_y - point.y, point.z))
+
+
+def enforce_forward_arms(positions):
+    """Mirror Y across parent to fix monocular depth flips in arm chains.
+
+    For each arm, an elbow whose Y is greater than its shoulder's is
+    reflected across the shoulder's Y plane; then a wrist whose Y is
+    greater than the (possibly corrected) elbow's is reflected across the
+    elbow's Y plane. With the character facing -Y (see to_world), a greater
+    Y means "behind".
+
+    A reflected wrist takes its hand root with it: the Hand bones aim from
+    wrist to hand root, so moving only the wrist would leave that direction
+    measured against a stale point.
+
+    Args:
+        positions: world-space Vectors indexed by keypoint id; modified in
+            place.
+    """
+    for side in ("left", "right"):
+        elbow_i = KP[f"{side}_elbow"]
+        wrist_i = KP[f"{side}_wrist"]
+        hand_i = KP[f"{side}_hand_root"]
+        shoulder = positions[KP[f"{side}_shoulder"]]
+        elbow = positions[elbow_i]
+        wrist = positions[wrist_i]
+        if elbow.y > shoulder.y:
+            elbow = _mirror_y(elbow, shoulder.y)
+            positions[elbow_i] = elbow
+        if wrist.y > elbow.y:
+            positions[wrist_i] = _mirror_y(wrist, elbow.y)
+            positions[hand_i] = _mirror_y(positions[hand_i], elbow.y)
+
+
+def find_character_armature():
+    """Return the Mixamo armature that's driving the character mesh.
+
+    Candidates are armature objects with at least one pose bone whose name
+    starts with "mixamorig:". One with a MESH child is preferred; ties go
+    to the armature with more bones, then to the first one encountered.
+
+    Raises:
+        RuntimeError: if no candidate exists.
+    """
+    candidates = []
+    for o in bpy.data.objects:
+        if o.type != "ARMATURE":
+            continue
+        bones = o.pose.bones
+        if not any(b.name.startswith("mixamorig:") for b in bones):
+            continue
+        has_mesh_child = any(c.type == "MESH" for c in o.children)
+        candidates.append((o, has_mesh_child, len(bones)))
+    if not candidates:
+        raise RuntimeError("No Mixamo armature found in this blend file")
+    # Prefer one with a mesh child; tiebreak on bone count (richer rig).
+    # max() returns the first of equally ranked candidates.
+    return max(candidates, key=lambda c: (c[1], c[2]))[0]
+
+
+def reset_to_rest(arm):
+    """Clear animation on arm and return every pose bone to rest.
+
+    Leaves arm as the active object, in POSE mode. Bone transforms are
+    reset directly instead of via bpy.ops.pose.transforms_clear(), which
+    only acts on selected bones and would skip any that are not selected.
+    """
+    if arm.animation_data:
+        arm.animation_data_clear()
+    bpy.context.view_layer.objects.active = arm
+    bpy.ops.object.mode_set(mode="POSE")
+    for pbone in arm.pose.bones:
+        pbone.location = (0.0, 0.0, 0.0)
+        pbone.rotation_quaternion = (1.0, 0.0, 0.0, 0.0)
+        pbone.rotation_euler = (0.0, 0.0, 0.0)
+        pbone.rotation_axis_angle = (0.0, 0.0, 1.0, 0.0)
+        pbone.scale = (1.0, 1.0, 1.0)
+    bpy.context.view_layer.update()
+
+
+def _world_to_local_dir(arm, v_w):
+    """Convert a world-space direction to armature-local, ignoring scale."""
+    return arm.matrix_world.to_quaternion().inverted() @ v_w
+
+
+def point_bone(pbone, direction_w):
+    """Rotate pbone so its Y axis points along direction_w (world space).
+
+    Uses the rotation-only quaternion of matrix_world for world-to-local
+    conversion, so non-uniform armature scale doesn't distort directions.
+    Does nothing when direction_w is (near) zero length. The bone's head
+    position is left where it is.
+    """
+    arm = pbone.id_data
+    d = _world_to_local_dir(arm, direction_w)
+    if d.length < 1e-6:
+        return
+    d.normalize()
+    rot = pbone.y_axis.normalized().rotation_difference(d)
+    head_pos = pbone.matrix.to_translation()
+    new_3x3 = rot.to_matrix() @ pbone.matrix.to_3x3()
+    pbone.matrix = Matrix.Translation(head_pos) @ new_3x3.to_4x4()
+
+
+def orient_bone(pbone, y_dir_w, x_dir_w):
+    """Set pbone's full rotation: Y axis along y_dir_w, X axis along x_dir_w.
+
+    x_dir_w is orthonormalized against y_dir_w. Captures rotations that
+    point_bone misses - notably the rotation AROUND the bone's Y axis (e.g.
+    head yaw turning left/right). Does nothing when either direction is
+    (near) zero length or the two are (near) parallel.
+    """
+    arm = pbone.id_data
+    y = _world_to_local_dir(arm, y_dir_w)
+    x_raw = _world_to_local_dir(arm, x_dir_w)
+    if y.length < 1e-6 or x_raw.length < 1e-6:
+        return
+    y.normalize()
+    x = x_raw - x_raw.dot(y) * y  # remove the component along Y
+    if x.length < 1e-6:
+        return
+    x.normalize()
+    z = x.cross(y).normalized()
+
+    new_3x3 = Matrix((x, y, z)).transposed()  # columns are bone X, Y, Z axes
+    head_pos = pbone.matrix.to_translation()
+    pbone.matrix = Matrix.Translation(head_pos) @ new_3x3.to_4x4()
+
+
+def joint_pos(positions, name):
+    """Look up a keypoint position, handling special midpoint tokens."""
+    if name == "shoulders":
+        return (positions[KP["left_shoulder"]] + positions[KP["right_shoulder"]]) / 2
+    return positions[KP[name]]
+
+
+def apply_neck(positions, arm):
+    """Capture head YAW (left/right turn) only.
+
+    The neck's Y axis (up direction) stays vertical, matching the rest pose.
+    The neck's X axis is set to the HORIZONTAL projection of the ear axis
+    (right_ear -> left_ear), which rotates the head around the vertical
+    axis to face the same direction as in the source pose.
+
+    Why not also use shoulders->ears for Y direction? Because the ear
+    midpoint sits ~5 cm forward of the shoulder midpoint anatomically,
+    which gets read as a head-pitch-forward (chin-down) tilt and looks
+    wrong on a standing pose. Yaw is cleanly recoverable from a 2D
+    horizontal projection; pitch would need a more reliable reference.
+    """
+    neck_name = "mixamorig:Neck"
+    if neck_name not in arm.pose.bones:
+        return
+    ear_axis = positions[KP["left_ear"]] - positions[KP["right_ear"]]
+    yaw_dir = Vector((ear_axis.x, ear_axis.y, 0.0))
+    if yaw_dir.length < 1e-5:
+        return
+    y_up = Vector((0.0, 0.0, 1.0))  # Vertical world up
+    orient_bone(arm.pose.bones[neck_name], y_up, yaw_dir)
+    bpy.context.view_layer.update()
+
+
+def settle_on_floor(arm):
+    """Translate the armature in world-Z so the lowest foot point sits at
+    Z=0. The Hips bone isn't translated by point_bone (rotation only), so
+    poses where the source's hips are lower than standing (squat, kneel,
+    sit) leave the rig floating. Lowering the whole armature is a simple
+    fix that preserves all relative bone rotations.
+
+    Does nothing when the rig has none of the foot/toe bones listed below.
+    NOTE(review): the shift is applied to arm.location, which presumably
+    matches world Z only for an armature without a transformed parent -
+    confirm for parented rigs.
+    """
+    bpy.context.view_layer.update()
+    foot_bone_names = (
+        "mixamorig:LeftToeBase", "mixamorig:RightToeBase",
+        "mixamorig:LeftFoot", "mixamorig:RightFoot",
+    )
+    foot_points = []
+    for name in foot_bone_names:
+        if name in arm.pose.bones:
+            pb = arm.pose.bones[name]
+            foot_points.append(arm.matrix_world @ pb.head)
+            foot_points.append(arm.matrix_world @ pb.tail)
+    if not foot_points:
+        return
+    min_z = min(p.z for p in foot_points)
+    arm.location.z -= min_z
+
+
+def apply_pose(positions, arm):
+    """Pose arm from world-space keypoints.
+
+    Resets to rest, orients the neck, points each BONE_TARGETS bone from
+    its head keypoint to its tail keypoint (in list order, parents before
+    children), then settles the rig on the floor.
+    """
+    reset_to_rest(arm)
+
+    apply_neck(positions, arm)
+
+    for bone_name, head_kp, tail_kp in BONE_TARGETS:
+        if bone_name not in arm.pose.bones:
+            continue
+        head_w = joint_pos(positions, head_kp)
+        tail_w = joint_pos(positions, tail_kp)
+        point_bone(arm.pose.bones[bone_name], tail_w - head_w)
+        # Propagate so child y_axis reflects the new parent pose.
+        bpy.context.view_layer.update()
+
+    settle_on_floor(arm)
+
+
+def _preview_extent(arm):
+    """Return (top, bottom) world-space points used to frame the preview.
+
+    Uses the Head tail and the left toe (or left foot) tail when the rig
+    has them; otherwise falls back to the highest and lowest bone tails so
+    a rig without those bones still gets a preview instead of a KeyError.
+    """
+    bones = arm.pose.bones
+    toe_name = next(
+        (n for n in ("mixamorig:LeftToeBase", "mixamorig:LeftFoot") if n in bones),
+        None,
+    )
+    if "mixamorig:Head" in bones and toe_name is not None:
+        return (
+            arm.matrix_world @ bones["mixamorig:Head"].tail,
+            arm.matrix_world @ bones[toe_name].tail,
+        )
+    tails = [arm.matrix_world @ pb.tail for pb in bones]
+    return max(tails, key=lambda p: p.z), min(tails, key=lambda p: p.z)
+
+
+def render_preview(arm, out_png):
+    """Render a quick front-view PNG preview of the posed character.
+
+    Hides every object except the armature, its direct children, and any
+    cameras, lights and empties; adds a camera on the -Y side and three
+    area lights; then renders a 720x960 still to out_png.
+
+    This alters the scene (visibility, new objects, world and render
+    settings); main() saves the .blend before calling it.
+    """
+    scene = bpy.context.scene
+
+    # Hide everything except the character so the preview is clean.
+    keep = {arm.name} | {c.name for c in arm.children}
+    for o in bpy.data.objects:
+        if o.type in {"CAMERA", "LIGHT", "EMPTY"} or o.name in keep:
+            continue
+        o.hide_viewport = True
+        o.hide_render = True
+
+    bpy.context.view_layer.update()
+
+    # Frame to character bounds (toe tail to head tail, plus a bit of margin).
+    top, bottom = _preview_extent(arm)
+    target = (top + bottom) * 0.5
+    target.x = 0.0
+    height = abs(top.z - bottom.z) * 1.15
+    cam_dist = max(2.0, height * 2.5)
+
+    bpy.ops.object.camera_add(location=(0.0, -cam_dist, target.z))
+    cam = bpy.context.active_object
+    cam.rotation_euler = (target - cam.location).to_track_quat("-Z", "Y").to_euler()
+    cam.data.lens = 85
+    scene.camera = cam
+
+    for loc, energy, size in [((2, -3, 4), 600, 4), ((-3, -1, 3), 200, 6), ((0, 4, 4), 200, 4)]:
+        bpy.ops.object.light_add(type="AREA", location=loc)
+        light = bpy.context.active_object
+        light.data.energy = energy
+        light.data.size = size
+
+    # Use the scene's own world rather than looking one up by the name
+    # "World", which raises KeyError when no world has that name.
+    world = scene.world
+    if world is None:
+        world = bpy.data.worlds.new("World")
+        scene.world = world
+    world.use_nodes = True
+    background = world.node_tree.nodes.get("Background")
+    if background is not None:
+        background.inputs["Color"].default_value = (0.65, 0.65, 0.7, 1.0)
+
+    # Not every Blender build knows "BLENDER_EEVEE_NEXT"; assigning an
+    # unknown engine id raises TypeError, so fall back to "BLENDER_EEVEE".
+    for engine in ("BLENDER_EEVEE_NEXT", "BLENDER_EEVEE"):
+        try:
+            scene.render.engine = engine
+        except TypeError:
+            continue
+        break
+    scene.render.resolution_x = 720
+    scene.render.resolution_y = 960
+    # Force PNG so the output matches the .png path whatever the input
+    # scene's image format was.
+    scene.render.image_settings.file_format = "PNG"
+    scene.render.filepath = out_png
+    bpy.ops.render.render(write_still=True)
+    print(f"Preview: {out_png}")
+
+
+def main():
+    """Pose the character armature, save the .blend, then render the preview.
+
+    Saving happens first, so the scene changes made by render_preview()
+    (hidden objects, extra camera and lights) are not in OUTPUT_BLEND.
+    """
+    arm = find_character_armature()
+    print(f"Posing armature: {arm.name}")
+    keypoints = load_keypoints(JSON_PATH)
+    positions = [to_world(p) for p in keypoints]
+    enforce_forward_arms(positions)
+    apply_pose(positions, arm)
+    bpy.ops.object.mode_set(mode="OBJECT")
+    bpy.ops.wm.save_as_mainfile(filepath=OUTPUT_BLEND)
+    print(f"Saved: {OUTPUT_BLEND}")
+
+    if PREVIEW_PNG:
+        render_preview(arm, PREVIEW_PNG)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/inputs/0102.png b/inputs/0102.png new file mode 100644 index 0000000..5b93987 Binary files /dev/null and b/inputs/0102.png differ diff --git a/inputs/basepose.png b/inputs/basepose.png new file mode 100644 index 0000000..77c1388 Binary files /dev/null and b/inputs/basepose.png differ diff --git 
a/inputs/basepose2.png b/inputs/basepose2.png new file mode 100755 index 0000000..0916dac Binary files /dev/null and b/inputs/basepose2.png differ diff --git a/inputs/pose-test.png b/inputs/pose-test.png new file mode 100644 index 0000000..b3100e7 Binary files /dev/null and b/inputs/pose-test.png differ diff --git a/inputs/rosella-hunyuan-online.blend b/inputs/rosella-hunyuan-online.blend new file mode 100644 index 0000000..d800a60 Binary files /dev/null and b/inputs/rosella-hunyuan-online.blend differ diff --git a/inputs/squat.png b/inputs/squat.png new file mode 100644 index 0000000..8b7ff30 Binary files /dev/null and b/inputs/squat.png differ
diff --git a/pose.sh b/pose.sh
new file mode 100755
index 0000000..b97736f
--- /dev/null
+++ b/pose.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# Pose a Mixamo armature in a .blend file to match a reference image.
+#
+# Usage:
+#   ./pose.sh IMAGE INPUT.blend OUTPUT.blend
+#
+# 1) Runs RTMPose3D inference on the image (conda env: rtmpose3d).
+# 2) Opens the blend, finds the first mixamorig armature with a mesh child,
+#    retargets the keypoints onto it via pure FK direction matching,
+#    captures head yaw, lands the character on the floor, saves to
+#    OUTPUT.blend, and writes a front-view preview to OUTPUT.png.
+#
+# Best results: use a head-on full-body reference image. RTMPose3D's depth
+# is noisy from angled views and gets the wrong front/back for joints.
+#
+# Required environment:
+#   - conda env "rtmpose3d" with mmpose + mmdet + mmcv; conda.sh is sourced
+#     from $CONDA_SH (defaults to ~/miniconda3/etc/profile.d/conda.sh)
+#   - Blender 4.x at $BLENDER (defaults to /opt/blender-4.3.2-linux-x64/blender)
+#   - RTMPose3D model weights at ~/.cache/torch/hub/checkpoints/
+#     (override the two files with $DET_CKPT / $POSE_CKPT)
+#   - mmpose checkout with the rtmpose3d project at $MMPOSE_DIR
+#     (defaults to ~/dev/playground/mmpose)
+#
+# Optional: $DEVICE selects the inference device (defaults to cpu).
+
+set -euo pipefail
+
+if [[ $# -lt 3 ]]; then
+    echo "Usage: $0 IMAGE INPUT.blend OUTPUT.blend" >&2
+    exit 1
+fi
+
+# Resolve paths against the user's pwd BEFORE any cd.
+IMAGE="$(realpath "$1")"
+INPUT_BLEND="$(realpath "$2")"
+OUTPUT_BLEND="$(realpath -m "$3")"
+# realpath accepts a missing final component, so check the inputs explicitly.
+[[ -f "$IMAGE" ]] || { echo "Image not found: $IMAGE" >&2; exit 1; }
+[[ -f "$INPUT_BLEND" ]] || { echo "Input blend not found: $INPUT_BLEND" >&2; exit 1; }
+mkdir -p "$(dirname "$OUTPUT_BLEND")"
+
+# Where this script lives - apply_pose.py is its sibling.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# External tools / data (override with env vars to relocate).
+MMPOSE_DIR="${MMPOSE_DIR:-$HOME/dev/playground/mmpose}"
+BLENDER="${BLENDER:-/opt/blender-4.3.2-linux-x64/blender}"
+CONDA_SH="${CONDA_SH:-$HOME/miniconda3/etc/profile.d/conda.sh}"
+DEVICE="${DEVICE:-cpu}"
+DET_CKPT="${DET_CKPT:-$HOME/.cache/torch/hub/checkpoints/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth}"
+POSE_CKPT="${POSE_CKPT:-$HOME/.cache/torch/hub/checkpoints/rtmw3d-l_8xb64_cocktail14-384x288-794dbc78_20240626.pth}"
+
+[[ -d "$MMPOSE_DIR/projects/rtmpose3d" ]] || {
+    echo "MMPOSE_DIR does not contain projects/rtmpose3d: $MMPOSE_DIR" >&2; exit 1
+}
+[[ -x "$BLENDER" ]] || { echo "Blender not found at $BLENDER" >&2; exit 1; }
+[[ -r "$CONDA_SH" ]] || { echo "conda.sh not found at $CONDA_SH" >&2; exit 1; }
+[[ -s "$DET_CKPT" ]] || { echo "Detection checkpoint missing: $DET_CKPT" >&2; exit 1; }
+[[ -s "$POSE_CKPT" ]] || { echo "Pose checkpoint missing: $POSE_CKPT" >&2; exit 1; }
+
+WORK_DIR="$(mktemp -d)"
+trap 'rm -rf "$WORK_DIR"' EXIT
+
+echo "[1/2] Running RTMPose3D inference on $IMAGE ..."
+# conda's shell hooks may read unset variables, so relax nounset around them.
+set +u
+# shellcheck disable=SC1090,SC1091
+source "$CONDA_SH"
+conda activate rtmpose3d
+set -u
+cd "$MMPOSE_DIR/projects/rtmpose3d"
+export PYTHONPATH="$PWD:${PYTHONPATH:-}"
+python demo/body3d_img2pose_demo.py \
+    demo/rtmdet_m_640-8xb32_coco-person.py "$DET_CKPT" \
+    configs/rtmw3d-l_8xb64_cocktail14-384x288.py "$POSE_CKPT" \
+    --input "$IMAGE" --output-root "$WORK_DIR" \
+    --save-predictions --device "$DEVICE"
+
+INPUT_BASE="$(basename "${IMAGE%.*}")"
+JSON="$WORK_DIR/results_${INPUT_BASE}.json"
+[[ -s "$JSON" ]] || { echo "Inference did not produce $JSON" >&2; exit 1; }
+
+PREVIEW_PNG="${OUTPUT_BLEND%.blend}.png"
+
+echo "[2/2] Retargeting onto armature in $INPUT_BLEND ..."
+# --python-exit-code makes Blender exit non-zero when apply_pose.py raises;
+# it must precede --python because Blender handles arguments in order.
+"$BLENDER" --background "$INPUT_BLEND" \
+    --python-exit-code 1 \
+    --python "$SCRIPT_DIR/apply_pose.py" \
+    -- "$JSON" "$OUTPUT_BLEND" "$PREVIEW_PNG"
+
+echo "Done: $OUTPUT_BLEND"
+echo "Preview: $PREVIEW_PNG"