This commit is contained in:
2026-03-04 22:05:05 -08:00
parent 33dc00cb6a
commit 7c0ff65a19
10 changed files with 777 additions and 7 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@ tools/venv/**
.import
addons
build/
.tmp/**

View File

@@ -0,0 +1,9 @@
---
description: Image Inspector
mode: subagent
model: local/Qwen3-VL
tools:
read: true
---
You are an image inspection expert. You will be asked questions about images and you will answer directly. You may need to read the image if you are given a path.

View File

@@ -0,0 +1,79 @@
---
name: image-inspector
description: Inspect images to answer Yes/No questions about visual content. Use when asking "Is a <thing> visible in this image?" or checking for specific objects, people, colors, text, or other visual elements. Always arrives at a definitive Yes/No conclusion.
---
# Image Inspector
Inspect images using the Qwen3-VL vision model to answer Yes/No questions about visual content.
## When to Use
Use this skill when you need to:
- Check if a specific object is present in an image
- Verify visual elements exist
- Answer binary questions about image content
- Confirm or deny the presence of things in images
## How It Works
1. You provide an image path and a Yes/No question
2. You resize the image to at most 1 megapixel (1MP)
3. Ask the @image-expert to examine the image, and return a Yes/No
4. You receive a definitive Yes or No answer
## Usage Pattern
### Step 1: Read the Image
Use the Read tool to load the image file. The Read tool can read image files and return them as attachments.
### Step 2: Resize the Image to 1MP
Use ImageMagick to resize the image to a maximum of 1MP, writing the result to ./.tmp/
### Step 3: Formulate the Question
Ask @image-expert a clear Yes/No question about the image:
- "Is a [object] visible in this image?"
- "Does this image contain [element]?"
- "Can you see [thing] in this scene?"
### Step 4: Provide the Answer
After analyzing the (smaller) image, provide:
1. **The Answer**: Yes or No (always definitive)
2. **Brief Justification**: 1-2 sentences explaining why
## Example Questions
- "Is a tree visible in this image?"
- "Does this image contain a person wearing a hat?"
- "Is there text visible in this image?"
- "Can you see a water feature in this scene?"
- "Is the sky visible in this image?"
- "Does this image show an indoor scene?"
## Response Format
```
**Answer:** Yes/No
**Reasoning:** [1-2 sentences explaining what you see or don't see]
```
## Guidelines
- Always provide a definitive Yes or No answer
- Be specific about what you observe
- If uncertain, describe what you see and make your best judgment
- Don't hedge with "maybe" or "possibly" - commit to an answer
- Focus only on the specific question asked
## Limitations
- The model can only analyze what's visually apparent
- Small or partially obscured objects may be missed
- The model cannot zoom or enhance the image
- Text must be clearly legible to be detected

0
.tmp/.save Normal file
View File

View File

@@ -0,0 +1,8 @@
[gd_resource type="Resource" script_class="PolygonPointsResource" format=3 uid="uid://2oba97xunlssu"]
[ext_resource type="Script" uid="uid://dtemboas3bi8y" path="res://PolygonPointsResource.gd" id="1_ppr"]
[resource]
script = ExtResource("1_ppr")
points = PackedVector2Array(1147, 779, 1153, 854, 1152, 1053, 1151, 1054, 1055, 1046, 1043, 920, 1050, 781, 1051, 780)
metadata/_custom_type_script = "uid://dtemboas3bi8y"

View File

@@ -15,3 +15,9 @@ func _on_forest_path_interacted() -> void:
# Handler for the forest grove "interacted" signal: looks up the
# kq4_005_forest_grove child node and calls its default_script with this room.
func _on_forest_grove_interacted() -> void:
	var grove = get_node("kq4_005_forest_grove")
	grove.default_script(self)
# Handler for the door's "looked" signal: builds a one-step script in which
# ego says the door description, then hands it to start_main_script.
# "_on_script_complete" is passed to build() together with this room
# (presumably the name of the completion callback -- confirm in ScriptBuilder).
func _on_door_looked() -> void:
	var door_line = ScriptBuilder.say(ego, "It's a sturdy wooden door to the ogre's cottage.")
	var door_script = ScriptBuilder.init(door_line).build(self, "_on_script_complete")
	start_main_script(door_script)

View File

@@ -4,6 +4,8 @@
[ext_resource type="Texture2D" uid="uid://b1yeiwh8uqii2" path="res://scenes/kq4_004_ogres_cottage/caption_1_454377357_generated.png" id="2_u8g8b"]
[ext_resource type="Script" uid="uid://xmphq3i0wbg3" path="res://ScalePoint_.gd" id="3_kvdqi"]
[ext_resource type="PackedScene" uid="uid://c4vc1wx7k6cw" path="res://TransitionPiece.tscn" id="4_67nph"]
[ext_resource type="Resource" uid="uid://2oba97xunlssu" path="res://scenes/kq4_004_ogres_cottage/door_polygon.tres" id="5_door"]
[ext_resource type="Script" uid="uid://bounwnqg34t5k" path="res://SetPiece_.gd" id="6_setpiece"]
[sub_resource type="NavigationPolygon" id="NavigationPolygon_furs3"]
vertices = PackedVector2Array(325.656, 570.578, 582.328, 580.656, 525.289, 597.977, 454.133, 654.148, 445.539, 889.25, 919.219, 873.633, 1158.89, 691.359, 1294.26, 705.508, 1204.28, 907.469, -58.2969, 1146.06, -76.6797, 562, -6.6875, 562, 126.258, 883.891, 1990, 1160.92, 1662.15, 956.969, 1990, 977.391)
@@ -91,10 +93,19 @@ position = Vector2(24, 565)
[node name="exit" parent="kq4_005_forest_grove" index="1"]
position = Vector2(293, 554)
[node name="door" type="Polygon2D" parent="." groups=["set-piece"]]
scale = Vector2(0.783, 0.78)
color = Color(0.5, 0.5, 0.5, 0.25)
polygon = PackedVector2Array(1147, 779, 1153, 854, 1152, 1053, 1151, 1054, 1055, 1046, 1043, 920, 1050, 781, 1051, 780)
script = ExtResource("6_setpiece")
label = "Door"
points_resource = ExtResource("5_door")
[connection signal="interacted" from="kq4_028_mine_entrance" to="." method="_on_mine_entrance_interacted"]
[connection signal="interacted" from="kq4_003_fountain_pool" to="." method="_on_pool_interacted"]
[connection signal="interacted" from="kq4_010_forest_path" to="." method="_on_forest_path_interacted"]
[connection signal="interacted" from="kq4_005_forest_grove" to="." method="_on_forest_grove_interacted"]
[connection signal="looked" from="door" to="." method="_on_door_looked"]
[editable path="kq4_028_mine_entrance"]
[editable path="kq4_003_fountain_pool"]

252
tools/extract_mask.py Executable file
View File

@@ -0,0 +1,252 @@
#!/usr/bin/env python3
"""Extract a mask from an image using ComfyUI workflow."""
import base64
import json
import os
import sys
import time
import urllib.error
import urllib.request
import uuid
from urllib.parse import urlencode
def check_server(server_address: str = "127.0.0.1:8188", timeout: int = 5) -> bool:
    """Report whether a ComfyUI server answers at ``server_address``.

    Issues a GET request against the server's ``/system_stats`` endpoint.

    Args:
        server_address: ``host:port`` of the server to probe.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        True only when the response status is 200; False for any other
        status or when any exception occurs while probing.
    """
    try:
        stats_url = f"http://{server_address}/system_stats"
        probe = urllib.request.Request(stats_url, method="GET")
        with urllib.request.urlopen(probe, timeout=timeout) as reply:
            reachable = reply.status == 200
    except Exception:
        # Best-effort probe: every failure simply means "not reachable".
        return False
    return reachable
def encode_image_base64(image_path: str) -> str:
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file contents, decoded to ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def queue_prompt(prompt: dict, server_address: str = "127.0.0.1:8188") -> dict:
    """Submit a workflow to the ComfyUI ``/prompt`` endpoint.

    The workflow is posted as JSON together with a freshly generated
    ``client_id`` (a random UUID).

    Args:
        prompt: The workflow to queue.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The server's JSON response, parsed into a dict.

    Raises:
        urllib.error.HTTPError: Re-raised after printing the status code and
            response body when the server answers with an HTTP error.
    """
    payload = {"prompt": prompt, "client_id": str(uuid.uuid4())}
    request = urllib.request.Request(
        f"http://{server_address}/prompt",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request) as reply:
            raw_reply = reply.read()
        return json.loads(raw_reply)
    except urllib.error.HTTPError as http_err:
        # Surface the server's explanation before propagating the error.
        details = http_err.read().decode("utf-8")
        print(f"HTTP Error {http_err.code}: {details}")
        raise
def get_history(prompt_id: str, server_address: str = "127.0.0.1:8188") -> dict:
    """Fetch the ``/history/<prompt_id>`` record from a ComfyUI server.

    Args:
        prompt_id: Identifier of the queued prompt.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The server's JSON response, parsed into a dict.

    Raises:
        urllib.error.HTTPError: Re-raised after printing the status code and
            response body when the server answers with an HTTP error.
    """
    history_url = f"http://{server_address}/history/{prompt_id}"
    request = urllib.request.Request(history_url, method="GET")
    try:
        with urllib.request.urlopen(request) as reply:
            raw_reply = reply.read()
        return json.loads(raw_reply)
    except urllib.error.HTTPError as http_err:
        # Surface the server's explanation before propagating the error.
        details = http_err.read().decode("utf-8")
        print(f"HTTP Error {http_err.code}: {details}")
        raise
def download_image(
    filename: str,
    subfolder: str,
    folder_type: str,
    server_address: str = "127.0.0.1:8188",
) -> bytes:
    """Fetch an image's raw bytes from the ComfyUI ``/view`` endpoint.

    Args:
        filename: Sent as the ``filename`` query parameter.
        subfolder: Sent as the ``subfolder`` query parameter; omitted from
            the query when empty.
        folder_type: Sent as the ``type`` query parameter.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The response body as bytes.
    """
    query = {"filename": filename, "type": folder_type}
    if subfolder:
        query["subfolder"] = subfolder
    query_string = urlencode(query)
    request = urllib.request.Request(
        f"http://{server_address}/view?{query_string}", method="GET"
    )
    with urllib.request.urlopen(request) as reply:
        payload = reply.read()
    return payload
def wait_for_prompt_completion(
prompt_id: str, server_address: str = "127.0.0.1:8188", timeout: int = 240
) -> dict | None:
"""Wait for a prompt to complete and return the output info."""
start_time = time.time()
while time.time() - start_time < timeout:
history = get_history(prompt_id, server_address)
if prompt_id in history:
prompt_history = history[prompt_id]
if "outputs" in prompt_history and prompt_history["outputs"]:
return prompt_history["outputs"]
time.sleep(0.5)
return None
def _first_output_image(outputs: dict) -> dict | None:
    """Return the first node's leading image entry that has a filename, or None."""
    for node_output in outputs.values():
        images = node_output.get("images")
        # Only the first image of each node is considered.
        if images and images[0]["filename"]:
            return images[0]
    return None


def extract_mask(
    subject: str,
    input_image: str,
    output_path: str,
    server_address: str = "127.0.0.1:8188",
) -> str:
    """Extract mask from image for given subject.

    Loads ``image_mask_extraction.json`` from this script's directory, fills
    in the prompt (node ``"1:68"``), the base64-encoded input image (node
    ``"87"``) and a unique output filename prefix (node ``"82"``), queues the
    workflow, waits up to 4 minutes for it to finish, then downloads the
    first output image and writes it to ``output_path``.

    Args:
        subject: The subject to extract mask for (e.g., "the stump", "the door")
        input_image: Path to the input image file
        output_path: Path where the output mask should be saved
        server_address: ComfyUI server address

    Returns:
        Path to the saved output mask

    Raises:
        RuntimeError: If the workflow produces no outputs within 4 minutes,
            or if the outputs contain no image.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    workflow_path = os.path.join(script_dir, "image_mask_extraction.json")
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding when reading the workflow.
    with open(workflow_path, "r", encoding="utf-8") as f:
        workflow = json.load(f)

    prompt_text = f"Create a black and white alpha mask of {subject}"
    print("Encoding input image...")
    base64_image = encode_image_base64(input_image)

    workflow["1:68"]["inputs"]["prompt"] = prompt_text
    workflow["87"]["inputs"]["image"] = base64_image
    # A short random suffix keeps each run's output file distinct.
    unique_id = str(uuid.uuid4())[:8]
    workflow["82"]["inputs"]["filename_prefix"] = f"masks/mask_{unique_id}"

    print(f"Queuing mask extraction for: {subject}")
    print(f"Input image: {input_image}")
    print(f"Prompt: {prompt_text}")
    response = queue_prompt(workflow, server_address)
    prompt_id = response["prompt_id"]
    print(f"Prompt ID: {prompt_id}")

    print("Waiting for generation (up to 4 minutes)...")
    outputs = wait_for_prompt_completion(prompt_id, server_address, timeout=240)
    if not outputs:
        raise RuntimeError("Timeout: Workflow did not complete in 4 minutes")

    image_info = _first_output_image(outputs)
    if image_info is None:
        raise RuntimeError("No output image found in workflow results")
    output_filename = image_info["filename"]

    print(f"Downloading generated mask: {output_filename}")
    image_data = download_image(
        output_filename,
        image_info.get("subfolder", ""),
        image_info.get("type", "output"),
        server_address,
    )

    # Create the destination directory on demand before writing the mask.
    output_dir_path = os.path.dirname(os.path.abspath(output_path))
    os.makedirs(output_dir_path, exist_ok=True)
    with open(output_path, "wb") as f:
        f.write(image_data)
    print(f"Saved mask: {output_path}")
    return output_path
def main():
    """Command-line entry point for mask extraction.

    Parses the subject, input image path, output path, ``--server`` and
    ``--dry-run`` arguments, verifies the input file exists and the server
    is reachable, then runs ``extract_mask``.

    Progress is printed to stdout; failure diagnostics are printed to stderr.
    Exits with status 1 on any failure, and with status 0 after a successful
    dry run.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Extract mask from image using ComfyUI"
    )
    parser.add_argument(
        "subject", help="Subject to extract mask for (e.g., 'the stump', 'the door')"
    )
    parser.add_argument("input_image", help="Path to input image file")
    parser.add_argument("output_path", help="Path where output mask should be saved")
    parser.add_argument(
        "--server",
        default="127.0.0.1:8188",
        help="ComfyUI server address (default: 127.0.0.1:8188)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Test mode: validate inputs and server connection without generating",
    )
    args = parser.parse_args()

    if not os.path.exists(args.input_image):
        print(f"Error: Input image not found: {args.input_image}", file=sys.stderr)
        sys.exit(1)

    print(f"Subject: {args.subject}")
    print(f"Input: {args.input_image}")
    print(f"Output: {args.output_path}")
    print(f"Server: {args.server}")

    if args.dry_run:
        # Dry run: only verify that the server responds, then exit.
        print("\n[Dry Run Mode - Checking server connection...]")
        if not check_server(args.server):
            print(
                f"✗ ComfyUI server is not accessible at {args.server}",
                file=sys.stderr,
            )
            print(
                " Please ensure ComfyUI is running before extracting masks.",
                file=sys.stderr,
            )
            sys.exit(1)
        print("✓ ComfyUI server is running and accessible")
        print("\n✓ Dry run successful! All checks passed.")
        sys.exit(0)

    print("\nChecking ComfyUI server...")
    if not check_server(args.server):
        print(
            f"Error: ComfyUI server is not running at {args.server}",
            file=sys.stderr,
        )
        print(
            "Please start ComfyUI first or check the server address.",
            file=sys.stderr,
        )
        print("\nTo test without generating, use: --dry-run", file=sys.stderr)
        sys.exit(1)
    print("✓ ComfyUI server is running")

    try:
        output = extract_mask(
            args.subject, args.input_image, args.output_path, args.server
        )
        print(f"\nMask extraction complete! Output: {output}")
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long

View File

@@ -257,8 +257,8 @@ def main():
parser.add_argument(
"--min-area",
type=int,
default=100,
help="Minimum contour area to include in multiple mode (default: 100)",
default=150,
help="Minimum contour area to include (default: 150)",
)
args = parser.parse_args()
@@ -279,14 +279,15 @@ def main():
print("Error: No contours found in mask", file=sys.stderr)
sys.exit(1)
if args.mode == "multiple":
contours = sorted(contours, key=cv2.contourArea, reverse=True)
contours = [c for c in contours if cv2.contourArea(c) >= args.min_area]
if not contours:
print("Error: No contours meet minimum area requirement", file=sys.stderr)
sys.exit(1)
if args.mode == "multiple":
contours = sorted(contours, key=cv2.contourArea, reverse=True)
output_base = args.output if args.output else args.image.with_suffix("")
output_dir = output_base.parent
output_stem = output_base.stem