progress
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@ tools/venv/**
|
||||
.import
|
||||
addons
|
||||
build/
|
||||
.tmp/**
|
||||
|
||||
9
.opencode/agents/image-expert.md
Normal file
9
.opencode/agents/image-expert.md
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
description: Image Inspector
|
||||
mode: subagent
|
||||
model: local/Qwen3-VL
|
||||
tools:
|
||||
read: true
|
||||
---
|
||||
|
||||
You are an image inspection expert. You will be asked questions about images and you will answer directly. You may need to read the image if you are given a path.
|
||||
79
.opencode/skills/image-inspector/SKILL.md
Normal file
79
.opencode/skills/image-inspector/SKILL.md
Normal file
@@ -0,0 +1,79 @@
|
||||
---
|
||||
name: image-inspector
|
||||
description: Inspect images to answer Yes/No questions about visual content. Use when asking "Is a <thing> visible in this image?" or checking for specific objects, people, colors, text, or other visual elements. Always arrives at a definitive Yes/No conclusion.
|
||||
---
|
||||
|
||||
# Image Inspector
|
||||
|
||||
Inspect images using the Qwen3-VL vision model to answer Yes/No questions about visual content.
|
||||
|
||||
## When to Use
|
||||
|
||||
Use this skill when you need to:
|
||||
- Check if a specific object is present in an image
|
||||
- Verify visual elements exist
|
||||
- Answer binary questions about image content
|
||||
- Confirm or deny the presence of things in images
|
||||
|
||||
## How It Works
|
||||
|
||||
1. You provide an image path and a Yes/No question
|
||||
2. You resize the image to be a max of 1MP
|
||||
3. You ask @image-expert to examine the resized image and return a Yes/No answer
|
||||
4. You receive a definitive Yes or No answer
|
||||
|
||||
## Usage Pattern
|
||||
|
||||
### Step 1: Read the Image
|
||||
|
||||
Use the Read tool to load the image file. The Read tool can read image files and return them as attachments.
|
||||
|
||||
### Step 2: Resize the image to 1MP
|
||||
Use ImageMagick to resize the image to a maximum of 1 MP (one megapixel), writing the resized copy to ./.tmp/
|
||||
|
||||
### Step 3: Formulate the Question
|
||||
|
||||
Ask @image-expert a clear Yes/No question about the image:
|
||||
- "Is a [object] visible in this image?"
|
||||
- "Does this image contain [element]?"
|
||||
- "Can you see [thing] in this scene?"
|
||||
|
||||
|
||||
|
||||
### Step 4: Provide the Answer
|
||||
|
||||
After analyzing the (smaller) image, provide:
|
||||
1. **The Answer**: Yes or No (always definitive)
|
||||
2. **Brief Justification**: 1-2 sentences explaining why
|
||||
|
||||
## Example Questions
|
||||
|
||||
- "Is a tree visible in this image?"
|
||||
- "Does this image contain a person wearing a hat?"
|
||||
- "Is there text visible in this image?"
|
||||
- "Can you see a water feature in this scene?"
|
||||
- "Is the sky visible in this image?"
|
||||
- "Does this image show an indoor scene?"
|
||||
|
||||
## Response Format
|
||||
|
||||
```
|
||||
**Answer:** Yes/No
|
||||
|
||||
**Reasoning:** [1-2 sentences explaining what you see or don't see]
|
||||
```
|
||||
|
||||
## Guidelines
|
||||
|
||||
- Always provide a definitive Yes or No answer
|
||||
- Be specific about what you observe
|
||||
- If uncertain, describe what you see and make your best judgment
|
||||
- Don't hedge with "maybe" or "possibly" - commit to an answer
|
||||
- Focus only on the specific question asked
|
||||
|
||||
## Limitations
|
||||
|
||||
- The model can only analyze what's visually apparent
|
||||
- Small or partially obscured objects may be missed
|
||||
- The model cannot zoom or enhance the image
|
||||
- Text must be clearly legible to be detected
|
||||
0
.tmp/.save
Normal file
0
.tmp/.save
Normal file
8
scenes/kq4_004_ogres_cottage/door_polygon.tres
Normal file
8
scenes/kq4_004_ogres_cottage/door_polygon.tres
Normal file
@@ -0,0 +1,8 @@
|
||||
[gd_resource type="Resource" script_class="PolygonPointsResource" format=3 uid="uid://2oba97xunlssu"]
|
||||
|
||||
[ext_resource type="Script" uid="uid://dtemboas3bi8y" path="res://PolygonPointsResource.gd" id="1_ppr"]
|
||||
|
||||
[resource]
|
||||
script = ExtResource("1_ppr")
|
||||
points = PackedVector2Array(1147, 779, 1153, 854, 1152, 1053, 1151, 1054, 1055, 1046, 1043, 920, 1050, 781, 1051, 780)
|
||||
metadata/_custom_type_script = "uid://dtemboas3bi8y"
|
||||
@@ -15,3 +15,9 @@ func _on_forest_path_interacted() -> void:
|
||||
|
||||
# Handler for the `interacted` signal of the kq4_005_forest_grove node:
# passes this scene to that node's default_script().
func _on_forest_grove_interacted() -> void:
	var grove_piece = get_node("kq4_005_forest_grove")
	grove_piece.default_script(self)
|
||||
|
||||
|
||||
# Handler for the door's `looked` signal: builds a one-step script from a
# single ScriptBuilder.say() line for `ego` and starts it as the main script.
# "_on_script_complete" is handed to build() together with this scene
# (presumably the completion callback name -- confirm against ScriptBuilder).
func _on_door_looked() -> void:
	var door_line = ScriptBuilder.say(ego, "It's a sturdy wooden door to the ogre's cottage.")
	var look_script = ScriptBuilder.init(door_line).build(self, "_on_script_complete")
	start_main_script(look_script)
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
[ext_resource type="Texture2D" uid="uid://b1yeiwh8uqii2" path="res://scenes/kq4_004_ogres_cottage/caption_1_454377357_generated.png" id="2_u8g8b"]
|
||||
[ext_resource type="Script" uid="uid://xmphq3i0wbg3" path="res://ScalePoint_.gd" id="3_kvdqi"]
|
||||
[ext_resource type="PackedScene" uid="uid://c4vc1wx7k6cw" path="res://TransitionPiece.tscn" id="4_67nph"]
|
||||
[ext_resource type="Resource" uid="uid://2oba97xunlssu" path="res://scenes/kq4_004_ogres_cottage/door_polygon.tres" id="5_door"]
|
||||
[ext_resource type="Script" uid="uid://bounwnqg34t5k" path="res://SetPiece_.gd" id="6_setpiece"]
|
||||
|
||||
[sub_resource type="NavigationPolygon" id="NavigationPolygon_furs3"]
|
||||
vertices = PackedVector2Array(325.656, 570.578, 582.328, 580.656, 525.289, 597.977, 454.133, 654.148, 445.539, 889.25, 919.219, 873.633, 1158.89, 691.359, 1294.26, 705.508, 1204.28, 907.469, -58.2969, 1146.06, -76.6797, 562, -6.6875, 562, 126.258, 883.891, 1990, 1160.92, 1662.15, 956.969, 1990, 977.391)
|
||||
@@ -91,10 +93,19 @@ position = Vector2(24, 565)
|
||||
[node name="exit" parent="kq4_005_forest_grove" index="1"]
|
||||
position = Vector2(293, 554)
|
||||
|
||||
[node name="door" type="Polygon2D" parent="." groups=["set-piece"]]
|
||||
scale = Vector2(0.783, 0.78)
|
||||
color = Color(0.5, 0.5, 0.5, 0.25)
|
||||
polygon = PackedVector2Array(1147, 779, 1153, 854, 1152, 1053, 1151, 1054, 1055, 1046, 1043, 920, 1050, 781, 1051, 780)
|
||||
script = ExtResource("6_setpiece")
|
||||
label = "Door"
|
||||
points_resource = ExtResource("5_door")
|
||||
|
||||
[connection signal="interacted" from="kq4_028_mine_entrance" to="." method="_on_mine_entrance_interacted"]
|
||||
[connection signal="interacted" from="kq4_003_fountain_pool" to="." method="_on_pool_interacted"]
|
||||
[connection signal="interacted" from="kq4_010_forest_path" to="." method="_on_forest_path_interacted"]
|
||||
[connection signal="interacted" from="kq4_005_forest_grove" to="." method="_on_forest_grove_interacted"]
|
||||
[connection signal="looked" from="door" to="." method="_on_door_looked"]
|
||||
|
||||
[editable path="kq4_028_mine_entrance"]
|
||||
[editable path="kq4_003_fountain_pool"]
|
||||
|
||||
252
tools/extract_mask.py
Executable file
252
tools/extract_mask.py
Executable file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract a mask from an image using ComfyUI workflow."""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import uuid
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
||||
def check_server(server_address: str = "127.0.0.1:8188", timeout: int = 5) -> bool:
    """Report whether a ComfyUI server answers at *server_address*.

    Sends ``GET http://<server_address>/system_stats`` and returns True only
    when the reply has HTTP status 200. Any exception raised while building
    or sending the request (refused connection, timeout, malformed address,
    HTTP error status) yields False instead of propagating.

    Args:
        server_address: ``host:port`` of the server to probe.
        timeout: Seconds to wait for the reply.
    """
    stats_url = f"http://{server_address}/system_stats"
    try:
        probe = urllib.request.Request(stats_url, method="GET")
        with urllib.request.urlopen(probe, timeout=timeout) as reply:
            reachable = reply.status == 200
    except Exception:
        # Deliberately broad: this is a yes/no probe, so every failure mode
        # simply means "not reachable".
        reachable = False
    return reachable
|
||||
|
||||
|
||||
def encode_image_base64(image_path: str) -> str:
    """Return the raw bytes of the file at *image_path* as base64 text.

    The file is read in binary mode, so any file type is accepted; the
    base64 output is decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
||||
|
||||
|
||||
def queue_prompt(prompt: dict, server_address: str = "127.0.0.1:8188") -> dict:
    """Submit a workflow to the ComfyUI ``/prompt`` endpoint.

    The workflow is sent as a JSON body together with a freshly generated
    random ``client_id``.

    Args:
        prompt: The workflow to queue; must be JSON-serialisable.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The server's JSON reply, decoded.

    Raises:
        urllib.error.HTTPError: On an HTTP error status. The status code and
            response body are printed before the exception is re-raised.
    """
    submission = {"prompt": prompt, "client_id": str(uuid.uuid4())}
    request = urllib.request.Request(
        f"http://{server_address}/prompt",
        data=json.dumps(submission).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request) as reply:
            raw_reply = reply.read()
    except urllib.error.HTTPError as http_err:
        # The error body carries the server's explanation; print it before
        # letting the exception continue to the caller.
        details = http_err.read().decode("utf-8")
        print(f"HTTP Error {http_err.code}: {details}")
        raise
    return json.loads(raw_reply)
|
||||
|
||||
|
||||
def get_history(prompt_id: str, server_address: str = "127.0.0.1:8188") -> dict:
    """Fetch the ``/history/<prompt_id>`` record from the ComfyUI server.

    Args:
        prompt_id: Identifier of the prompt to look up.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The server's JSON reply, decoded.

    Raises:
        urllib.error.HTTPError: On an HTTP error status. The status code and
            response body are printed before the exception is re-raised.
    """
    history_url = f"http://{server_address}/history/{prompt_id}"
    lookup = urllib.request.Request(history_url, method="GET")
    try:
        with urllib.request.urlopen(lookup) as reply:
            raw_reply = reply.read()
    except urllib.error.HTTPError as http_err:
        details = http_err.read().decode("utf-8")
        print(f"HTTP Error {http_err.code}: {details}")
        raise
    return json.loads(raw_reply)
|
||||
|
||||
|
||||
def download_image(
    filename: str,
    subfolder: str,
    folder_type: str,
    server_address: str = "127.0.0.1:8188",
) -> bytes:
    """Fetch an image's raw bytes from the ComfyUI ``/view`` endpoint.

    Args:
        filename: Sent as the ``filename`` query parameter.
        subfolder: Sent as the ``subfolder`` query parameter; omitted from
            the query entirely when empty.
        folder_type: Sent as the ``type`` query parameter.
        server_address: ``host:port`` of the ComfyUI server.

    Returns:
        The response body, unmodified.
    """
    query_pairs = [("filename", filename), ("type", folder_type)]
    if subfolder:
        query_pairs.append(("subfolder", subfolder))

    view_url = f"http://{server_address}/view?{urlencode(query_pairs)}"
    with urllib.request.urlopen(urllib.request.Request(view_url, method="GET")) as reply:
        return reply.read()
|
||||
|
||||
|
||||
def wait_for_prompt_completion(
|
||||
prompt_id: str, server_address: str = "127.0.0.1:8188", timeout: int = 240
|
||||
) -> dict | None:
|
||||
"""Wait for a prompt to complete and return the output info."""
|
||||
start_time = time.time()
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
history = get_history(prompt_id, server_address)
|
||||
|
||||
if prompt_id in history:
|
||||
prompt_history = history[prompt_id]
|
||||
if "outputs" in prompt_history and prompt_history["outputs"]:
|
||||
return prompt_history["outputs"]
|
||||
|
||||
time.sleep(0.5)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _first_output_image(outputs: dict) -> dict | None:
    """Return the first image entry listed in *outputs*, or None if there is none."""
    for node_output in outputs.values():
        if "images" in node_output:
            for image_info in node_output["images"]:
                return image_info
    return None


def extract_mask(
    subject: str,
    input_image: str,
    output_path: str,
    server_address: str = "127.0.0.1:8188",
) -> str:
    """Extract mask from image for given subject.

    Loads the ``image_mask_extraction.json`` workflow that sits next to this
    script, fills in the prompt, the base64-encoded input image and a unique
    output filename prefix, queues it on the ComfyUI server, waits for the
    result and saves the first produced image to *output_path*.

    Args:
        subject: The subject to extract mask for (e.g., "the stump", "the door")
        input_image: Path to the input image file
        output_path: Path where the output mask should be saved
        server_address: ComfyUI server address

    Returns:
        Path to the saved output mask

    Raises:
        RuntimeError: If the workflow yields no outputs within 4 minutes, or
            if its outputs contain no image.
        KeyError: If the workflow file lacks one of the expected node ids.
        urllib.error.HTTPError: Propagated from the server requests.
    """
    timeout_seconds = 240

    script_dir = os.path.dirname(os.path.abspath(__file__))
    workflow_path = os.path.join(script_dir, "image_mask_extraction.json")
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(workflow_path, "r", encoding="utf-8") as f:
        workflow = json.load(f)

    prompt_text = f"Create a black and white alpha mask of {subject}"

    print("Encoding input image...")
    base64_image = encode_image_base64(input_image)

    # The ids below are keys of the workflow JSON; presumably they are the
    # prompt, image-input and save nodes -- verify against the workflow file.
    workflow["1:68"]["inputs"]["prompt"] = prompt_text
    workflow["87"]["inputs"]["image"] = base64_image

    # A short random suffix keeps this run's output name distinct.
    unique_id = str(uuid.uuid4())[:8]
    workflow["82"]["inputs"]["filename_prefix"] = f"masks/mask_{unique_id}"

    print(f"Queuing mask extraction for: {subject}")
    print(f"Input image: {input_image}")
    print(f"Prompt: {prompt_text}")

    response = queue_prompt(workflow, server_address)
    prompt_id = response["prompt_id"]
    print(f"Prompt ID: {prompt_id}")

    print("Waiting for generation (up to 4 minutes)...")
    outputs = wait_for_prompt_completion(
        prompt_id, server_address, timeout=timeout_seconds
    )
    if not outputs:
        raise RuntimeError("Timeout: Workflow did not complete in 4 minutes")

    image_info = _first_output_image(outputs)
    output_filename = image_info["filename"] if image_info else None
    if not output_filename:
        raise RuntimeError("No output image found in workflow results")

    print(f"Downloading generated mask: {output_filename}")
    image_data = download_image(
        output_filename,
        image_info.get("subfolder", ""),
        image_info.get("type", "output"),
        server_address,
    )

    # Create the destination directory on demand so any output path works.
    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
    with open(output_path, "wb") as f:
        f.write(image_data)

    print(f"Saved mask: {output_path}")

    return output_path
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for mask extraction.

    Parses ``subject``, ``input_image`` and ``output_path`` plus the optional
    ``--server`` and ``--dry-run`` flags, checks that the input exists and
    that the ComfyUI server responds, then runs ``extract_mask``.

    Exits with status 1 on a missing input, an unreachable server or a failed
    extraction. With ``--dry-run`` it exits 0 or 1 according to the server
    check alone and never queues a workflow. "Error:" diagnostics are written
    to stderr; progress and dry-run results are written to stdout.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Extract mask from image using ComfyUI"
    )
    parser.add_argument(
        "subject", help="Subject to extract mask for (e.g., 'the stump', 'the door')"
    )
    parser.add_argument("input_image", help="Path to input image file")
    parser.add_argument("output_path", help="Path where output mask should be saved")
    parser.add_argument(
        "--server",
        default="127.0.0.1:8188",
        help="ComfyUI server address (default: 127.0.0.1:8188)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Test mode: validate inputs and server connection without generating",
    )

    args = parser.parse_args()

    if not os.path.exists(args.input_image):
        print(f"Error: Input image not found: {args.input_image}", file=sys.stderr)
        sys.exit(1)

    print(f"Subject: {args.subject}")
    print(f"Input: {args.input_image}")
    print(f"Output: {args.output_path}")
    print(f"Server: {args.server}")

    if args.dry_run:
        # The dry-run verdict is the command's requested output, so it stays
        # on stdout; only the exit status signals failure.
        print("\n[Dry Run Mode - Checking server connection...]")
        if check_server(args.server):
            print("✓ ComfyUI server is running and accessible")
            print("\n✓ Dry run successful! All checks passed.")
            sys.exit(0)
        else:
            print(f"✗ ComfyUI server is not accessible at {args.server}")
            print("  Please ensure ComfyUI is running before extracting masks.")
            sys.exit(1)

    print("\nChecking ComfyUI server...")
    if not check_server(args.server):
        print(f"Error: ComfyUI server is not running at {args.server}", file=sys.stderr)
        print(
            "Please start ComfyUI first or check the server address.", file=sys.stderr
        )
        print("\nTo test without generating, use: --dry-run", file=sys.stderr)
        sys.exit(1)

    print("✓ ComfyUI server is running")

    try:
        output = extract_mask(
            args.subject, args.input_image, args.output_path, args.server
        )
        print(f"\nMask extraction complete! Output: {output}")
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
403
tools/image_mask_extraction.json
Normal file
403
tools/image_mask_extraction.json
Normal file
File diff suppressed because one or more lines are too long
@@ -257,8 +257,8 @@ def main():
|
||||
parser.add_argument(
|
||||
"--min-area",
|
||||
type=int,
|
||||
default=100,
|
||||
help="Minimum contour area to include in multiple mode (default: 100)",
|
||||
default=150,
|
||||
help="Minimum contour area to include (default: 150)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -279,13 +279,14 @@ def main():
|
||||
print("Error: No contours found in mask", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
contours = [c for c in contours if cv2.contourArea(c) >= args.min_area]
|
||||
|
||||
if not contours:
|
||||
print("Error: No contours meet minimum area requirement", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if args.mode == "multiple":
|
||||
contours = sorted(contours, key=cv2.contourArea, reverse=True)
|
||||
contours = [c for c in contours if cv2.contourArea(c) >= args.min_area]
|
||||
|
||||
if not contours:
|
||||
print("Error: No contours meet minimum area requirement", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
output_base = args.output if args.output else args.image.with_suffix("")
|
||||
output_dir = output_base.parent
|
||||
|
||||
Reference in New Issue
Block a user