オブジェクト除去とは?
その名の通り、画像から特定のオブジェクトだけを消すタスクです。
人物(通行人など)・看板・電線・ゴミといった、写っていてほしくないものを消し、その跡を周囲の背景になじむように自然に埋めます。
LaMa
拡散モデル登場以前は、LaMaのようなCNNベースのinpaintingモデルがSoTAとしてよく使われていました。

{
"last_node_id": 14,
"last_link_id": 13,
"nodes": [
{
"id": 12,
"type": "PreviewImage",
"pos": [
990,
226
],
"size": [
442.23713562774674,
470.30942480468775
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 11
}
],
"properties": {
"Node name for S&R": "PreviewImage"
},
"color": "#232",
"bgcolor": "#353"
},
{
"id": 14,
"type": "PreviewImage",
"pos": [
995,
-302
],
"size": [
415.6031356277467,
445.6484248046876
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 13
}
],
"properties": {
"Node name for S&R": "PreviewImage"
}
},
{
"id": 11,
"type": "LoadImage",
"pos": [
119,
138
],
"size": [
460.0661356277467,
559.486431152344
],
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
10,
13
],
"shape": 3,
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": [
12
],
"shape": 3,
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"clipspace/clipspace-mask-4492994.199999999.png [input]",
"image"
]
},
{
"id": 10,
"type": "LamaRemover",
"pos": [
636,
227
],
"size": {
"0": 315,
"1": 126
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 10,
"slot_index": 0
},
{
"name": "masks",
"type": "MASK",
"link": 12
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
11
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "LamaRemover"
},
"widgets_values": [
250,
5,
false
],
"color": "#232",
"bgcolor": "#353"
}
],
"links": [
[
10,
11,
0,
10,
0,
"IMAGE"
],
[
11,
10,
0,
12,
0,
"IMAGE"
],
[
12,
11,
1,
10,
1,
"MASK"
],
[
13,
11,
0,
14,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {},
"version": 0.4
}
マスクされた領域を周囲のテクスチャで埋めることに特化しており、透かし除去などにも使われていましたね。
inpaintingによる除去
もっとも素朴な方法は、マスクを用意して普通のinpaintingで塗りつぶすやり方です。
消したいオブジェクトにマスクを描き、背景に合わせたプロンプト(例:「背景の芝生だけ」「何もない床」)を書いて、inpaintingします。

{
"last_node_id": 72,
"last_link_id": 172,
"nodes": [
{
"id": 20,
"type": "IPAdapterModelLoader",
"pos": [
1220,
225
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "IPADAPTER",
"type": "IPADAPTER",
"links": [
29
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "IPAdapterModelLoader"
},
"widgets_values": [
"ip-adapter_sdxl_vit-h.bin"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 11,
"type": "INPAINT_VAEEncodeInpaintConditioning",
"pos": [
2080,
630
],
"size": {
"0": 292.20001220703125,
"1": 106
},
"flags": {},
"order": 23,
"mode": 0,
"inputs": [
{
"name": "positive",
"type": "CONDITIONING",
"link": 13
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 14
},
{
"name": "vae",
"type": "VAE",
"link": 166
},
{
"name": "pixels",
"type": "IMAGE",
"link": 98
},
{
"name": "mask",
"type": "MASK",
"link": 122
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
7
],
"shape": 3,
"slot_index": 0
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
8
],
"shape": 3,
"slot_index": 1
},
{
"name": "latent_inpaint",
"type": "LATENT",
"links": [
20
],
"shape": 3,
"slot_index": 2
},
{
"name": "latent_samples",
"type": "LATENT",
"links": [
9
],
"shape": 3,
"slot_index": 3
}
],
"properties": {
"Node name for S&R": "INPAINT_VAEEncodeInpaintConditioning"
},
"color": "#2a363b",
"bgcolor": "#3f5159"
},
{
"id": 28,
"type": "SAMModelLoader (segment anything)",
"pos": [
-535,
1305
],
"size": {
"0": 358.7974548339844,
"1": 58
},
"flags": {},
"order": 1,
"mode": 0,
"outputs": [
{
"name": "SAM_MODEL",
"type": "SAM_MODEL",
"links": [
51
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "SAMModelLoader (segment anything)"
},
"widgets_values": [
"sam_hq_vit_h (2.57GB)"
],
"color": "#332922",
"bgcolor": "#593930"
},
{
"id": 27,
"type": "GroundingDinoModelLoader (segment anything)",
"pos": [
-540,
1415
],
"size": {
"0": 361.20001220703125,
"1": 58
},
"flags": {},
"order": 2,
"mode": 0,
"outputs": [
{
"name": "GROUNDING_DINO_MODEL",
"type": "GROUNDING_DINO_MODEL",
"links": [
50
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "GroundingDinoModelLoader (segment anything)"
},
"widgets_values": [
"GroundingDINO_SwinB (938MB)"
],
"color": "#332922",
"bgcolor": "#593930"
},
{
"id": 12,
"type": "INPAINT_ApplyFooocusInpaint",
"pos": [
2455,
410
],
"size": {
"0": 210,
"1": 66
},
"flags": {},
"order": 24,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 150
},
{
"name": "patch",
"type": "INPAINT_PATCH",
"link": 19
},
{
"name": "latent",
"type": "LATENT",
"link": 20
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
120
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "INPAINT_ApplyFooocusInpaint"
},
"color": "#2a363b",
"bgcolor": "#3f5159"
},
{
"id": 1,
"type": "CheckpointLoaderSimple",
"pos": [
875,
595
],
"size": {
"0": 463.80413818359375,
"1": 98
},
"flags": {},
"order": 3,
"mode": 0,
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
28
],
"slot_index": 0
},
{
"name": "CLIP",
"type": "CLIP",
"links": [
2,
65
],
"slot_index": 1
},
{
"name": "VAE",
"type": "VAE",
"links": [
162
],
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"📷-XL\\RealismEngineSDXL_V2.0_FP16_VAE.safetensors"
]
},
{
"id": 7,
"type": "KSampler",
"pos": [
2710,
610
],
"size": {
"0": 315,
"1": 262
},
"flags": {},
"order": 25,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 120
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 7
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 8
},
{
"name": "latent_image",
"type": "LATENT",
"link": 9
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
3
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
931423542174017,
"randomize",
20,
8,
"dpmpp_2m",
"karras",
1
]
},
{
"id": 66,
"type": "Reroute",
"pos": [
2943,
521
],
"size": [
75,
26
],
"flags": {},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "",
"type": "*",
"link": 167
}
],
"outputs": [
{
"name": "",
"type": "VAE",
"links": [
164
],
"slot_index": 0
}
],
"properties": {
"showOutputText": false,
"horizontal": false
}
},
{
"id": 4,
"type": "VAEDecode",
"pos": [
3098,
608
],
"size": {
"0": 190.01614379882812,
"1": 46
},
"flags": {},
"order": 26,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 3
},
{
"name": "vae",
"type": "VAE",
"link": 164,
"slot_index": 1
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
146
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
}
},
{
"id": 67,
"type": "Reroute",
"pos": [
1940,
521
],
"size": [
75,
26
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "",
"type": "*",
"link": 165
}
],
"outputs": [
{
"name": "",
"type": "VAE",
"links": [
166,
167
],
"slot_index": 0
}
],
"properties": {
"showOutputText": false,
"horizontal": false
}
},
{
"id": 13,
"type": "INPAINT_LoadFooocusInpaint",
"pos": [
2080,
410
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 4,
"mode": 0,
"outputs": [
{
"name": "INPAINT_PATCH",
"type": "INPAINT_PATCH",
"links": [
19
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "INPAINT_LoadFooocusInpaint"
},
"widgets_values": [
"fooocus_inpaint_head.pth",
"inpaint_v26.fooocus.patch"
],
"color": "#2a363b",
"bgcolor": "#3f5159"
},
{
"id": 19,
"type": "IPAdapterApply",
"pos": [
1680,
225
],
"size": {
"0": 315,
"1": 258
},
"flags": {},
"order": 20,
"mode": 0,
"inputs": [
{
"name": "ipadapter",
"type": "IPADAPTER",
"link": 29,
"slot_index": 0
},
{
"name": "clip_vision",
"type": "CLIP_VISION",
"link": 32,
"slot_index": 1
},
{
"name": "image",
"type": "IMAGE",
"link": 99
},
{
"name": "model",
"type": "MODEL",
"link": 28
},
{
"name": "attn_mask",
"type": "MASK",
"link": null
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
150
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "IPAdapterApply"
},
"widgets_values": [
0.35000000000000003,
0.01,
"linear",
0,
1,
false
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 39,
"type": "GrowMask",
"pos": [
727,
1338
],
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 17,
"mode": 0,
"inputs": [
{
"name": "mask",
"type": "MASK",
"link": 138
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
96,
122
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "GrowMask"
},
"widgets_values": [
40,
false
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 32,
"type": "INPAINT_LoadInpaintModel",
"pos": [
728,
1227
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 5,
"mode": 0,
"outputs": [
{
"name": "INPAINT_MODEL",
"type": "INPAINT_MODEL",
"links": [
56
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "INPAINT_LoadInpaintModel"
},
"widgets_values": [
"big-lama.pt"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 33,
"type": "INPAINT_InpaintWithModel",
"pos": [
1091,
1127
],
"size": {
"0": 241.79998779296875,
"1": 66
},
"flags": {},
"order": 19,
"mode": 0,
"inputs": [
{
"name": "inpaint_model",
"type": "INPAINT_MODEL",
"link": 56
},
{
"name": "image",
"type": "IMAGE",
"link": 64
},
{
"name": "mask",
"type": "MASK",
"link": 96
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
98,
99,
158
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "INPAINT_InpaintWithModel"
},
"color": "#232",
"bgcolor": "#353"
},
{
"id": 26,
"type": "GroundingDinoSAMSegment (segment anything)",
"pos": [
-73,
1540
],
"size": {
"0": 352.79998779296875,
"1": 122
},
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "sam_model",
"type": "SAM_MODEL",
"link": 51,
"slot_index": 0
},
{
"name": "grounding_dino_model",
"type": "GROUNDING_DINO_MODEL",
"link": 50,
"slot_index": 1
},
{
"name": "image",
"type": "IMAGE",
"link": 125
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
52
],
"shape": 3,
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": [
53,
139
],
"shape": 3,
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "GroundingDinoSAMSegment (segment anything)"
},
"widgets_values": [
"human",
0.3
],
"color": "#332922",
"bgcolor": "#593930"
},
{
"id": 48,
"type": "Image Size to Number",
"pos": [
3058.816131591797,
746
],
"size": {
"0": 229.20001220703125,
"1": 126
},
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 127
}
],
"outputs": [
{
"name": "width_num",
"type": "NUMBER",
"links": null,
"shape": 3
},
{
"name": "height_num",
"type": "NUMBER",
"links": null,
"shape": 3
},
{
"name": "width_float",
"type": "FLOAT",
"links": null,
"shape": 3
},
{
"name": "height_float",
"type": "FLOAT",
"links": null,
"shape": 3
},
{
"name": "width_int",
"type": "INT",
"links": [
133
],
"shape": 3,
"slot_index": 4
},
{
"name": "height_int",
"type": "INT",
"links": [
134
],
"shape": 3,
"slot_index": 5
}
],
"properties": {
"Node name for S&R": "Image Size to Number"
}
},
{
"id": 17,
"type": "ImageResize",
"pos": [
370,
1127
],
"size": {
"0": 315,
"1": 246
},
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 25
},
{
"name": "mask_optional",
"type": "MASK",
"link": 139
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
64
],
"shape": 3,
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": [
138
],
"shape": 3,
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "ImageResize"
},
"widgets_values": [
"resize only",
0,
1408,
0,
"any",
"4:3",
0.5,
20
]
},
{
"id": 2,
"type": "CLIPTextEncode",
"pos": [
1770,
630
],
"size": {
"0": 210,
"1": 54
},
"flags": {
"collapsed": false
},
"order": 22,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 65
},
{
"name": "text",
"type": "STRING",
"link": 160,
"widget": {
"name": "text"
},
"slot_index": 1
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
13
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 3,
"type": "CLIPTextEncode",
"pos": [
1772,
729
],
"size": {
"0": 210,
"1": 76
},
"flags": {
"collapsed": true
},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 2
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
14
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"worst quality"
]
},
{
"id": 22,
"type": "CLIPVisionLoader",
"pos": [
1220,
335
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 6,
"mode": 0,
"outputs": [
{
"name": "CLIP_VISION",
"type": "CLIP_VISION",
"links": [
32
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPVisionLoader"
},
"widgets_values": [
"OpenCLIP-ViT-H-14.safetensors"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 29,
"type": "JoinImageWithAlpha",
"pos": [
725,
1540
],
"size": {
"0": 176.39999389648438,
"1": 46
},
"flags": {},
"order": 16,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 52
},
{
"name": "alpha",
"type": "MASK",
"link": 54
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
171
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "JoinImageWithAlpha"
}
},
{
"id": 51,
"type": "ImageScale",
"pos": [
3361,
610
],
"size": {
"0": 315,
"1": 130
},
"flags": {},
"order": 27,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 146
},
{
"name": "width",
"type": "INT",
"link": 133,
"widget": {
"name": "width"
}
},
{
"name": "height",
"type": "INT",
"link": 134,
"widget": {
"name": "height"
}
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
172
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "ImageScale"
},
"widgets_values": [
"nearest-exact",
512,
512,
"disabled"
]
},
{
"id": 6,
"type": "LoadImage",
"pos": [
-740,
1555
],
"size": {
"0": 577.4927368164062,
"1": 941.2804565429688
},
"flags": {},
"order": 7,
"mode": 0,
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
25,
125,
127
],
"shape": 3,
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": [],
"shape": 3,
"slot_index": 1
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"pexels-photo-2734964.jpg",
"image"
]
},
{
"id": 72,
"type": "SaveImage",
"pos": [
3734,
610
],
"size": [
678.9828195312484,
1072.5615755859365
],
"flags": {},
"order": 28,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 172
}
],
"properties": {},
"widgets_values": [
"ComfyUI"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 64,
"type": "WD14Tagger|pysssss",
"pos": [
1401,
809
],
"size": {
"0": 315,
"1": 220
},
"flags": {},
"order": 21,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 158
}
],
"outputs": [
{
"name": "STRING",
"type": "STRING",
"links": [
160
],
"shape": 6,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "WD14Tagger|pysssss"
},
"widgets_values": [
"wd-v1-4-moat-tagger-v2",
0.35,
0.85,
false,
false,
"",
"flower, outdoors, blurry, no_humans, depth_of_field, leaf, plant, red_flower, nature, scenery"
]
},
{
"id": 71,
"type": "SaveImage",
"pos": [
946,
1541
],
"size": [
640.3110279744426,
969.1198102891353
],
"flags": {},
"order": 18,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 171
}
],
"properties": {},
"widgets_values": [
"ComfyUI"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 65,
"type": "Reroute",
"pos": [
1494,
521
],
"size": [
75,
26
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "",
"type": "*",
"link": 162
}
],
"outputs": [
{
"name": "",
"type": "VAE",
"links": [
165
]
}
],
"properties": {
"showOutputText": false,
"horizontal": false
}
},
{
"id": 30,
"type": "InvertMask",
"pos": [
565,
1590
],
"size": {
"0": 210,
"1": 26
},
"flags": {
"collapsed": true
},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "mask",
"type": "MASK",
"link": 53
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
54
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "InvertMask"
}
}
],
"links": [
[
2,
1,
1,
3,
0,
"CLIP"
],
[
3,
7,
0,
4,
0,
"LATENT"
],
[
7,
11,
0,
7,
1,
"CONDITIONING"
],
[
8,
11,
1,
7,
2,
"CONDITIONING"
],
[
9,
11,
3,
7,
3,
"LATENT"
],
[
13,
2,
0,
11,
0,
"CONDITIONING"
],
[
14,
3,
0,
11,
1,
"CONDITIONING"
],
[
19,
13,
0,
12,
1,
"INPAINT_PATCH"
],
[
20,
11,
2,
12,
2,
"LATENT"
],
[
25,
6,
0,
17,
0,
"IMAGE"
],
[
28,
1,
0,
19,
3,
"MODEL"
],
[
29,
20,
0,
19,
0,
"IPADAPTER"
],
[
32,
22,
0,
19,
1,
"CLIP_VISION"
],
[
50,
27,
0,
26,
1,
"GROUNDING_DINO_MODEL"
],
[
51,
28,
0,
26,
0,
"SAM_MODEL"
],
[
52,
26,
0,
29,
0,
"IMAGE"
],
[
53,
26,
1,
30,
0,
"MASK"
],
[
54,
30,
0,
29,
1,
"MASK"
],
[
56,
32,
0,
33,
0,
"INPAINT_MODEL"
],
[
64,
17,
0,
33,
1,
"IMAGE"
],
[
65,
1,
1,
2,
0,
"CLIP"
],
[
96,
39,
0,
33,
2,
"MASK"
],
[
98,
33,
0,
11,
3,
"IMAGE"
],
[
99,
33,
0,
19,
2,
"IMAGE"
],
[
120,
12,
0,
7,
0,
"MODEL"
],
[
122,
39,
0,
11,
4,
"MASK"
],
[
125,
6,
0,
26,
2,
"IMAGE"
],
[
127,
6,
0,
48,
0,
"IMAGE"
],
[
133,
48,
4,
51,
1,
"INT"
],
[
134,
48,
5,
51,
2,
"INT"
],
[
138,
17,
1,
39,
0,
"MASK"
],
[
139,
26,
1,
17,
1,
"MASK"
],
[
146,
4,
0,
51,
0,
"IMAGE"
],
[
150,
19,
0,
12,
0,
"MODEL"
],
[
158,
33,
0,
64,
0,
"IMAGE"
],
[
160,
64,
0,
2,
1,
"STRING"
],
[
162,
1,
2,
65,
0,
"*"
],
[
164,
66,
0,
4,
1,
"VAE"
],
[
165,
65,
0,
67,
0,
"*"
],
[
166,
67,
0,
11,
2,
"VAE"
],
[
167,
67,
0,
66,
0,
"*"
],
[
171,
29,
0,
71,
0,
"IMAGE"
],
[
172,
51,
0,
72,
0,
"IMAGE"
]
],
"groups": [
{
"title": "GroundingDinoSAM",
"bounding": [
154,
206,
370,
80
],
"color": "#b06634",
"font_size": 40,
"locked": false
},
{
"title": "LaMa",
"bounding": [
157,
310,
140,
80
],
"color": "#8A8",
"font_size": 40,
"locked": false
},
{
"title": "IP-Adapter",
"bounding": [
158,
415,
218,
80
],
"color": "#a1309b",
"font_size": 40,
"locked": false
},
{
"title": "Fooocus Inpaint",
"bounding": [
157,
519,
299,
80
],
"color": "#3f789e",
"font_size": 40,
"locked": false
}
],
"config": {},
"extra": {},
"version": 0.4
}
ただし、オブジェクトを消すどころか、別のオブジェクトを新たに増やしてしまうこともあり、オブジェクト除去としては安定しない場合もありました。
そのため、以前はinpaintingの前処理として、まずLaMaでマスク領域を背景で埋めておくこともありましたが、現在のモデルでは不要でしょう。
指示ベース画像編集でのオブジェクト除去
最近の指示ベース画像編集モデルでは、オブジェクト除去もかなり単純なタスクになりつつあります。
「この人を消して」「この標識を消して」「右下のロゴを消して」などと指示するだけです。

{
"id": "d8034549-7e0a-40f1-8c2e-de3ffc6f1cae",
"revision": 0,
"last_node_id": 119,
"last_link_id": 318,
"nodes": [
{
"id": 54,
"type": "ModelSamplingAuraFlow",
"pos": [
634.9767456054688,
-1.8326886892318726
],
"size": [
230.33058166503906,
58
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 282
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
123
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.49",
"Node name for S&R": "ModelSamplingAuraFlow"
},
"widgets_values": [
3.1000000000000005
]
},
{
"id": 63,
"type": "VAEEncode",
"pos": [
714.6403198242188,
673.7313842773438
],
"size": [
140,
46
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 239
},
{
"name": "vae",
"type": "VAE",
"link": 115
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
112
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51",
"Node name for S&R": "VAEEncode"
},
"widgets_values": []
},
{
"id": 55,
"type": "MarkdownNote",
"pos": [
-84.94583892822266,
-171.1671905517578
],
"size": [
386.9856262207031,
251.33447265625
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {},
"widgets_values": [
"## models\n- [Qwen-Image-Edit-gguf](https://huggingface.co/QuantStack/Qwen-Image-Edit-GGUF/tree/main)\n- [Qwen2.5-VL-7B-Instruct-GGUF](https://huggingface.co/unsloth/Qwen2.5-VL-7B-Instruct-GGUF/tree/main)\n- [Qwen2.5-VL-7B-Instruct-mmproj-BF16.gguf](https://huggingface.co/QuantStack/Qwen-Image-Edit-GGUF/blob/main/mmproj/Qwen2.5-VL-7B-Instruct-mmproj-BF16.gguf)\n- [qwen_image_vae.safetensors](https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae)\n\n\n```\n📂ComfyUI/\n└── 📂models/\n ├── 📂text_encoders/\n │ ├── Qwen2.5-VL-7B-Instruct-.gguf\n │ └── Qwen2.5-VL-7B-Instruct-mmproj-BF16.gguf\n ├── 📂unet/\n │ └── Qwen_Image_Edit.gguf\n └── 📂vae/\n └── qwen_image_vae.safetensors\n```"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1293.939697265625,
143.6978759765625
],
"size": [
157.56002807617188,
46
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 35
},
{
"name": "vae",
"type": "VAE",
"link": 76
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
254
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 112,
"type": "CLIPLoader",
"pos": [
75.53079223632812,
277.016357421875
],
"size": [
270,
106
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
290,
291
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51",
"Node name for S&R": "CLIPLoader"
},
"widgets_values": [
"qwen_2.5_vl_7b_fp8_scaled.safetensors",
"qwen_image",
"default"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 39,
"type": "VAELoader",
"pos": [
107.53079223632812,
446.7167663574219
],
"size": [
238,
58
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"slot_index": 0,
"links": [
76,
115,
292,
293
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"qwen_image_vae.safetensors"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 82,
"type": "ImageScaleToTotalPixels",
"pos": [
-224.63221740722656,
668.4074096679688
],
"size": [
270,
82
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 275
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
244
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51",
"Node name for S&R": "ImageScaleToTotalPixels"
},
"widgets_values": [
"nearest-exact",
1
]
},
{
"id": 114,
"type": "TextEncodeQwenImageEditPlus",
"pos": [
454.6401672363281,
419.63690185546875
],
"size": [
400,
200
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 291
},
{
"name": "vae",
"shape": 7,
"type": "VAE",
"link": 293
},
{
"name": "image1",
"shape": 7,
"type": "IMAGE",
"link": 295
},
{
"name": "image2",
"shape": 7,
"type": "IMAGE",
"link": null
},
{
"name": "image3",
"shape": 7,
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
315
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.59",
"Node name for S&R": "TextEncodeQwenImageEditPlus"
},
"widgets_values": [
""
]
},
{
"id": 83,
"type": "ImageResizeKJv2",
"pos": [
75.53079223632812,
668.4074096679688
],
"size": [
270,
336
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 244
},
{
"name": "mask",
"shape": 7,
"type": "MASK",
"link": null
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
239,
294,
295
]
},
{
"name": "width",
"type": "INT",
"links": null
},
{
"name": "height",
"type": "INT",
"links": null
},
{
"name": "mask",
"type": "MASK",
"links": []
}
],
"properties": {
"cnr_id": "comfyui-kjnodes",
"ver": "e2ce0843d1183aea86ce6a1617426f492dcdc802",
"Node name for S&R": "ImageResizeKJv2"
},
"widgets_values": [
0,
0,
"nearest-exact",
"crop",
"0, 0, 0",
"center",
8,
"cpu",
"<tr><td>Output: </td><td><b>1</b> x <b>1248</b> x <b>832 | 11.88MB</b></td></tr>"
]
},
{
"id": 111,
"type": "UNETLoader",
"pos": [
338.8721923828125,
-1.8326886892318726
],
"size": [
276.62274169921875,
82
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
282
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51",
"Node name for S&R": "UNETLoader"
},
"widgets_values": [
"Qwen-Image\\qwen_image_edit_2509_fp8_e4m3fn.safetensors",
"fp8_e4m3fn"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 3,
"type": "KSampler",
"pos": [
933.5941772460938,
143.6978759765625
],
"size": [
315,
262
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 123
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 314
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 315
},
{
"name": "latent_image",
"type": "LATENT",
"link": 112
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
35
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
1234,
"fixed",
20,
2.5,
"res_multistep",
"simple",
1
]
},
{
"id": 97,
"type": "SaveImage",
"pos": [
1495.48046875,
143.6978759765625
],
"size": [
588.699563154297,
477.18239744140624
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 254
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 99,
"type": "LoadImage",
"pos": [
-716.9654541015625,
668.4074096679688
],
"size": [
456.17022705078125,
417.46728515625
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
275
]
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.51",
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"1f421a11eb7f46ffcf970787036c5cc1.jpg",
"image"
]
},
{
"id": 113,
"type": "TextEncodeQwenImageEditPlus",
"pos": [
454.6401672363281,
163.63690185546875
],
"size": [
400,
200
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 290
},
{
"name": "vae",
"shape": 7,
"type": "VAE",
"link": 292
},
{
"name": "image1",
"shape": 7,
"type": "IMAGE",
"link": 294
},
{
"name": "image2",
"shape": 7,
"type": "IMAGE",
"link": null
},
{
"name": "image3",
"shape": 7,
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
314
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.59",
"Node name for S&R": "TextEncodeQwenImageEditPlus"
},
"widgets_values": [
"remove the woman"
]
}
],
"links": [
[
35,
3,
0,
8,
0,
"LATENT"
],
[
76,
39,
0,
8,
1,
"VAE"
],
[
112,
63,
0,
3,
3,
"LATENT"
],
[
115,
39,
0,
63,
1,
"VAE"
],
[
123,
54,
0,
3,
0,
"MODEL"
],
[
239,
83,
0,
63,
0,
"IMAGE"
],
[
244,
82,
0,
83,
0,
"IMAGE"
],
[
254,
8,
0,
97,
0,
"IMAGE"
],
[
275,
99,
0,
82,
0,
"IMAGE"
],
[
282,
111,
0,
54,
0,
"MODEL"
],
[
290,
112,
0,
113,
0,
"CLIP"
],
[
291,
112,
0,
114,
0,
"CLIP"
],
[
292,
39,
0,
113,
1,
"VAE"
],
[
293,
39,
0,
114,
1,
"VAE"
],
[
294,
83,
0,
113,
2,
"IMAGE"
],
[
295,
83,
0,
114,
2,
"IMAGE"
],
[
314,
113,
0,
3,
1,
"CONDITIONING"
],
[
315,
114,
0,
3,
2,
"CONDITIONING"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.6830134553650705,
"offset": [
815.5013541015625,
271.1671905517578
]
},
"frontendVersion": "1.34.2",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
マスク不要という利点
inpaintingと比較して明確に優れている点は、マスクを描かなくて済む点です。
オブジェクト除去を自動化しようとすると、セグメンテーションでオブジェクトのマスクを作る必要があります。しかし本来は、対象オブジェクトだけでなく、その影やガラスへの映り込みまで消さなければいけません。セグメンテーションでそこまでマスクに含めるのは難しいのです。
指示ベース画像編集モデルならば、それらも含めて消してくれます。