inpaintingとは?
inpainting は「画像の一部分だけを描き直す」ための手法です。
が、実は中身を見てみると、次の 2 パターンに分けられます。
- タイプA: マスク部分だけを image2image する
- タイプB: 周囲の情報を見ながら、マスク部分を自然に埋める
一般的には、これらに区別をつけていませんが、それゆえ混乱している初心者をよく見かけます。
いったん別物として分けて考えていきましょう。
マスクの作り方やマスク編集の詳細は、別ページのマスク操作、AIを使ったマスク生成を参照してください。
タイプA: マスク部分だけの image2image
マスクした部分だけを、通常の image2image と同じノリで描き直す方法です。
少し顔の表情を変える、絵柄を変える、細かい部分をちょっと修正したいときに向いています。
workflow
この workflow では、SetLatentNoiseMask ノードを使って「どこにノイズを足すか」を指定します。

{
"id": "8b9f7796-0873-4025-be3c-0f997f67f866",
"revision": 0,
"last_node_id": 17,
"last_link_id": 20,
"nodes": [
{
"id": 10,
"type": "VAELoader",
"pos": [
150.35849892963756,
608.4685752753562
],
"size": [
231.48760330578511,
58
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
10,
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 12,
"type": "VAEEncode",
"pos": [
427.11516235234546,
674.510187570697
],
"size": [
140,
46
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 11
},
{
"name": "vae",
"type": "VAE",
"link": 13
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
14
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAEEncode"
},
"color": "#322",
"bgcolor": "#533"
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
416.1970166015625,
392.37848510742185
],
"size": [
410.75801513671877,
158.82607910156253
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
6
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, worst quality"
]
},
{
"id": 9,
"type": "SaveImage",
"pos": [
1451,
189
],
"size": [
503.2876035004722,
576.239673217884
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 9
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1209,
188
],
"size": [
203.39999999999986,
46
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 10
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
9
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
39.900463636363625,
349.9095500000008
],
"size": [
315,
98
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
1
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"v1-5-pruned-emaonly-fp16.safetensors"
]
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
415,
186
],
"size": [
411.95503173828126,
151.0030493164063
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
4
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"high quality, RAW photo of a woman with red hair, Punch perm"
]
},
{
"id": 11,
"type": "LoadImage",
"pos": [
39.85257865587715,
722.7009982393679
],
"size": [
341.9935235795455,
473.93255454545454
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
11
]
},
{
"name": "MASK",
"type": "MASK",
"links": [
15
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "LoadImage",
"image": "clipspace/clipspace-painted-masked-1765071052914.png [input]"
},
"widgets_values": [
"clipspace/clipspace-painted-masked-1765071052914.png [input]",
"image"
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
863,
186
],
"size": [
315,
262
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 1
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 4
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 6
},
{
"name": "latent_image",
"type": "LATENT",
"link": 16
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
45678,
"fixed",
20,
8,
"euler",
"normal",
0.8
]
},
{
"id": 13,
"type": "SetLatentNoiseMask",
"pos": [
607.2212780548256,
721.4635076308024
],
"size": [
208.02038352272726,
46
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 14
},
{
"name": "mask",
"type": "MASK",
"link": 15
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
16
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "SetLatentNoiseMask"
},
"color": "#232",
"bgcolor": "#353"
}
],
"links": [
[
1,
4,
0,
3,
0,
"MODEL"
],
[
3,
4,
1,
6,
0,
"CLIP"
],
[
4,
6,
0,
3,
1,
"CONDITIONING"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
6,
7,
0,
3,
2,
"CONDITIONING"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
9,
8,
0,
9,
0,
"IMAGE"
],
[
10,
10,
0,
8,
1,
"VAE"
],
[
11,
11,
0,
12,
0,
"IMAGE"
],
[
13,
10,
0,
12,
1,
"VAE"
],
[
14,
12,
0,
13,
0,
"LATENT"
],
[
15,
11,
1,
13,
1,
"MASK"
],
[
16,
13,
0,
3,
3,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.620921323059155,
"offset": [
332.32361134412287,
153.96599000000006
]
},
"frontendVersion": "1.34.6",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
- ベースは image2image の workflow です。
- 🟥 VAE Encode ノードで元画像を latent に変換
- 🟩 Set Latent Noise Mask ノードで latent とマスクを組み合わせる
この手法の問題点
試しに上の workflow で denoise を 1.00 にしてみましょう。

わお、ホラー画像が生成されました(;・∀・)
この手法は、あくまで「マスク部分だけをキャンバスにした image2image」です。
denoise を上げると、マスク部分でほとんど text2image に近い挙動になります。
プロンプトに「赤いパンチパーマの女性」と書いたので、元の画像とは関係なく、新たに女性を描き出したわけですね。
全体の雰囲気を見つつ、マスクされた部分を描いてもらう方法はないでしょうか?
タイプB: 周囲を見ながらマスクを埋める
画像全体を見たうえで、「周りと自然につなげるようにマスク部分を描き直す」タイプです。
先ほどは、「image2image の適用範囲をマスクで物理的に切り取る」だけでした。
こちらのタイプでは、マスク領域そのものを Conditioning の一種として扱い、「この範囲だけを描き直してほしい」という条件をモデルに直接渡します。
そのうえで、実装のアプローチはいろいろありますが、SD1.5 では次の 2 系統を押さえておけば十分です。
- inpainting 専用モデルを使う
- ControlNet inpaint でノーマルモデルを inpaint 対応にする
inpaintingモデル
SD1.5 を「周囲を見ながら埋める」タスク向けに調整したチェックポイントです。
モデルのダウンロード
- stable-diffusion-v1-5/sd-v1-5-inpainting.ckpt
- 配置先:
  📂ComfyUI/
  └── 📂models/
      └── 📂checkpoints/
          └── sd-v1-5-inpainting.ckpt
workflow

{
"id": "8b9f7796-0873-4025-be3c-0f997f67f866",
"revision": 0,
"last_node_id": 21,
"last_link_id": 30,
"nodes": [
{
"id": 9,
"type": "SaveImage",
"pos": [
1773.2999999999997,
213.20000000000002
],
"size": [
503.2876035004722,
576.239673217884
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 9
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1531.2999999999997,
212.20000000000002
],
"size": [
203.39999999999986,
46
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 10
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
9
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
415,
186
],
"size": [
411.95503173828126,
151.0030493164063
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
25
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"high quality, RAW photo of a woman with red hair, Punch perm"
]
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
416.1970166015625,
392.37848510742185
],
"size": [
410.75801513671877,
158.82607910156253
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
26
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, worst quality"
]
},
{
"id": 11,
"type": "LoadImage",
"pos": [
479.08257865587706,
719.0709982393678
],
"size": [
341.9935235795455,
473.93255454545454
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
29
]
},
{
"name": "MASK",
"type": "MASK",
"links": [
30
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "LoadImage",
"image": "clipspace/clipspace-painted-masked-1765071052914.png [input]"
},
"widgets_values": [
"clipspace/clipspace-painted-masked-1765071052914.png [input]",
"image"
]
},
{
"id": 10,
"type": "VAELoader",
"pos": [
589.5884989296375,
604.8385752753561
],
"size": [
231.48760330578511,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
10,
27
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
1185.2999999999995,
210.20000000000002
],
"size": [
315,
262
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 1
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 23
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 24
},
{
"name": "latent_image",
"type": "LATENT",
"link": 28
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
45678,
"fixed",
20,
8,
"euler",
"normal",
1
]
},
{
"id": 20,
"type": "InpaintModelConditioning",
"pos": [
885.5197492645148,
229.90120706804433
],
"size": [
258.49395533092,
138
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "positive",
"type": "CONDITIONING",
"link": 25
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 26
},
{
"name": "vae",
"type": "VAE",
"link": 27
},
{
"name": "pixels",
"type": "IMAGE",
"link": 29
},
{
"name": "mask",
"type": "MASK",
"link": 30
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
23
]
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
24
]
},
{
"name": "latent",
"type": "LATENT",
"links": [
28
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "InpaintModelConditioning"
},
"widgets_values": [
true
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
39.900463636363625,
349.9095500000008
],
"size": [
315,
98
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
1
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"sd-v1-5-inpainting.ckpt"
],
"color": "#323",
"bgcolor": "#535"
}
],
"links": [
[
1,
4,
0,
3,
0,
"MODEL"
],
[
3,
4,
1,
6,
0,
"CLIP"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
9,
8,
0,
9,
0,
"IMAGE"
],
[
10,
10,
0,
8,
1,
"VAE"
],
[
23,
20,
0,
3,
1,
"CONDITIONING"
],
[
24,
20,
1,
3,
2,
"CONDITIONING"
],
[
25,
6,
0,
20,
0,
"CONDITIONING"
],
[
26,
7,
0,
20,
1,
"CONDITIONING"
],
[
27,
10,
0,
20,
2,
"VAE"
],
[
28,
20,
2,
3,
3,
"LATENT"
],
[
29,
11,
0,
20,
3,
"IMAGE"
],
[
30,
11,
1,
20,
4,
"MASK"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.6830134553650705,
"offset": [
58.63543636363637,
-84.5359
]
},
"frontendVersion": "1.34.6",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
- 🟪 inpainting モデルを読み込みます。
- 🟩 VAE Encode、Set Latent Noise Mask を InpaintModelConditioning ノードに置き換えます。
  - 入力するパラメータはほぼ同じです。
  - noise_mask パラメータだけ少し注意が必要です。
    - true: Set Latent Noise Mask のときと同様に、マスクの中だけを描き直すよう強制します。通常はこの設定で問題ありません。
    - false: 一部のモデルでは、true にすると破綻することがあります。その場合の逃げ道として false を試してみてください。
上の例では、denoise を 1.00 にしても、画像全体が自然に見えるように女性の髪を描き直していることが分かります。
タイプA と違って、「周囲との整合性を見ながらマスク部分を埋める」挙動になっていますね。
ControlNet inpaint
inpainting モデルの欠点は、inpainting モデルを使わないといけないことです。 Stable Diffusion 1.5 をファインチューニングしたモデルを、そのまま inpainting に使いたいときもあるでしょう。
そんな時、ControlNet inpaint が役に立ちます。
ControlNet については、また別のページで説明します。
カスタムノード
- comfyui_controlnet_aux(下の workflow の Inpaint Preprocessor ノードで使用します)
ControlNet モデルのダウンロード
- comfyanonymous/control_v11p_sd15_inpaint_fp16.safetensors
- 配置先:
  📂ComfyUI/
  └── 📂models/
      └── 📂controlnet/
          └── control_v11p_sd15_inpaint_fp16.safetensors
workflow

{
"id": "8b9f7796-0873-4025-be3c-0f997f67f866",
"revision": 0,
"last_node_id": 30,
"last_link_id": 53,
"nodes": [
{
"id": 9,
"type": "SaveImage",
"pos": [
1757.2999999999997,
214.20000000000002
],
"size": [
503.2876035004722,
576.239673217884
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 9
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1515.2999999999997,
213.20000000000002
],
"size": [
203.39999999999986,
46
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 10
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
9
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
420.98808709870343,
387.97848510742176
],
"size": [
410.75801513671877,
158.82607910156253
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
33
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, worst quality"
]
},
{
"id": 22,
"type": "ControlNetApplyAdvanced",
"pos": [
901.0437048084135,
232.48767017280198
],
"size": [
235.54470323065357,
186
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "positive",
"type": "CONDITIONING",
"link": 32
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 33
},
{
"name": "control_net",
"type": "CONTROL_NET",
"link": 39
},
{
"name": "image",
"type": "IMAGE",
"link": 31
},
{
"name": "vae",
"shape": 7,
"type": "VAE",
"link": 34
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
37
]
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
38
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "ControlNetApplyAdvanced"
},
"widgets_values": [
0.8,
0,
1
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
419.79107049714094,
181.60000000000002
],
"size": [
411.95503173828126,
151.0030493164063
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
32
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"high quality, RAW photo of a woman with red hair, Punch perm"
]
},
{
"id": 26,
"type": "ControlNetLoader",
"pos": [
573.8122179379016,
601.5646590492796
],
"size": [
257.93388429752065,
58
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CONTROL_NET",
"type": "CONTROL_NET",
"links": [
39
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "ControlNetLoader"
},
"widgets_values": [
"control_v11p_sd15_inpaint_fp16.safetensors"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 24,
"type": "InpaintPreprocessor",
"pos": [
593.6777428604222,
728.2127272727273
],
"size": [
238.068359375,
78
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 35
},
{
"name": "mask",
"type": "MASK",
"link": 36
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
31
]
}
],
"properties": {
"cnr_id": "comfyui_controlnet_aux",
"ver": "12f35647f0d510e03b45a47fb420fe1245a575df",
"Node name for S&R": "InpaintPreprocessor"
},
"widgets_values": [
false
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 3,
"type": "KSampler",
"pos": [
1169.2999999999995,
211.20000000000002
],
"size": [
315,
262
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 1
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 37
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 38
},
{
"name": "latent_image",
"type": "LATENT",
"link": 53
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
12345,
"fixed",
20,
8,
"euler",
"normal",
1
]
},
{
"id": 10,
"type": "VAELoader",
"pos": [
395.1303171114555,
1012.947966184447
],
"size": [
231.48760330578511,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
10,
34,
41
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 27,
"type": "VAEEncode",
"pos": [
486.7770161500693,
868.15734837917
],
"size": [
140,
46
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 43
},
{
"name": "vae",
"type": "VAE",
"link": 41
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
51
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAEEncode"
},
"color": "#322",
"bgcolor": "#533"
},
{
"id": 30,
"type": "SetLatentNoiseMask",
"pos": [
654.6523241448506,
916.4969147623518
],
"size": [
180.74765625,
46
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 51
},
{
"name": "mask",
"type": "MASK",
"link": 52
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
53
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "SetLatentNoiseMask"
},
"color": "#322",
"bgcolor": "#533"
},
{
"id": 11,
"type": "LoadImage",
"pos": [
17.462760474058893,
915.0939090909087
],
"size": [
341.9935235795455,
473.93255454545454
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
35,
43
]
},
{
"name": "MASK",
"type": "MASK",
"links": [
36,
52
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "LoadImage",
"image": "clipspace/clipspace-painted-masked-1765071052914.png [input]"
},
"widgets_values": [
"clipspace/clipspace-painted-masked-1765071052914.png [input]",
"image"
]
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
59.95686363636362,
333.7249500000008
],
"size": [
315,
98
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
1
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"v1-5-pruned-emaonly-fp16.safetensors"
]
}
],
"links": [
[
1,
4,
0,
3,
0,
"MODEL"
],
[
3,
4,
1,
6,
0,
"CLIP"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
9,
8,
0,
9,
0,
"IMAGE"
],
[
10,
10,
0,
8,
1,
"VAE"
],
[
31,
24,
0,
22,
3,
"IMAGE"
],
[
32,
6,
0,
22,
0,
"CONDITIONING"
],
[
33,
7,
0,
22,
1,
"CONDITIONING"
],
[
34,
10,
0,
22,
4,
"VAE"
],
[
35,
11,
0,
24,
0,
"IMAGE"
],
[
36,
11,
1,
24,
1,
"MASK"
],
[
37,
22,
0,
3,
1,
"CONDITIONING"
],
[
38,
22,
1,
3,
2,
"CONDITIONING"
],
[
39,
26,
0,
22,
2,
"CONTROL_NET"
],
[
41,
10,
0,
27,
1,
"VAE"
],
[
43,
11,
0,
27,
0,
"IMAGE"
],
[
51,
27,
0,
30,
0,
"LATENT"
],
[
52,
11,
1,
30,
1,
"MASK"
],
[
53,
30,
0,
3,
3,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.683013455365071,
"offset": [
82.5372395259411,
-81.60000000000002
]
},
"frontendVersion": "1.34.6",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
- 好きな SD1.5 checkpoint(+LoRA)をロード
- 🟨 画像とマスクを Inpaint Preprocessor に入力し、ControlNet 用の画像に変換
  - 実際のところ、マスク部分を黒で塗りつぶしているだけです。
- 🟩 Apply ControlNet ノードに ControlNet モデル・画像・VAE を入力
- 🟥 上でやった Set Latent Noise Mask を使った inpainting を組み込む
SDXL / Flux などへのつなぎ
このページは SD1.5 に特化していますが、ほかにも inpainting 手段はいくつか存在します。
- Fooocus inpaint(SDXL 向けの inpaint モデル)
- Flux.fill(Flux 系の塗りつぶし機能)
- Lanpaint(画像編集・inpaint 系ツール)
これらは、また別で取り扱う予定です。