什么是换装?
如果说 ID 转移是“专注于人的 Subject 转移”,那么换装就可以说是 “专注于衣服的 Subject 转移”。
也被称为 虚拟试穿(Virtual try-on / VTON)。
特别是作为商品图像使用时,
- 纹样和细节不改变
- 自然地贴合体型和姿势
这样的 一致性 非常重要。
LoRA
不管怎么说,最确切且灵活的方法,是直接制作衣服的 LoRA。
如果与 inpainting 组合,就可以对特定人物的衣服进行换装。
catvton-flux
虽然有许多专注于 VTON 系任务(衣服换装)的模型,但作为代表例,举一下 catvton-flux。
基本思路与 IC-LoRA / ACE++ 相同,使用 并排布局。

{
"last_node_id": 65,
"last_link_id": 147,
"nodes": [
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
153.81593322753906,
193.08474731445312
],
"size": [
397.89935302734375,
132.290771484375
],
"flags": {
"collapsed": false
},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 63
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
81
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1600,
40
],
"size": [
190,
46
],
"flags": {},
"order": 19,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 60
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
102
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 23,
"type": "CLIPTextEncode",
"pos": [
150.60000610351562,
0.6999998688697815
],
"size": [
397.89935302734375,
120.82927703857422
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 62
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
41
],
"slot_index": 0
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"The pair of images highlights a clothing and its styling on a model, high resolution, 4K, 8K; [IMAGE1] Detailed product shot of a clothing [IMAGE2] The same cloth is worn by a model in a lifestyle setting."
]
},
{
"id": 52,
"type": "ImageToMask",
"pos": [
-160,
1050
],
"size": [
210,
58
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 115
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
118
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "ImageToMask"
},
"widgets_values": [
"red"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 53,
"type": "PreviewImage",
"pos": [
-160,
1180
],
"size": [
210,
246
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 116
}
],
"outputs": [],
"properties": {
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 51,
"type": "segformer_b2_clothes",
"pos": [
-510,
1050
],
"size": [
315,
346
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 146
}
],
"outputs": [
{
"name": "mask_image",
"type": "IMAGE",
"links": [
115,
116
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "segformer_b2_clothes"
},
"widgets_values": [
false,
false,
false,
true,
false,
false,
false,
false,
false,
false,
false,
false,
false
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 56,
"type": "LoadImage",
"pos": [
-870,
1010
],
"size": [
290,
510
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
146,
147
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"pexels-photo-3155565.jpg",
"image"
]
},
{
"id": 48,
"type": "LoraLoaderModelOnly",
"pos": [
866.4467163085938,
-173.85031127929688
],
"size": [
315,
82
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 107
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
108
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "LoraLoaderModelOnly"
},
"widgets_values": [
"Flux\\catvton-flux-lora-alpha.safetensors",
1
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 38,
"type": "InpaintModelConditioning",
"pos": [
900,
60
],
"size": [
311,
138
],
"flags": {},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "positive",
"type": "CONDITIONING",
"link": 80
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 81
},
{
"name": "vae",
"type": "VAE",
"link": 82
},
{
"name": "pixels",
"type": "IMAGE",
"link": 109
},
{
"name": "mask",
"type": "MASK",
"link": 110
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
77
],
"slot_index": 0
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
78
],
"slot_index": 1
},
{
"name": "latent",
"type": "LATENT",
"links": [
88
],
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "InpaintModelConditioning"
},
"widgets_values": [
true
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 26,
"type": "FluxGuidance",
"pos": [
603.0430908203125,
3.384554386138916
],
"size": [
242.8545684814453,
58
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 41
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
80
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
30
]
},
{
"id": 49,
"type": "AddMaskForICLora",
"pos": [
526.6661376953125,
418.3485412597656
],
"size": [
330,
246
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "first_image",
"type": "IMAGE",
"link": 145
},
{
"name": "first_mask",
"type": "MASK",
"link": null,
"shape": 7
},
{
"name": "second_image",
"type": "IMAGE",
"link": 147,
"shape": 7
},
{
"name": "second_mask",
"type": "MASK",
"link": 143,
"shape": 7
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
109,
133
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": [
110,
134
],
"slot_index": 1
},
{
"name": "x_offset",
"type": "INT",
"links": null
},
{
"name": "y_offset",
"type": "INT",
"links": null
},
{
"name": "target_width",
"type": "INT",
"links": null
},
{
"name": "target_height",
"type": "INT",
"links": null
},
{
"name": "total_width",
"type": "INT",
"links": null
},
{
"name": "total_height",
"type": "INT",
"links": null
}
],
"properties": {
"Node name for S&R": "AddMaskForICLora"
},
"widgets_values": [
"auto",
1536,
"#FF0000"
]
},
{
"id": 34,
"type": "DualCLIPLoader",
"pos": [
-212.79994201660156,
106.50000762939453
],
"size": [
315,
106
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
62,
63
]
}
],
"properties": {
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp8_e4m3fn.safetensors",
"flux"
]
},
{
"id": 32,
"type": "VAELoader",
"pos": [
597.4476928710938,
254.066162109375
],
"size": [
248.4499969482422,
58
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
60,
82
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"FLUXvae.safetensors"
]
},
{
"id": 57,
"type": "InvertMask",
"pos": [
70,
1050
],
"size": [
140,
26
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "mask",
"type": "MASK",
"link": 118
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
142
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "InvertMask"
},
"widgets_values": [],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 3,
"type": "KSampler",
"pos": [
1250,
40
],
"size": [
315,
262
],
"flags": {},
"order": 17,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 108
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 77
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 78
},
{
"name": "latent_image",
"type": "LATENT",
"link": 88
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
7
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
1234,
"fixed",
30,
1,
"euler",
"normal",
1
]
},
{
"id": 55,
"type": "LoadImage",
"pos": [
-870,
430
],
"size": [
290,
498.96368408203125
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
145
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"example_garment_00035_00.jpg",
"image"
]
},
{
"id": 65,
"type": "GrowMask",
"pos": [
236.827392578125,
1050
],
"size": [
222.82362365722656,
82
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "mask",
"type": "MASK",
"link": 142
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
143
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "GrowMask"
},
"widgets_values": [
20,
true
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 45,
"type": "PreviewImage",
"pos": [
1827.5045166015625,
40
],
"size": [
550,
420
],
"flags": {},
"order": 20,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 102
}
],
"outputs": [],
"properties": {
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 63,
"type": "PreviewImage",
"pos": [
1081.99609375,
481.18829345703125
],
"size": [
361.63531494140625,
259.43603515625
],
"flags": {},
"order": 18,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 141
}
],
"outputs": [],
"properties": {
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 62,
"type": "JoinImageWithAlpha",
"pos": [
885.78173828125,
481.5660705566406
],
"size": [
176.39999389648438,
46
],
"flags": {},
"order": 16,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 133
},
{
"name": "alpha",
"type": "MASK",
"link": 134
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
141
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "JoinImageWithAlpha"
},
"widgets_values": []
},
{
"id": 31,
"type": "UNETLoader",
"pos": [
517.320556640625,
-173.85031127929688
],
"size": [
311,
82
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
107
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "UNETLoader"
},
"widgets_values": [
"Flux\\flux1-fill-dev.safetensors",
"fp8_e4m3fn"
],
"color": "#232",
"bgcolor": "#353"
}
],
"links": [
[
7,
3,
0,
8,
0,
"LATENT"
],
[
41,
23,
0,
26,
0,
"CONDITIONING"
],
[
60,
32,
0,
8,
1,
"VAE"
],
[
62,
34,
0,
23,
0,
"CLIP"
],
[
63,
34,
0,
7,
0,
"CLIP"
],
[
77,
38,
0,
3,
1,
"CONDITIONING"
],
[
78,
38,
1,
3,
2,
"CONDITIONING"
],
[
80,
26,
0,
38,
0,
"CONDITIONING"
],
[
81,
7,
0,
38,
1,
"CONDITIONING"
],
[
82,
32,
0,
38,
2,
"VAE"
],
[
88,
38,
2,
3,
3,
"LATENT"
],
[
102,
8,
0,
45,
0,
"IMAGE"
],
[
107,
31,
0,
48,
0,
"MODEL"
],
[
108,
48,
0,
3,
0,
"MODEL"
],
[
109,
49,
0,
38,
3,
"IMAGE"
],
[
110,
49,
1,
38,
4,
"MASK"
],
[
115,
51,
0,
52,
0,
"IMAGE"
],
[
116,
51,
0,
53,
0,
"IMAGE"
],
[
118,
52,
0,
57,
0,
"MASK"
],
[
133,
49,
0,
62,
0,
"IMAGE"
],
[
134,
49,
1,
62,
1,
"MASK"
],
[
141,
62,
0,
63,
0,
"IMAGE"
],
[
142,
57,
0,
65,
0,
"MASK"
],
[
143,
65,
0,
49,
3,
"MASK"
],
[
145,
55,
0,
49,
0,
"IMAGE"
],
[
146,
56,
0,
51,
0,
"IMAGE"
],
[
147,
56,
0,
49,
2,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.2284597357367277,
"offset": [
-695.7444488005788,
165.3071399176139
]
}
},
"version": 0.4
}
- 左侧:想穿的衣服图像
- 右侧:人物图像 + 蒙版
模型一边看着两侧,一边生成“右边的人物穿着左边的衣服的图像”。
基于指令的图像编辑(并排)
不支持多参考的基于指令的图像编辑模型,本来是无法做到“将图像 A 的要素带到图像 B”的。
但是,通过与 IC-LoRA / ACE++ 时同样的 并排技巧 以及为此学习的 LoRA,可以做到类似的事情。

{
"id": "18404b37-92b0-4d11-a39c-ae941838eb83",
"revision": 0,
"last_node_id": 88,
"last_link_id": 144,
"nodes": [
{
"id": 33,
"type": "CLIPTextEncode",
"pos": [
517.7193603515625,
378
],
"size": [
336.888427734375,
103.97698974609375
],
"flags": {
"collapsed": true
},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 118
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
99
]
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 52,
"type": "VAEEncode",
"pos": [
719.3842163085938,
468.98004150390625
],
"size": [
140,
46
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 127
},
{
"name": "vae",
"type": "VAE",
"link": 77
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
76,
116
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "VAEEncode"
},
"widgets_values": []
},
{
"id": 77,
"type": "PreviewImage",
"pos": [
744.884033203125,
699.7015991210938
],
"size": [
406.20001220703125,
348.29998779296875
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 134
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.46",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 68,
"type": "FluxGuidance",
"pos": [
1115.2528076171875,
190
],
"size": [
211.3223114013672,
58
],
"flags": {},
"order": 16,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 114
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
115
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
30
]
},
{
"id": 75,
"type": "ImageStitch",
"pos": [
165.8333740234375,
468.98004150390625
],
"size": [
270,
150
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "image1",
"type": "IMAGE",
"link": 131
},
{
"name": "image2",
"shape": 7,
"type": "IMAGE",
"link": 132
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
130
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.46",
"Node name for S&R": "ImageStitch"
},
"widgets_values": [
"right",
true,
0,
"white"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1700.061767578125,
194.12423706054688
],
"size": [
140,
46
],
"flags": {},
"order": 18,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 52
},
{
"name": "vae",
"type": "VAE",
"link": 62
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
135
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 78,
"type": "SaveImage",
"pos": [
1887.16259765625,
192.1141815185547
],
"size": [
733.415771484375,
629.1437377929688
],
"flags": {},
"order": 19,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 135
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.56"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 73,
"type": "FluxKontextImageScale",
"pos": [
483.7315673828125,
468.98004150390625
],
"size": [
187.75448608398438,
26
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 130
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
127,
134
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.43",
"Node name for S&R": "FluxKontextImageScale"
},
"widgets_values": []
},
{
"id": 69,
"type": "DualCLIPLoader",
"pos": [
174.92930603027344,
261.95574951171875
],
"size": [
270,
130
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
117,
118
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp8_e4m3fn.safetensors",
"flux",
"default"
]
},
{
"id": 43,
"type": "VAELoader",
"pos": [
462.1297302246094,
613.5346069335938
],
"size": [
234.05543518066406,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
62,
77
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae.safetensors"
]
},
{
"id": 51,
"type": "ReferenceLatent",
"pos": [
893.0234375,
190
],
"size": [
197.712890625,
46
],
"flags": {
"collapsed": false
},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 74
},
{
"name": "latent",
"shape": 7,
"type": "LATENT",
"link": 76
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
114
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "ReferenceLatent"
},
"widgets_values": []
},
{
"id": 74,
"type": "LoraLoaderModelOnly",
"pos": [
1055.795166015625,
38.038578033447266
],
"size": [
261.9280090332031,
82
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 128
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
129
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.46",
"Node name for S&R": "LoraLoaderModelOnly"
},
"widgets_values": [
"Flux.1 Kontext\\03\\Cross-Image Try-On Flux Kontext_v0.2.safetensors",
1
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 87,
"type": "MarkdownNote",
"pos": [
-454.7641906738281,
988.8134155273438
],
"size": [
275.29998779296875,
88
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {},
"widgets_values": [
"Load the reference image for the clothing."
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 88,
"type": "MarkdownNote",
"pos": [
-144.00006103515625,
1108.5716552734375
],
"size": [
275.29998779296875,
88
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {},
"widgets_values": [
"Load the image of the person whose clothes will be changed."
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
516.5379638671875,
190
],
"size": [
339.84503173828125,
123.01304626464844
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 117
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
74
]
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"Change all clothes on the right to match the left."
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 67,
"type": "MarkdownNote",
"pos": [
307.58013916015625,
-186.25665283203125
],
"size": [
406.0926818847656,
282.7126159667969
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {},
"widgets_values": [
"## models\n\n- [flux1-kontext-dev.gguf](https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF/tree/main)\n- [Cross-Image Try-On Flux Kontext_v0.2.safetensors](https://huggingface.co/nomadoor/crossimage-tryon-fluxkontext/blob/main/Cross-Image%20Try-On%20Flux%20Kontext_v0.2.safetensors)\n- [clip_l.safetensors](https://huggingface.co/comfyanonymous/flux_text_encoders/blob/main/clip_l.safetensors)\n- [t5xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/comfyanonymous/flux_text_encoders/blob/main/t5xxl_fp8_e4m3fn_scaled.safetensors)\n- [ae.safetensors](https://huggingface.co/Comfy-Org/Omnigen2_ComfyUI_repackaged/tree/main/split_files/vae)\n\n```\n📂ComfyUI/\n└── 📂models/\n ├── 📂clip/\n │ ├── clip_l.safetensors\n │ └── t5xxl_fp8_e4m3fn.safetensors\n ├── 📂loras/\n │ └── Cross-Image Try-On Flux Kontext_v0.2.safetensors\n ├── 📂unet/\n │ └── flux1-kontext-dev.gguf\n └── 📂vae/\n └── ae.safetensors\n```"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 71,
"type": "UnetLoaderGGUF",
"pos": [
744.73046875,
38.038578033447266
],
"size": [
270,
58
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
128
]
}
],
"properties": {
"cnr_id": "ComfyUI-GGUF",
"ver": "b3ec875a68d94b758914fd48d30571d953bb7a54",
"Node name for S&R": "UnetLoaderGGUF"
},
"widgets_values": [
"FLUX_gguf\\flux1-kontext-dev-Q4_K_M.gguf"
]
},
{
"id": 31,
"type": "KSampler",
"pos": [
1355.8184814453125,
194.12423706054688
],
"size": [
315,
262
],
"flags": {},
"order": 17,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 129
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 115
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 99
},
{
"name": "latent_image",
"type": "LATENT",
"link": 116
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
52
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "KSampler"
},
"widgets_values": [
603008709320546,
"randomize",
20,
1,
"euler",
"simple",
1
]
},
{
"id": 53,
"type": "LoadImage",
"pos": [
-450.5650329589844,
468.98004150390625
],
"size": [
277.51690673828125,
455.66180419921875
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
131
]
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"pexels-photo-33163411.jpg",
"image"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 76,
"type": "LoadImage",
"pos": [
-142.36582946777344,
583.6825561523438
],
"size": [
277.51690673828125,
455.66180419921875
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
132
]
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.41",
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"woman.png",
"image"
],
"color": "#232",
"bgcolor": "#353"
}
],
"links": [
[
52,
31,
0,
8,
0,
"LATENT"
],
[
62,
43,
0,
8,
1,
"VAE"
],
[
74,
6,
0,
51,
0,
"CONDITIONING"
],
[
76,
52,
0,
51,
1,
"LATENT"
],
[
77,
43,
0,
52,
1,
"VAE"
],
[
99,
33,
0,
31,
2,
"CONDITIONING"
],
[
114,
51,
0,
68,
0,
"CONDITIONING"
],
[
115,
68,
0,
31,
1,
"CONDITIONING"
],
[
116,
52,
0,
31,
3,
"LATENT"
],
[
117,
69,
0,
6,
0,
"CLIP"
],
[
118,
69,
0,
33,
0,
"CLIP"
],
[
127,
73,
0,
52,
0,
"IMAGE"
],
[
128,
71,
0,
74,
0,
"MODEL"
],
[
129,
74,
0,
31,
0,
"MODEL"
],
[
130,
75,
0,
73,
0,
"IMAGE"
],
[
131,
53,
0,
75,
0,
"IMAGE"
],
[
132,
76,
0,
75,
1,
"IMAGE"
],
[
134,
73,
0,
77,
0,
"IMAGE"
],
[
135,
8,
0,
78,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.6830134553650709,
"offset": [
240.85639710021658,
74.53751462207948
]
},
"frontendVersion": "1.26.8",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
- nomadoor/crossimage-tryon-fluxkontext
- 左侧:想穿的衣服(参考)图像
- 右侧:想换装的人物图像
模型一边看着两侧,一边生成“右边的人物穿着左边的衣服的图像”。
因为想炫耀一下,所以我把我自己制作的 LoRA 作为参考拿了出来,但 1 天后就发布了性能远超它的 Qwen-Image-Edit 用 LoRA ☹️ Clothes Try On (Clothing Transfer) - Qwen Edit
使用基于指令的图像编辑的最大优点是 不需要蒙版。
例如,想给穿迷你裙的人物穿上牛仔裤时,通常的 VTON 不仅要把迷你裙部分,连变成牛仔裤的腿部部分也必须包含在内做成蒙版。 自动生成合并了这两个区域的蒙版是非常困难的。
相对的,基于指令的图像编辑不需要蒙版,因此无需在意那些麻烦事就能进行换装。
基于指令的图像编辑(多参考)
如果是支持多参考的基于指令的图像编辑模型,那就简单了。
将想换装的人物和服装分别传递给不同的插槽,只要发出“让这个人物穿上这件衣服”的指令,就能进行换装。