What is Region-Limited Generation?
Layout is an important element in design.
Simply generating an image with the prompt "a woman leaning against a wall in the city" will not create an attractive work.
What you actually want is a brick wall on the right edge of the frame, a woman leaning against it, a street lamp right in front of it, and fewer objects on the left side of the frame to leave white space...
The technique for generating images with objects placed where you want them is called "Region-Limited Generation."
Instructing Position with Prompts
The simplest method is to write the positional relationship directly in the prompt.

{
"id": "18404b37-92b0-4d11-a39c-ae941838eb83",
"revision": 0,
"last_node_id": 45,
"last_link_id": 64,
"nodes": [
{
"id": 35,
"type": "FluxGuidance",
"pos": [
836,
190
],
"size": [
211.60000610351562,
58
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 56
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
57
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
3.5
],
"color": "#2a363b",
"bgcolor": "#3f5159"
},
{
"id": 38,
"type": "PreviewImage",
"pos": [
1568,
190
],
"size": [
430.8328552246094,
446.9476623535156
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 58
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1408,
190
],
"size": [
140,
46
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 52
},
{
"name": "vae",
"type": "VAE",
"link": 62
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
58
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 43,
"type": "VAELoader",
"pos": [
1112.7188720703125,
507.60614013671875
],
"size": [
270,
58
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
62
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae.safetensors"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 33,
"type": "CLIPTextEncode",
"pos": [
518,
378
],
"size": [
414.71820068359375,
108.47611236572266
],
"flags": {
"collapsed": true
},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 60
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
55
]
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 27,
"type": "EmptySD3LatentImage",
"pos": [
722,
471
],
"size": [
315,
106
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
51
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "EmptySD3LatentImage"
},
"widgets_values": [
1024,
1024,
1
]
},
{
"id": 42,
"type": "DualCLIPLoader",
"pos": [
185.0587921142578,
235.1116485595703
],
"size": [
270,
130
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
59,
60
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp8_e4m3fn.safetensors",
"flux",
"default"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 45,
"type": "UnetLoaderGGUF",
"pos": [
779.269287109375,
59.45874786376953
],
"size": [
270,
58
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
64
]
}
],
"properties": {
"cnr_id": "ComfyUI-GGUF",
"ver": "b3ec875a68d94b758914fd48d30571d953bb7a54",
"Node name for S&R": "UnetLoaderGGUF"
},
"widgets_values": [
"FLUX_gguf\\flux1-dev-Q5_0.gguf"
]
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
507,
190
],
"size": [
301.84503173828125,
128.01304626464844
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 59
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
56
]
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A photo showing a banana on the left and an apple on the right placed on a desk\n"
]
},
{
"id": 31,
"type": "KSampler",
"pos": [
1070,
190
],
"size": [
315,
262
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 64
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 57
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 55
},
{
"name": "latent_image",
"type": "LATENT",
"link": 51
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
52
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "KSampler"
},
"widgets_values": [
12345,
"fixed",
20,
1,
"euler",
"normal",
1
]
}
],
"links": [
[
51,
27,
0,
31,
3,
"LATENT"
],
[
52,
31,
0,
8,
0,
"LATENT"
],
[
55,
33,
0,
31,
2,
"CONDITIONING"
],
[
56,
6,
0,
35,
0,
"CONDITIONING"
],
[
57,
35,
0,
31,
1,
"CONDITIONING"
],
[
58,
8,
0,
38,
0,
"IMAGE"
],
[
59,
42,
0,
6,
0,
"CLIP"
],
[
60,
42,
0,
33,
0,
"CLIP"
],
[
62,
43,
0,
8,
1,
"VAE"
],
[
64,
45,
0,
31,
0,
"MODEL"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.9090909090909091,
"offset": [
-85.05879211425781,
40.54125213623047
]
},
"frontendVersion": "1.23.4",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
"Banana on the left, apple on the right"
Stable Diffusion's text encoder could hardly understand positional relationships, but models from Flux onward have begun to reflect them to some extent.
Still, this approach tends to break down with complex compositions; it is a way of conveying loose layout preferences rather than a strict region specification.
Repeating Inpainting
This method generates an image once and then applies inpainting to it repeatedly.

{
"id": "18404b37-92b0-4d11-a39c-ae941838eb83",
"revision": 0,
"last_node_id": 47,
"last_link_id": 72,
"nodes": [
{
"id": 8,
"type": "VAEDecode",
"pos": [
1774.15185546875,
167.77081298828125
],
"size": [
140,
46
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 52
},
{
"name": "vae",
"type": "VAE",
"link": 62
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
58
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 42,
"type": "DualCLIPLoader",
"pos": [
185.0587921142578,
235.1116485595703
],
"size": [
270,
130
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
59,
60
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"clip_l.safetensors",
"t5xxl_fp8_e4m3fn.safetensors",
"flux",
"default"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 35,
"type": "FluxGuidance",
"pos": [
836,
190
],
"size": [
211.60000610351562,
58
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 56
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
65
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "FluxGuidance"
},
"widgets_values": [
3.5
],
"color": "#2a363b",
"bgcolor": "#3f5159"
},
{
"id": 33,
"type": "CLIPTextEncode",
"pos": [
518,
378
],
"size": [
414.71820068359375,
108.47611236572266
],
"flags": {
"collapsed": true
},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 60
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
66
]
}
],
"title": "CLIP Text Encode (Negative Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 47,
"type": "InpaintModelConditioning",
"pos": [
1136.67138671875,
187.0927276611328
],
"size": [
270,
138
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "positive",
"type": "CONDITIONING",
"link": 65
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 66
},
{
"name": "vae",
"type": "VAE",
"link": 72
},
{
"name": "pixels",
"type": "IMAGE",
"link": 67
},
{
"name": "mask",
"type": "MASK",
"link": 68
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
69
]
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
70
]
},
{
"name": "latent",
"type": "LATENT",
"links": [
71
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.43",
"Node name for S&R": "InpaintModelConditioning"
},
"widgets_values": [
true
]
},
{
"id": 43,
"type": "VAELoader",
"pos": [
767.5374755859375,
432.8213806152344
],
"size": [
270,
58
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
62,
72
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae.safetensors"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 38,
"type": "PreviewImage",
"pos": [
1934.15185546875,
167.77081298828125
],
"size": [
444.1662292480469,
578.058837890625
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 58
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 45,
"type": "UnetLoaderGGUF",
"pos": [
1133.6611328125,
68.74612426757812
],
"size": [
270,
58
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
64
]
}
],
"properties": {
"cnr_id": "ComfyUI-GGUF",
"ver": "b3ec875a68d94b758914fd48d30571d953bb7a54",
"Node name for S&R": "UnetLoaderGGUF"
},
"widgets_values": [
"FLUX_gguf\\flux1-fill-dev-Q4_K_S.gguf"
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 46,
"type": "LoadImage",
"pos": [
753.7998657226562,
559.425048828125
],
"size": [
296.30230712890625,
481.77783203125
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
67
]
},
{
"name": "MASK",
"type": "MASK",
"links": [
68
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.43",
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"clipspace/clipspace-mask-2027464.8999999762.png [input]",
"image"
]
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
507,
190
],
"size": [
301.84503173828125,
128.01304626464844
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 59
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
56
]
}
],
"title": "CLIP Text Encode (Positive Prompt)",
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"the red cone on top of the cube"
]
},
{
"id": 31,
"type": "KSampler",
"pos": [
1436.15185546875,
167.77081298828125
],
"size": [
315,
262
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 64
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 69
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 70
},
{
"name": "latent_image",
"type": "LATENT",
"link": 71
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
52
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.39",
"Node name for S&R": "KSampler"
},
"widgets_values": [
123,
"fixed",
20,
1,
"euler",
"normal",
1
]
}
],
"links": [
[
52,
31,
0,
8,
0,
"LATENT"
],
[
56,
6,
0,
35,
0,
"CONDITIONING"
],
[
58,
8,
0,
38,
0,
"IMAGE"
],
[
59,
42,
0,
6,
0,
"CLIP"
],
[
60,
42,
0,
33,
0,
"CLIP"
],
[
62,
43,
0,
8,
1,
"VAE"
],
[
64,
45,
0,
31,
0,
"MODEL"
],
[
65,
35,
0,
47,
0,
"CONDITIONING"
],
[
66,
33,
0,
47,
1,
"CONDITIONING"
],
[
67,
46,
0,
47,
3,
"IMAGE"
],
[
68,
46,
1,
47,
4,
"MASK"
],
[
69,
47,
0,
31,
1,
"CONDITIONING"
],
[
70,
47,
1,
31,
2,
"CONDITIONING"
],
[
71,
47,
2,
31,
3,
"LATENT"
],
[
72,
43,
0,
47,
2,
"VAE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.620921323059155,
"offset": [
-85.05879211425781,
31.253875732421875
]
},
"frontendVersion": "1.23.4",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
1. Generate a base image
2. Mask the area you want to edit and inpaint it
3. Move the mask to another area and inpaint again, as many times as needed
You might think it's not very smart, but it is a reliable and stable method. Prompts do not mix, and a LoRA has essentially no effect outside the mask. Each region can be treated as a completely independent step.
The weakness is that, because the regions are generated separately, interactions between subjects are not possible. Images of people shaking hands, for example, tend to have mismatched lines of sight or look unnatural.
Conditioning Set Area (Regional Prompting)
This method attempts to apply a different text condition to each position in the image. It works through the cross-attention layers, so that each region is driven by its own prompt.

{
"id": "e524c983-e762-4a7d-a5cb-d0f3a36bde28",
"revision": 0,
"last_node_id": 21,
"last_link_id": 27,
"nodes": [
{
"id": 15,
"type": "ConditioningCombine",
"pos": [
795.6497192382812,
-233.876220703125
],
"size": [
211.060546875,
46
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "conditioning_1",
"type": "CONDITIONING",
"link": 18
},
{
"name": "conditioning_2",
"type": "CONDITIONING",
"link": 19
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
22
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "ConditioningCombine"
},
"widgets_values": [],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 19,
"type": "ConditioningCombine",
"pos": [
1058.4512939453125,
60.54191207885742
],
"size": [
211.060546875,
46
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "conditioning_1",
"type": "CONDITIONING",
"link": 22
},
{
"name": "conditioning_2",
"type": "CONDITIONING",
"link": 27
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
24
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "ConditioningCombine"
},
"widgets_values": [],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 13,
"type": "CLIPTextEncode",
"pos": [
62.78541564941406,
45.40777587890625
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 15
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
14
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of an apple"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
62.78541564941406,
-569.9358520507812
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
11
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of a banana"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 21,
"type": "LoadImageMask",
"pos": [
62.78541564941406,
262.41204833984375
],
"size": [
341.8740234375,
333.62274169921875
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
26
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"apple (2).png",
"red",
"image"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 20,
"type": "LoadImageMask",
"pos": [
62.78541564941406,
-352.93157958984375
],
"size": [
341.8740234375,
333.62274169921875
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
25
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"banana (2).png",
"red",
"image"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 17,
"type": "CLIPTextEncode",
"pos": [
62.78541564941406,
660.7514038085938
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {
"collapsed": false
},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 21
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
27
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A beautiful analog-style photograph,wood table"
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
62.78541564941406,
877.7556762695312
],
"size": [
341.8740234375,
104.42913055419922
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
6
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, low quality"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1721.425537109375,
180.1886444091797
],
"size": [
177.93228149414062,
46
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 13
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
10
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 5,
"type": "EmptyLatentImage",
"pos": [
1020.22705078125,
334.2445068359375
],
"size": [
315,
106
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
2
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
768,
768,
1
]
},
{
"id": 14,
"type": "ConditioningSetMask",
"pos": [
458.8059997558594,
45.40777587890625
],
"size": [
270,
102
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 14
},
{
"name": "mask",
"type": "MASK",
"link": 26
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
19
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "ConditioningSetMask"
},
"widgets_values": [
1.5,
"default"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 11,
"type": "ConditioningSetMask",
"pos": [
465.2082824707031,
-569.9358520507812
],
"size": [
270,
102
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "conditioning",
"type": "CONDITIONING",
"link": 11
},
{
"name": "mask",
"type": "MASK",
"link": 25
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
18
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "ConditioningSetMask"
},
"widgets_values": [
1.5,
"default"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
-419.08935546875,
178.1886444091797
],
"size": [
315,
98
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
1
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5,
15,
21
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CheckpointLoaderSimple",
"models": [
{
"name": "v1-5-pruned-emaonly-fp16.safetensors",
"url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
"directory": "checkpoints"
}
]
},
"widgets_values": [
"📷-v1.x\\real-dream-15.safetensors"
]
},
{
"id": 12,
"type": "VAELoader",
"pos": [
1723.3863525390625,
289.3052978515625
],
"size": [
270,
58
],
"flags": {
"collapsed": true
},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
1375.4254150390625,
178.1886444091797
],
"size": [
315,
262
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 1
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 24
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 6
},
{
"name": "latent_image",
"type": "LATENT",
"link": 2
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSampler"
},
"widgets_values": [
4444,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
1
]
},
{
"id": 10,
"type": "PreviewImage",
"pos": [
1984.327880859375,
-58.26746368408203
],
"size": [
582.5722045898438,
626.802978515625
],
"flags": {},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 10
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
}
],
"links": [
[
1,
4,
0,
3,
0,
"MODEL"
],
[
2,
5,
0,
3,
3,
"LATENT"
],
[
3,
4,
1,
6,
0,
"CLIP"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
6,
7,
0,
3,
2,
"CONDITIONING"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
10,
8,
0,
10,
0,
"IMAGE"
],
[
11,
6,
0,
11,
0,
"CONDITIONING"
],
[
13,
12,
0,
8,
1,
"VAE"
],
[
14,
13,
0,
14,
0,
"CONDITIONING"
],
[
15,
4,
1,
13,
0,
"CLIP"
],
[
18,
11,
0,
15,
0,
"CONDITIONING"
],
[
19,
14,
0,
15,
1,
"CONDITIONING"
],
[
21,
4,
1,
17,
0,
"CLIP"
],
[
22,
15,
0,
19,
0,
"CONDITIONING"
],
[
24,
19,
0,
3,
1,
"CONDITIONING"
],
[
25,
20,
0,
11,
1,
"MASK"
],
[
26,
21,
0,
14,
1,
"MASK"
],
[
27,
17,
0,
19,
1,
"CONDITIONING"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.030876132930519,
"offset": [
-181.07522170324756,
348.5181404697794
]
},
"frontendVersion": "1.24.1",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
The theory is elegant, but in practice the boundaries often blur or the prompts do not switch cleanly between regions, so it is not very practical.
Also, you cannot apply a LoRA to only one region.
Latent Composite (Composition in Latent Space)
This method composites images while they are still in latent space.

{
"id": "e524c983-e762-4a7d-a5cb-d0f3a36bde28",
"revision": 0,
"last_node_id": 47,
"last_link_id": 69,
"nodes": [
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
-74.14247131347656,
-228.59242248535156
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
37
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of a banana"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 47,
"type": "VAELoader",
"pos": [
1464.0460205078125,
-286.2972106933594
],
"size": [
270,
58
],
"flags": {
"collapsed": true
},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
66
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 12,
"type": "VAELoader",
"pos": [
2289.68603515625,
149.98648071289062
],
"size": [
270,
58
],
"flags": {
"collapsed": true
},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 43,
"type": "EmptyLatentImage",
"pos": [
-74.14247131347656,
3.208286762237549
],
"size": [
341.8740234375,
106
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
64
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
768,
512,
1
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 17,
"type": "CLIPTextEncode",
"pos": [
-74.14247131347656,
-654.4688720703125
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {
"collapsed": false
},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 21
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
40
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A beautiful analog-style photograph,table top"
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 42,
"type": "EmptyLatentImage",
"pos": [
-74.14247131347656,
-414.10546875
],
"size": [
341.8740234375,
106
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
63
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
768,
768,
1
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 13,
"type": "CLIPTextEncode",
"pos": [
247.21710205078125,
380.4800109863281
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 15
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
44
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of an apple"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 44,
"type": "EmptyLatentImage",
"pos": [
247.21710205078125,
612.2808837890625
],
"size": [
341.8740234375,
106
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
65
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
768,
512,
1
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 38,
"type": "PrimitiveNode",
"pos": [
-74.14247131347656,
188.72134399414062
],
"size": [
341.8740234375,
82
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "INT",
"type": "INT",
"widget": {
"name": "end_at_step"
},
"links": [
46,
47,
48,
50
]
}
],
"title": "end_at_step",
"properties": {
"Run widget replace on values": false
},
"widgets_values": [
10,
"fixed"
]
},
{
"id": 33,
"type": "LatentCompositeMasked",
"pos": [
833.8609008789062,
-348.69537353515625
],
"size": [
270,
146
],
"flags": {},
"order": 17,
"mode": 0,
"inputs": [
{
"name": "destination",
"type": "LATENT",
"link": 52
},
{
"name": "source",
"type": "LATENT",
"link": 53
},
{
"name": "mask",
"shape": 7,
"type": "MASK",
"link": 54
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
55
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LatentCompositeMasked"
},
"widgets_values": [
0,
0,
true
]
},
{
"id": 40,
"type": "LatentCompositeMasked",
"pos": [
1161.2132568359375,
-135.64053344726562
],
"size": [
270,
146
],
"flags": {},
"order": 18,
"mode": 0,
"inputs": [
{
"name": "destination",
"type": "LATENT",
"link": 55
},
{
"name": "source",
"type": "LATENT",
"link": 57
},
{
"name": "mask",
"shape": 7,
"type": "MASK",
"link": 56
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
58,
69
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LatentCompositeMasked"
},
"widgets_values": [
0,
0,
true
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
2287.659423828125,
38.063751220703125
],
"size": [
177.93228149414062,
46
],
"flags": {},
"order": 21,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 51
},
{
"name": "vae",
"type": "VAE",
"link": 13
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
10
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 10,
"type": "PreviewImage",
"pos": [
2515.8095703125,
38.063751220703125
],
"size": [
582.5722045898438,
626.802978515625
],
"flags": {},
"order": 23,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 10
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 35,
"type": "LoadImageMask",
"pos": [
837.768310546875,
-145.2086639404297
],
"size": [
214.080078125,
330
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
54
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"banana (2).png",
"red",
"image"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 34,
"type": "LoadImageMask",
"pos": [
1166.2705078125,
73.07683563232422
],
"size": [
214.080078125,
330
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
56
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"apple (2).png",
"red",
"image"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 45,
"type": "VAEDecode",
"pos": [
1454.0460205078125,
-376.2972106933594
],
"size": [
177.93228149414062,
46
],
"flags": {},
"order": 20,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 69
},
{
"name": "vae",
"type": "VAE",
"link": 66
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
67
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 46,
"type": "PreviewImage",
"pos": [
1674.0460205078125,
-376.2972106933594
],
"size": [
210,
258
],
"flags": {},
"order": 22,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 67
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
-78.64060974121094,
875.3549194335938
],
"size": [
341.8740234375,
104.42913055419922
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
38,
41,
45,
61
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, low quality"
]
},
{
"id": 41,
"type": "CLIPTextEncode",
"pos": [
1516.0802001953125,
126.6052017211914
],
"size": [
341.8740234375,
152.28765869140625
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 59
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
60
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of a beautiful analog-style photograph,wood table,an apple,a banan"
]
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
-613.933349609375,
38.063751220703125
],
"size": [
315,
98
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
39,
42,
43,
62
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5,
15,
21,
59
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CheckpointLoaderSimple",
"models": [
{
"name": "v1-5-pruned-emaonly-fp16.safetensors",
"url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
"directory": "checkpoints"
}
]
},
"widgets_values": [
"📷-v1.x\\real-dream-15.safetensors"
]
},
{
"id": 36,
"type": "KSamplerAdvanced",
"pos": [
438.809326171875,
-654.4688720703125
],
"size": [
304.748046875,
334
],
"flags": {},
"order": 16,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 42
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 40
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 41
},
{
"name": "latent_image",
"type": "LATENT",
"link": 63
},
{
"name": "end_at_step",
"type": "INT",
"widget": {
"name": "end_at_step"
},
"link": 48
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
52
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSamplerAdvanced"
},
"widgets_values": [
"enable",
777,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
0,
10,
"enable"
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 32,
"type": "KSamplerAdvanced",
"pos": [
438.809326171875,
-228.59242248535156
],
"size": [
304.748046875,
334
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 39
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 37
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 38
},
{
"name": "latent_image",
"type": "LATENT",
"link": 64
},
{
"name": "end_at_step",
"type": "INT",
"widget": {
"name": "end_at_step"
},
"link": 46
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
53
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSamplerAdvanced"
},
"widgets_values": [
"enable",
777,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
0,
10,
"enable"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 37,
"type": "KSamplerAdvanced",
"pos": [
760.1702270507812,
294.8553466796875
],
"size": [
304.748046875,
334
],
"flags": {},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 43
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 44
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 45
},
{
"name": "latent_image",
"type": "LATENT",
"link": 65
},
{
"name": "end_at_step",
"type": "INT",
"widget": {
"name": "end_at_step"
},
"link": 47
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
57
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSamplerAdvanced"
},
"widgets_values": [
"enable",
777,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
0,
10,
"enable"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 39,
"type": "KSamplerAdvanced",
"pos": [
1953.4892578125,
38.063751220703125
],
"size": [
304.748046875,
334
],
"flags": {},
"order": 19,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 62
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 60
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 61
},
{
"name": "latent_image",
"type": "LATENT",
"link": 58
},
{
"name": "start_at_step",
"type": "INT",
"widget": {
"name": "start_at_step"
},
"link": 50
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
51
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSamplerAdvanced"
},
"widgets_values": [
"disable",
777,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
10,
20,
"disable"
]
}
],
"links": [
[
3,
4,
1,
6,
0,
"CLIP"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
10,
8,
0,
10,
0,
"IMAGE"
],
[
13,
12,
0,
8,
1,
"VAE"
],
[
15,
4,
1,
13,
0,
"CLIP"
],
[
21,
4,
1,
17,
0,
"CLIP"
],
[
37,
6,
0,
32,
1,
"CONDITIONING"
],
[
38,
7,
0,
32,
2,
"CONDITIONING"
],
[
39,
4,
0,
32,
0,
"MODEL"
],
[
40,
17,
0,
36,
1,
"CONDITIONING"
],
[
41,
7,
0,
36,
2,
"CONDITIONING"
],
[
42,
4,
0,
36,
0,
"MODEL"
],
[
43,
4,
0,
37,
0,
"MODEL"
],
[
44,
13,
0,
37,
1,
"CONDITIONING"
],
[
45,
7,
0,
37,
2,
"CONDITIONING"
],
[
46,
38,
0,
32,
4,
"INT"
],
[
47,
38,
0,
37,
4,
"INT"
],
[
48,
38,
0,
36,
4,
"INT"
],
[
50,
38,
0,
39,
4,
"INT"
],
[
51,
39,
0,
8,
0,
"LATENT"
],
[
52,
36,
0,
33,
0,
"LATENT"
],
[
53,
32,
0,
33,
1,
"LATENT"
],
[
54,
35,
0,
33,
2,
"MASK"
],
[
55,
33,
0,
40,
0,
"LATENT"
],
[
56,
34,
0,
40,
2,
"MASK"
],
[
57,
37,
0,
40,
1,
"LATENT"
],
[
58,
40,
0,
39,
3,
"LATENT"
],
[
59,
4,
1,
41,
0,
"CLIP"
],
[
60,
41,
0,
39,
1,
"CONDITIONING"
],
[
61,
7,
0,
39,
2,
"CONDITIONING"
],
[
62,
4,
0,
39,
0,
"MODEL"
],
[
63,
42,
0,
36,
3,
"LATENT"
],
[
64,
43,
0,
32,
3,
"LATENT"
],
[
65,
44,
0,
37,
3,
"LATENT"
],
[
66,
47,
0,
45,
1,
"VAE"
],
[
67,
45,
0,
46,
0,
"IMAGE"
],
[
69,
40,
0,
45,
0,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.5290024565035728,
"offset": [
713.933349609375,
754.4688720703125
]
},
"frontendVersion": "1.24.1",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
1. First, generate multiple images (banana image, apple image, background image, etc.), stopping each one partway through sampling
2. Paste each latent onto a single latent using masks
3. Run the remaining sampling steps on the composited latent
Each object can be generated with separate conditions, and finally blended as a "single image."
However, in many situations it is more reliable to inpaint each object in turn and then run a final image2image pass over the whole image than to use this method.
Latent Couple / Attention Couple
Latent Couple
A method in which the latent is split completely by region, each region is generated with its own settings (prompt, LoRA, etc.), and the results are then combined.
- Ideal in that completely different settings can be used for each region
- The computational cost is high, because it amounts to generating as many images as there are regions
There is no direct implementation in ComfyUI at the moment.
Attention Couple
Whereas Latent Couple runs the entire UNet separately for each region, Attention Couple performs the per-region computation only in the cross-attention layers.
The computational cost is much lower, but a LoRA cannot be specified per region.

{
"id": "bd31dc25-50da-43d7-b1cb-e53f5d146f11",
"revision": 0,
"last_node_id": 19,
"last_link_id": 25,
"nodes": [
{
"id": 8,
"type": "VAEDecode",
"pos": [
1573.7244873046875,
560.4592895507812
],
"size": [
150.13829040527344,
46
],
"flags": {},
"order": 14,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 25
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
10
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
466.21844482421875,
52.61830520629883
],
"size": [
306.9939880371094,
121.5467300415039
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 3
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
4,
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A beautiful analog-style photograph,table top"
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 12,
"type": "SolidMask",
"pos": [
466.21844482421875,
234.42247009277344
],
"size": [
306.9939880371094,
106
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "width",
"type": "INT",
"widget": {
"name": "width"
},
"link": 16
},
{
"name": "height",
"type": "INT",
"widget": {
"name": "height"
},
"link": 18
}
],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
14
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "SolidMask"
},
"widgets_values": [
1,
768,
768
],
"color": "#223",
"bgcolor": "#335"
},
{
"id": 17,
"type": "LoadImageMask",
"pos": [
466.21844482421875,
582.4840698242188
],
"size": [
306.9939880371094,
330
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
21
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"banana (2).png",
"red",
"image"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 15,
"type": "CLIPTextEncode",
"pos": [
466.21844482421875,
400.6799011230469
],
"size": [
306.9939880371094,
121.5467300415039
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 19
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
20
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of a banana"
],
"color": "#322",
"bgcolor": "#533"
},
{
"id": 18,
"type": "CLIPTextEncode",
"pos": [
466.21844482421875,
972.7415161132812
],
"size": [
306.9939880371094,
121.5467300415039
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 22
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
23
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"RAW photo of an apple"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 14,
"type": "PrimitiveNode",
"pos": [
-6.389673233032227,
859.0269165039062
],
"size": [
210,
82
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "INT",
"type": "INT",
"widget": {
"name": "height"
},
"links": [
17,
18
]
}
],
"title": "height",
"properties": {
"Run widget replace on values": false
},
"widgets_values": [
768,
"fixed"
]
},
{
"id": 16,
"type": "LoadImageMask",
"pos": [
466.21844482421875,
1154.545654296875
],
"size": [
306.9939880371094,
330
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MASK",
"type": "MASK",
"links": [
24
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "LoadImageMask"
},
"widgets_values": [
"apple (2).png",
"red",
"image"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
466.21844482421875,
1544.8031005859375
],
"size": [
306.9939880371094,
89.91178131103516
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 5
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
6
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"text, watermark, low quality"
]
},
{
"id": 10,
"type": "AttentionCouplePPM",
"pos": [
986.5342407226562,
560.4592895507812
],
"size": [
186.96835327148438,
186
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 12
},
{
"name": "base_cond",
"type": "CONDITIONING",
"link": 13
},
{
"name": "base_mask",
"type": "MASK",
"link": 14
},
{
"name": "cond_1",
"type": "CONDITIONING",
"link": 20
},
{
"name": "mask_1",
"type": "MASK",
"link": 21
},
{
"name": "cond_2",
"type": "CONDITIONING",
"link": 23
},
{
"name": "mask_2",
"type": "MASK",
"link": 24
},
{
"name": "cond_3",
"type": "CONDITIONING",
"link": null
},
{
"name": "mask_3",
"type": "MASK",
"link": null
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
11
]
}
],
"properties": {
"cnr_id": "comfyui-ppm",
"ver": "7a28475ba26ebb76687c6cc343437b6deff36232",
"Node name for S&R": "AttentionCouplePPM"
},
"widgets_values": [],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 11,
"type": "PreviewImage",
"pos": [
1766.4737548828125,
560.4592895507812
],
"size": [
529.1884155273438,
593.6753540039062
],
"flags": {},
"order": 15,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 10
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 19,
"type": "VAELoader",
"pos": [
1577.5760498046875,
560.4592895507812
],
"size": [
268.0599060058594,
58.97004699707031
],
"flags": {
"collapsed": true
},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
25
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"sdxl_vae.safetensors"
]
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
-109.82740020751953,
560.4592895507812
],
"size": [
315,
98
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
12
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
3,
5,
19,
22
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "CheckpointLoaderSimple",
"models": [
{
"name": "v1-5-pruned-emaonly-fp16.safetensors",
"url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
"directory": "checkpoints"
}
]
},
"widgets_values": [
"📷-XL\\RealVisXL_V4.0.safetensors"
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
1216.113525390625,
560.4592895507812
],
"size": [
315,
262
],
"flags": {},
"order": 13,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 11
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 4
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 6
},
{
"name": "latent_image",
"type": "LATENT",
"link": 2
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "KSampler"
},
"widgets_values": [
13022,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
1
]
},
{
"id": 5,
"type": "EmptyLatentImage",
"pos": [
946.1537475585938,
846.269287109375
],
"size": [
227.35647583007812,
106
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "width",
"type": "INT",
"widget": {
"name": "width"
},
"link": 15
},
{
"name": "height",
"type": "INT",
"widget": {
"name": "height"
},
"link": 17
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
2
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.44",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
768,
768,
1
]
},
{
"id": 13,
"type": "PrimitiveNode",
"pos": [
-6.389673233032227,
729.040283203125
],
"size": [
210,
82
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "INT",
"type": "INT",
"widget": {
"name": "width"
},
"links": [
15,
16
]
}
],
"title": "width",
"properties": {
"Run widget replace on values": false
},
"widgets_values": [
768,
"fixed"
]
}
],
"links": [
[
2,
5,
0,
3,
3,
"LATENT"
],
[
3,
4,
1,
6,
0,
"CLIP"
],
[
4,
6,
0,
3,
1,
"CONDITIONING"
],
[
5,
4,
1,
7,
0,
"CLIP"
],
[
6,
7,
0,
3,
2,
"CONDITIONING"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
10,
8,
0,
11,
0,
"IMAGE"
],
[
11,
10,
0,
3,
0,
"MODEL"
],
[
12,
4,
0,
10,
0,
"MODEL"
],
[
13,
6,
0,
10,
1,
"CONDITIONING"
],
[
14,
12,
0,
10,
2,
"MASK"
],
[
15,
13,
0,
5,
0,
"INT"
],
[
16,
13,
0,
12,
0,
"INT"
],
[
17,
14,
0,
5,
1,
"INT"
],
[
18,
14,
0,
12,
1,
"INT"
],
[
19,
4,
1,
15,
0,
"CLIP"
],
[
20,
15,
0,
10,
3,
"CONDITIONING"
],
[
21,
17,
0,
10,
4,
"MASK"
],
[
22,
4,
1,
18,
0,
"CLIP"
],
[
23,
18,
0,
10,
5,
"CONDITIONING"
],
[
24,
16,
0,
10,
6,
"MASK"
],
[
25,
19,
0,
8,
1,
"VAE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.0308761329305212,
"offset": [
-632.0231183731564,
-69.89356683867605
]
},
"frontendVersion": "1.24.1",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}
Refining Rough Collages (Recommended)
A method in which you make a rough collage image and then have the model redraw it as a natural-looking picture.

You can specify positions very intuitively, and all you need to do is paste in suitable cut-outs of the objects you want to generate, so this is actually a highly recommended method.
Details → Refining Rough Collages