What is Hires.fix?

It has a cool name, but what it does is not that complex.
First, generate an image with text2image, then resize that image by 1.5 to 2 times. Put that enlarged image into image2image and have it redrawn again.
It is simply a consolidation of this procedure.
Why was this method born?
The recommended resolution for Stable Diffusion 1.5 was 512 × 512px, and it could not generate large images.
There are two main reasons for this.
Problem of Calculation Cost
As the resolution increases, the required VRAM and calculation time increase dramatically. When image generation first appeared, it was not as optimized as it is now, and generating large images suddenly was a very heavy process.
Problem of Image Size Used for Training
More fundamentally, it is about "what size images the model was trained with".

Stable Diffusion 1.5 is trained almost exclusively with 512 × 512px images. In other words, it is good at drawing pictures around this size, but it hasn't practiced other resolutions at all.
Suppose you ask a manga artist to suddenly draw a picture filling a gymnasium wall. Since they usually draw at manuscript-paper size, they would probably just line up small panels and characters at that familiar scale.
They haven't practiced drawing "one huge picture using the whole wall", and the idea doesn't even occur to them.
Birth of Hires.fix
So, first have the model draw at around 512 × 512px, which it is good at, then enlarge it, and have it redraw again using the enlarged image as a draft.
This two-stage approach was born. This idea of "going through the model's comfortable resolution once and then lifting it to high resolution" is the concept behind Hires.fix.
Basic Method

{
"id": "8b9f7796-0873-4025-be3c-0f997f67f866",
"revision": 0,
"last_node_id": 17,
"last_link_id": 37,
"nodes": [
{
"id": 5,
"type": "EmptyLatentImage",
"pos": [
582.1350317382813,
606.5799999999999
],
"size": [
244.81999999999994,
106
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
2
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
512,
512,
1
]
},
{
"id": 15,
"type": "VAEDecode",
"pos": [
2192.0144598529414,
190.6545154746329
],
"size": [
192,
46
],
"flags": {},
"order": 11,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 29
},
{
"name": "vae",
"type": "VAE",
"link": 34
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
30
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
}
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
35.04463803391465,
305.99511645379476
],
"size": [
315,
98
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
16,
31
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
17,
18
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"v1-5-pruned-emaonly-fp16.safetensors"
]
},
{
"id": 16,
"type": "SaveImage",
"pos": [
2413.5562680422718,
190.54464832962913
],
"size": [
440.8026035004723,
492.16667321788407
],
"flags": {},
"order": 12,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 30
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
416.1970166015625,
392.37848510742185
],
"size": [
410.75801513671877,
158.82607910156253
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 18
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
6,
22
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"worst quality, text, watermark"
]
},
{
"id": 10,
"type": "VAELoader",
"pos": [
896.9256198347109,
68.77178286934158
],
"size": [
281.0743801652891,
58
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
10,
33,
34
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 17,
"type": "PreviewImage",
"pos": [
1423.6732583128128,
328.6264740212463
],
"size": [
245.18407212622105,
286.5709992486851
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 37
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "PreviewImage"
},
"widgets_values": []
},
{
"id": 3,
"type": "KSampler",
"pos": [
863,
186
],
"size": [
315,
262
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 16
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 4
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 6
},
{
"name": "latent_image",
"type": "LATENT",
"link": 2
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
7
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
10000,
"fixed",
20,
8,
"euler",
"normal",
1
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 12,
"type": "ImageScaleBy",
"pos": [
1424.2369484504152,
186
],
"size": [
210,
82
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 19
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
24
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "ImageScaleBy"
},
"widgets_values": [
"nearest-exact",
1.5
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
415,
186
],
"size": [
411.95503173828126,
151.0030493164063
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 17
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
4,
32
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"high quality,high detailed, RAW photo of a white fluffy puppy,rimlight,on the desk,blurry background,house plant"
]
},
{
"id": 14,
"type": "VAEEncode",
"pos": [
1661.3554226756228,
186
],
"size": [
164.5454545454545,
46
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "pixels",
"type": "IMAGE",
"link": 24
},
{
"name": "vae",
"type": "VAE",
"link": 33
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
26
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAEEncode"
}
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1205.1184742252076,
186
],
"size": [
179.27272727272702,
46
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 10
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
19,
37
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 13,
"type": "KSampler",
"pos": [
1846.4738969008304,
187
],
"size": [
315,
262
],
"flags": {},
"order": 10,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 31
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 32
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 22
},
{
"name": "latent_image",
"type": "LATENT",
"link": 26
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
29
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
10000,
"fixed",
20,
8,
"euler",
"normal",
0.6
],
"color": "#432",
"bgcolor": "#653"
}
],
"links": [
[
2,
5,
0,
3,
3,
"LATENT"
],
[
4,
6,
0,
3,
1,
"CONDITIONING"
],
[
6,
7,
0,
3,
2,
"CONDITIONING"
],
[
7,
3,
0,
8,
0,
"LATENT"
],
[
10,
10,
0,
8,
1,
"VAE"
],
[
16,
4,
0,
3,
0,
"MODEL"
],
[
17,
4,
1,
6,
0,
"CLIP"
],
[
18,
4,
1,
7,
0,
"CLIP"
],
[
19,
8,
0,
12,
0,
"IMAGE"
],
[
22,
7,
0,
13,
2,
"CONDITIONING"
],
[
24,
12,
0,
14,
0,
"IMAGE"
],
[
26,
14,
0,
13,
3,
"LATENT"
],
[
29,
13,
0,
15,
0,
"LATENT"
],
[
30,
15,
0,
16,
0,
"IMAGE"
],
[
31,
4,
0,
13,
0,
"MODEL"
],
[
32,
6,
0,
13,
1,
"CONDITIONING"
],
[
33,
10,
0,
14,
1,
"VAE"
],
[
34,
10,
0,
15,
1,
"VAE"
],
[
37,
8,
0,
17,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.6830134553650705,
"offset": [
64.95536196608535,
32.692317130658424
]
},
"frontendVersion": "1.34.6",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true,
"linkExtensions": [
{
"id": 31,
"parentId": 2
},
{
"id": 33,
"parentId": 5
},
{
"id": 34,
"parentId": 6
}
],
"reroutes": [
{
"id": 1,
"pos": [
441.0480618422728,
19.664788129629585
],
"linkIds": [
31
]
},
{
"id": 2,
"parentId": 1,
"pos": [
1771.4727618422721,
24.549888129629558
],
"linkIds": [
31
]
},
{
"id": 5,
"pos": [
1624.7392361419663,
86.84280922035722
],
"linkIds": [
33,
34
]
},
{
"id": 6,
"parentId": 5,
"pos": [
2154.447837765052,
96.76734244275261
],
"linkIds": [
34
]
}
]
},
"version": 0.4
}
- 🟪 text2image
- 🟦 Enlarge the decoded image by 1.5 times with the Upscale Image By node
- 🟨 Input the enlarged image into image2image
Method of Enlarging as Latent
In the previous workflow, the flow was to decode the text2image image into a pixel image once, enlarge it, convert it back to latent, and then image2image.
Here, the idea comes up: "Can't we enlarge it as latent without bothering to return it to a pixel image?"
However, simply enlarging latent causes unacceptable degradation. Therefore, it was not practical for a long time, but a custom node that performs "latent enlargement with suppressed degradation" has appeared.
- Goktug/ComfyUI_NNLatentUpscale (forked from Ttl)
- Upscales latent using a neural network.

{
"id": "8b9f7796-0873-4025-be3c-0f997f67f866",
"revision": 0,
"last_node_id": 18,
"last_link_id": 40,
"nodes": [
{
"id": 5,
"type": "EmptyLatentImage",
"pos": [
582.1350317382813,
606.5799999999999
],
"size": [
244.81999999999994,
106
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
2
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
512,
512,
1
]
},
{
"id": 15,
"type": "VAEDecode",
"pos": [
1797.4334510317049,
183.00700000000006
],
"size": [
192,
46
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 29
},
{
"name": "vae",
"type": "VAE",
"link": 34
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"slot_index": 0,
"links": [
30
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "VAEDecode"
},
"widgets_values": []
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
35.04463803391465,
305.99511645379476
],
"size": [
315,
98
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"slot_index": 0,
"links": [
16,
31
]
},
{
"name": "CLIP",
"type": "CLIP",
"slot_index": 1,
"links": [
17,
18
]
},
{
"name": "VAE",
"type": "VAE",
"slot_index": 2,
"links": []
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"v1-5-pruned-emaonly-fp16.safetensors"
]
},
{
"id": 16,
"type": "SaveImage",
"pos": [
2020.9112680422732,
183.00700000000006
],
"size": [
440.8026035004723,
492.16667321788407
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 30
}
],
"outputs": [],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33"
},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 7,
"type": "CLIPTextEncode",
"pos": [
416.1970166015625,
392.37848510742185
],
"size": [
410.75801513671877,
158.82607910156253
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 18
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
6,
22
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"worst quality, text, watermark"
]
},
{
"id": 6,
"type": "CLIPTextEncode",
"pos": [
415,
183.00700000000006
],
"size": [
411.95503173828126,
151.0030493164063
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 17
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"slot_index": 0,
"links": [
4,
32
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"high quality,high detailed, RAW photo of a white fluffy puppy,rimlight,on the desk,blurry background,house plant"
]
},
{
"id": 3,
"type": "KSampler",
"pos": [
863,
183.00700000000006
],
"size": [
315,
262
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 16
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 4
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 6
},
{
"name": "latent_image",
"type": "LATENT",
"link": 2
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
38
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
10000,
"fixed",
20,
8,
"euler",
"normal",
1
],
"color": "#323",
"bgcolor": "#535"
},
{
"id": 13,
"type": "KSampler",
"pos": [
1450.9556340211366,
183.00700000000006
],
"size": [
315,
262
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 31
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 32
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 22
},
{
"name": "latent_image",
"type": "LATENT",
"link": 39
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"slot_index": 0,
"links": [
29
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.33",
"Node name for S&R": "KSampler"
},
"widgets_values": [
10000,
"fixed",
20,
8,
"euler",
"normal",
0.6
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 18,
"type": "NNLatentUpscale",
"pos": [
1209.4778170105683,
183.00700000000006
],
"size": [
210,
82
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "latent",
"type": "LATENT",
"link": 38
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
39
]
}
],
"properties": {
"cnr_id": "comfyui_nnlatentupscale",
"ver": "7657841c7113345ef407c498985c141ffff38eba",
"Node name for S&R": "NNLatentUpscale"
},
"widgets_values": [
"SD 1.x",
1.5
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 10,
"type": "VAELoader",
"pos": [
1484.8812538558475,
66.90901890016424
],
"size": [
281.0743801652891,
58
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
34
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.3.76",
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
}
],
"links": [
[
2,
5,
0,
3,
3,
"LATENT"
],
[
4,
6,
0,
3,
1,
"CONDITIONING"
],
[
6,
7,
0,
3,
2,
"CONDITIONING"
],
[
16,
4,
0,
3,
0,
"MODEL"
],
[
17,
4,
1,
6,
0,
"CLIP"
],
[
18,
4,
1,
7,
0,
"CLIP"
],
[
22,
7,
0,
13,
2,
"CONDITIONING"
],
[
29,
13,
0,
15,
0,
"LATENT"
],
[
30,
15,
0,
16,
0,
"IMAGE"
],
[
31,
4,
0,
13,
0,
"MODEL"
],
[
32,
6,
0,
13,
1,
"CONDITIONING"
],
[
34,
10,
0,
15,
1,
"VAE"
],
[
38,
3,
0,
18,
0,
"LATENT"
],
[
39,
18,
0,
13,
3,
"LATENT"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.8264462809917354,
"offset": [
64.95536196608535,
33.09098109983576
]
},
"frontendVersion": "1.34.6",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true,
"reroutes": [
{
"id": 1,
"pos": [
410.0480618422728,
106.66478812962959
],
"linkIds": [
31
]
},
{
"id": 2,
"parentId": 1,
"pos": [
1411.8277618422733,
107.55688812962956
],
"linkIds": [
31
]
}
],
"linkExtensions": [
{
"id": 31,
"parentId": 2
}
]
},
"version": 0.4
}
- 🟩 Enlarge the latent coming out of text2image directly with the NNLatentUpscale node
- 🟨 Flow the enlarged latent directly into image2image