領域指定生成

領域指定生成とは？

デザインにおいて、レイアウトは重要な要素です。

単に「街の中で壁に寄りかかっている女性」というプロンプトで画像を生成するだけでは、魅力的な作品は作れません。

画面の右端にレンガの壁を置き、そこに女性を寄りかからせ、そのすぐ手前に街灯を配置し、画面左はオブジェクトを減らして余白を作り出す……。

好きな場所に好きなオブジェクトを置いた画像を生成するための技術が「領域指定」です。

プロンプトで位置を指示する

最もシンプルな方法は、プロンプトにそのまま位置関係を書く方法です。

Flux.1_dev.json

{
  "id": "18404b37-92b0-4d11-a39c-ae941838eb83",
  "revision": 0,
  "last_node_id": 45,
  "last_link_id": 64,
  "nodes": [
    {
      "id": 35,
      "type": "FluxGuidance",
      "pos": [
        836,
        190
      ],
      "size": [
        211.60000610351562,
        58
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            57
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "FluxGuidance"
      },
      "widgets_values": [
        3.5
      ],
      "color": "#2a363b",
      "bgcolor": "#3f5159"
    },
    {
      "id": 38,
      "type": "PreviewImage",
      "pos": [
        1568,
        190
      ],
      "size": [
        430.8328552246094,
        446.9476623535156
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 58
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    },
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1408,
        190
      ],
      "size": [
        140,
        46
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 52
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 62
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            58
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 43,
      "type": "VAELoader",
      "pos": [
        1112.7188720703125,
        507.60614013671875
      ],
      "size": [
        270,
        58
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            62
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "ae.safetensors"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 33,
      "type": "CLIPTextEncode",
      "pos": [
        518,
        378
      ],
      "size": [
        414.71820068359375,
        108.47611236572266
      ],
      "flags": {
        "collapsed": true
      },
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 60
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            55
          ]
        }
      ],
      "title": "CLIP Text Encode (Negative Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        ""
      ]
    },
    {
      "id": 27,
      "type": "EmptySD3LatentImage",
      "pos": [
        722,
        471
      ],
      "size": [
        315,
        106
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            51
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "EmptySD3LatentImage"
      },
      "widgets_values": [
        1024,
        1024,
        1
      ]
    },
    {
      "id": 42,
      "type": "DualCLIPLoader",
      "pos": [
        185.0587921142578,
        235.1116485595703
      ],
      "size": [
        270,
        130
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            59,
            60
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "DualCLIPLoader"
      },
      "widgets_values": [
        "clip_l.safetensors",
        "t5xxl_fp8_e4m3fn.safetensors",
        "flux",
        "default"
      ],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 45,
      "type": "UnetLoaderGGUF",
      "pos": [
        779.269287109375,
        59.45874786376953
      ],
      "size": [
        270,
        58
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            64
          ]
        }
      ],
      "properties": {
        "cnr_id": "ComfyUI-GGUF",
        "ver": "b3ec875a68d94b758914fd48d30571d953bb7a54",
        "Node name for S&R": "UnetLoaderGGUF"
      },
      "widgets_values": [
        "FLUX_gguf\\flux1-dev-Q5_0.gguf"
      ]
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        507,
        190
      ],
      "size": [
        301.84503173828125,
        128.01304626464844
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 59
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            56
          ]
        }
      ],
      "title": "CLIP Text Encode (Positive Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "A photo showing a banana on the left and an apple on the right placed on a desk\n"
      ]
    },
    {
      "id": 31,
      "type": "KSampler",
      "pos": [
        1070,
        190
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 64
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 57
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 55
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 51
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            52
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        12345,
        "fixed",
        20,
        1,
        "euler",
        "normal",
        1
      ]
    }
  ],
  "links": [
    [
      51,
      27,
      0,
      31,
      3,
      "LATENT"
    ],
    [
      52,
      31,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      55,
      33,
      0,
      31,
      2,
      "CONDITIONING"
    ],
    [
      56,
      6,
      0,
      35,
      0,
      "CONDITIONING"
    ],
    [
      57,
      35,
      0,
      31,
      1,
      "CONDITIONING"
    ],
    [
      58,
      8,
      0,
      38,
      0,
      "IMAGE"
    ],
    [
      59,
      42,
      0,
      6,
      0,
      "CLIP"
    ],
    [
      60,
      42,
      0,
      33,
      0,
      "CLIP"
    ],
    [
      62,
      43,
      0,
      8,
      1,
      "VAE"
    ],
    [
      64,
      45,
      0,
      31,
      0,
      "MODEL"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.9090909090909091,
      "offset": [
        -85.05879211425781,
        40.54125213623047
      ]
    },
    "frontendVersion": "1.23.4",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

「左にバナナ、右にリンゴ」

Stable Diffusionのテキストエンコーダは位置関係をほとんど理解できなかったのですが、Flux以降のモデルでは、ある程度は位置関係を反映してくれるようになってきています。

それでも、複雑な構図になると破綻しやすく、厳密な領域指定というよりは、ゆるいレイアウトの希望を伝える手段です。

Inpaintingを繰り返す

一度画像を生成してから、Inpaintingを何度も繰り返す方法です。

Flux.1_fill.json

{
  "id": "18404b37-92b0-4d11-a39c-ae941838eb83",
  "revision": 0,
  "last_node_id": 47,
  "last_link_id": 72,
  "nodes": [
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1774.15185546875,
        167.77081298828125
      ],
      "size": [
        140,
        46
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 52
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 62
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            58
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 42,
      "type": "DualCLIPLoader",
      "pos": [
        185.0587921142578,
        235.1116485595703
      ],
      "size": [
        270,
        130
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            59,
            60
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "DualCLIPLoader"
      },
      "widgets_values": [
        "clip_l.safetensors",
        "t5xxl_fp8_e4m3fn.safetensors",
        "flux",
        "default"
      ],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 35,
      "type": "FluxGuidance",
      "pos": [
        836,
        190
      ],
      "size": [
        211.60000610351562,
        58
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            65
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "FluxGuidance"
      },
      "widgets_values": [
        3.5
      ],
      "color": "#2a363b",
      "bgcolor": "#3f5159"
    },
    {
      "id": 33,
      "type": "CLIPTextEncode",
      "pos": [
        518,
        378
      ],
      "size": [
        414.71820068359375,
        108.47611236572266
      ],
      "flags": {
        "collapsed": true
      },
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 60
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            66
          ]
        }
      ],
      "title": "CLIP Text Encode (Negative Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        ""
      ]
    },
    {
      "id": 47,
      "type": "InpaintModelConditioning",
      "pos": [
        1136.67138671875,
        187.0927276611328
      ],
      "size": [
        270,
        138
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 65
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 66
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 72
        },
        {
          "name": "pixels",
          "type": "IMAGE",
          "link": 67
        },
        {
          "name": "mask",
          "type": "MASK",
          "link": 68
        }
      ],
      "outputs": [
        {
          "name": "positive",
          "type": "CONDITIONING",
          "links": [
            69
          ]
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "links": [
            70
          ]
        },
        {
          "name": "latent",
          "type": "LATENT",
          "links": [
            71
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.43",
        "Node name for S&R": "InpaintModelConditioning"
      },
      "widgets_values": [
        true
      ]
    },
    {
      "id": 43,
      "type": "VAELoader",
      "pos": [
        767.5374755859375,
        432.8213806152344
      ],
      "size": [
        270,
        58
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            62,
            72
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "ae.safetensors"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 38,
      "type": "PreviewImage",
      "pos": [
        1934.15185546875,
        167.77081298828125
      ],
      "size": [
        444.1662292480469,
        578.058837890625
      ],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 58
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    },
    {
      "id": 45,
      "type": "UnetLoaderGGUF",
      "pos": [
        1133.6611328125,
        68.74612426757812
      ],
      "size": [
        270,
        58
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            64
          ]
        }
      ],
      "properties": {
        "cnr_id": "ComfyUI-GGUF",
        "ver": "b3ec875a68d94b758914fd48d30571d953bb7a54",
        "Node name for S&R": "UnetLoaderGGUF"
      },
      "widgets_values": [
        "FLUX_gguf\\flux1-fill-dev-Q4_K_S.gguf"
      ],
      "color": "#323",
      "bgcolor": "#535"
    },
    {
      "id": 46,
      "type": "LoadImage",
      "pos": [
        753.7998657226562,
        559.425048828125
      ],
      "size": [
        296.30230712890625,
        481.77783203125
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "links": [
            67
          ]
        },
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            68
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.43",
        "Node name for S&R": "LoadImage"
      },
      "widgets_values": [
        "clipspace/clipspace-mask-2027464.8999999762.png [input]",
        "image"
      ]
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        507,
        190
      ],
      "size": [
        301.84503173828125,
        128.01304626464844
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 59
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            56
          ]
        }
      ],
      "title": "CLIP Text Encode (Positive Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "the red cone on top of the cube"
      ]
    },
    {
      "id": 31,
      "type": "KSampler",
      "pos": [
        1436.15185546875,
        167.77081298828125
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 64
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 69
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 70
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 71
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            52
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        123,
        "fixed",
        20,
        1,
        "euler",
        "normal",
        1
      ]
    }
  ],
  "links": [
    [
      52,
      31,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      56,
      6,
      0,
      35,
      0,
      "CONDITIONING"
    ],
    [
      58,
      8,
      0,
      38,
      0,
      "IMAGE"
    ],
    [
      59,
      42,
      0,
      6,
      0,
      "CLIP"
    ],
    [
      60,
      42,
      0,
      33,
      0,
      "CLIP"
    ],
    [
      62,
      43,
      0,
      8,
      1,
      "VAE"
    ],
    [
      64,
      45,
      0,
      31,
      0,
      "MODEL"
    ],
    [
      65,
      35,
      0,
      47,
      0,
      "CONDITIONING"
    ],
    [
      66,
      33,
      0,
      47,
      1,
      "CONDITIONING"
    ],
    [
      67,
      46,
      0,
      47,
      3,
      "IMAGE"
    ],
    [
      68,
      46,
      1,
      47,
      4,
      "MASK"
    ],
    [
      69,
      47,
      0,
      31,
      1,
      "CONDITIONING"
    ],
    [
      70,
      47,
      1,
      31,
      2,
      "CONDITIONING"
    ],
    [
      71,
      47,
      2,
      31,
      3,
      "LATENT"
    ],
    [
      72,
      43,
      0,
      47,
      2,
      "VAE"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.620921323059155,
      "offset": [
        -85.05879211425781,
        31.253875732421875
      ]
    },
    "frontendVersion": "1.23.4",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

1. ベースとなる画像を生成する
2. 編集したい領域にマスクをかけてInpaintingする
3. 必要に応じて、マスクを別の領域に変えて再度Inpaintingする

あまりスマートではないと思うかもしれませんが、確実で安定した手法です。プロンプトが混ざらず、LoRAもマスク外には基本的に影響しません。各領域を完全に独立したステップとして扱えます。

弱点は、領域ごとに別々に生成するため、対象同士の絡みを自然に表現しにくいことです。たとえば人物同士が握手している画像では、目線が合わないなどの違和感が出やすくなります。

Conditioning Set Area（Regional Prompting系）

画像の各位置に異なるテキスト条件を適用しようとする手法です。Cross-Attention層を利用して、領域ごとに別のプロンプトを使います。

Conditioning_(Set_Mask).json

{
  "id": "e524c983-e762-4a7d-a5cb-d0f3a36bde28",
  "revision": 0,
  "last_node_id": 21,
  "last_link_id": 27,
  "nodes": [
    {
      "id": 15,
      "type": "ConditioningCombine",
      "pos": [
        795.6497192382812,
        -233.876220703125
      ],
      "size": [
        211.060546875,
        46
      ],
      "flags": {},
      "order": 11,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning_1",
          "type": "CONDITIONING",
          "link": 18
        },
        {
          "name": "conditioning_2",
          "type": "CONDITIONING",
          "link": 19
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            22
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "ConditioningCombine"
      },
      "widgets_values": [],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 19,
      "type": "ConditioningCombine",
      "pos": [
        1058.4512939453125,
        60.54191207885742
      ],
      "size": [
        211.060546875,
        46
      ],
      "flags": {},
      "order": 12,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning_1",
          "type": "CONDITIONING",
          "link": 22
        },
        {
          "name": "conditioning_2",
          "type": "CONDITIONING",
          "link": 27
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            24
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "ConditioningCombine"
      },
      "widgets_values": [],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 13,
      "type": "CLIPTextEncode",
      "pos": [
        62.78541564941406,
        45.40777587890625
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 15
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            14
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of an apple"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        62.78541564941406,
        -569.9358520507812
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 3
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            11
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of a banana"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 21,
      "type": "LoadImageMask",
      "pos": [
        62.78541564941406,
        262.41204833984375
      ],
      "size": [
        341.8740234375,
        333.62274169921875
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            26
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "apple (2).png",
        "red",
        "image"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 20,
      "type": "LoadImageMask",
      "pos": [
        62.78541564941406,
        -352.93157958984375
      ],
      "size": [
        341.8740234375,
        333.62274169921875
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            25
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "banana (2).png",
        "red",
        "image"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 17,
      "type": "CLIPTextEncode",
      "pos": [
        62.78541564941406,
        660.7514038085938
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {
        "collapsed": false
      },
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 21
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            27
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "A beautiful analog-style photograph,wood table"
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        62.78541564941406,
        877.7556762695312
      ],
      "size": [
        341.8740234375,
        104.42913055419922
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 5
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            6
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "text, watermark, low quality"
      ]
    },
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1721.425537109375,
        180.1886444091797
      ],
      "size": [
        177.93228149414062,
        46
      ],
      "flags": {},
      "order": 14,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 7
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 13
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            10
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 5,
      "type": "EmptyLatentImage",
      "pos": [
        1020.22705078125,
        334.2445068359375
      ],
      "size": [
        315,
        106
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            2
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        768,
        768,
        1
      ]
    },
    {
      "id": 14,
      "type": "ConditioningSetMask",
      "pos": [
        458.8059997558594,
        45.40777587890625
      ],
      "size": [
        270,
        102
      ],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "link": 14
        },
        {
          "name": "mask",
          "type": "MASK",
          "link": 26
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            19
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "ConditioningSetMask"
      },
      "widgets_values": [
        1.5,
        "default"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 11,
      "type": "ConditioningSetMask",
      "pos": [
        465.2082824707031,
        -569.9358520507812
      ],
      "size": [
        270,
        102
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "link": 11
        },
        {
          "name": "mask",
          "type": "MASK",
          "link": 25
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            18
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "ConditioningSetMask"
      },
      "widgets_values": [
        1.5,
        "default"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -419.08935546875,
        178.1886444091797
      ],
      "size": [
        315,
        98
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "slot_index": 0,
          "links": [
            1
          ]
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "slot_index": 1,
          "links": [
            3,
            5,
            15,
            21
          ]
        },
        {
          "name": "VAE",
          "type": "VAE",
          "slot_index": 2,
          "links": []
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CheckpointLoaderSimple",
        "models": [
          {
            "name": "v1-5-pruned-emaonly-fp16.safetensors",
            "url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
            "directory": "checkpoints"
          }
        ]
      },
      "widgets_values": [
        "📷-v1.x\\real-dream-15.safetensors"
      ]
    },
    {
      "id": 12,
      "type": "VAELoader",
      "pos": [
        1723.3863525390625,
        289.3052978515625
      ],
      "size": [
        270,
        58
      ],
      "flags": {
        "collapsed": true
      },
      "order": 4,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            13
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "vae-ft-mse-840000-ema-pruned.safetensors"
      ]
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [
        1375.4254150390625,
        178.1886444091797
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 13,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 1
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 24
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 6
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 2
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            7
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        4444,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        1
      ]
    },
    {
      "id": 10,
      "type": "PreviewImage",
      "pos": [
        1984.327880859375,
        -58.26746368408203
      ],
      "size": [
        582.5722045898438,
        626.802978515625
      ],
      "flags": {},
      "order": 15,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 10
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    }
  ],
  "links": [
    [
      1,
      4,
      0,
      3,
      0,
      "MODEL"
    ],
    [
      2,
      5,
      0,
      3,
      3,
      "LATENT"
    ],
    [
      3,
      4,
      1,
      6,
      0,
      "CLIP"
    ],
    [
      5,
      4,
      1,
      7,
      0,
      "CLIP"
    ],
    [
      6,
      7,
      0,
      3,
      2,
      "CONDITIONING"
    ],
    [
      7,
      3,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      10,
      8,
      0,
      10,
      0,
      "IMAGE"
    ],
    [
      11,
      6,
      0,
      11,
      0,
      "CONDITIONING"
    ],
    [
      13,
      12,
      0,
      8,
      1,
      "VAE"
    ],
    [
      14,
      13,
      0,
      14,
      0,
      "CONDITIONING"
    ],
    [
      15,
      4,
      1,
      13,
      0,
      "CLIP"
    ],
    [
      18,
      11,
      0,
      15,
      0,
      "CONDITIONING"
    ],
    [
      19,
      14,
      0,
      15,
      1,
      "CONDITIONING"
    ],
    [
      21,
      4,
      1,
      17,
      0,
      "CLIP"
    ],
    [
      22,
      15,
      0,
      19,
      0,
      "CONDITIONING"
    ],
    [
      24,
      19,
      0,
      3,
      1,
      "CONDITIONING"
    ],
    [
      25,
      20,
      0,
      11,
      1,
      "MASK"
    ],
    [
      26,
      21,
      0,
      14,
      1,
      "MASK"
    ],
    [
      27,
      17,
      0,
      19,
      1,
      "CONDITIONING"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 1.030876132930519,
      "offset": [
        -181.07522170324756,
        348.5181404697794
      ]
    },
    "frontendVersion": "1.24.1",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

理屈はきれいですが、実際には境界がにじんだり、きっちり切り替わらなかったりすることが多く、実用性は高くありません。

また、この方法ではLoRAを領域ごとに指定することはできません。

Latent Composite（潜在空間での合成）

潜在空間の段階で画像を合成する方法です。

Latent_Composite.json

{
  "id": "e524c983-e762-4a7d-a5cb-d0f3a36bde28",
  "revision": 0,
  "last_node_id": 47,
  "last_link_id": 69,
  "nodes": [
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        -74.14247131347656,
        -228.59242248535156
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 3
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            37
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of a banana"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 47,
      "type": "VAELoader",
      "pos": [
        1464.0460205078125,
        -286.2972106933594
      ],
      "size": [
        270,
        58
      ],
      "flags": {
        "collapsed": true
      },
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            66
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "vae-ft-mse-840000-ema-pruned.safetensors"
      ]
    },
    {
      "id": 12,
      "type": "VAELoader",
      "pos": [
        2289.68603515625,
        149.98648071289062
      ],
      "size": [
        270,
        58
      ],
      "flags": {
        "collapsed": true
      },
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            13
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "vae-ft-mse-840000-ema-pruned.safetensors"
      ]
    },
    {
      "id": 43,
      "type": "EmptyLatentImage",
      "pos": [
        -74.14247131347656,
        3.208286762237549
      ],
      "size": [
        341.8740234375,
        106
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            64
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        768,
        512,
        1
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 17,
      "type": "CLIPTextEncode",
      "pos": [
        -74.14247131347656,
        -654.4688720703125
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {
        "collapsed": false
      },
      "order": 12,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 21
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            40
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "A beautiful analog-style photograph,table top"
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 42,
      "type": "EmptyLatentImage",
      "pos": [
        -74.14247131347656,
        -414.10546875
      ],
      "size": [
        341.8740234375,
        106
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            63
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        768,
        768,
        1
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 13,
      "type": "CLIPTextEncode",
      "pos": [
        247.21710205078125,
        380.4800109863281
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {},
      "order": 11,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 15
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            44
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of an apple"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 44,
      "type": "EmptyLatentImage",
      "pos": [
        247.21710205078125,
        612.2808837890625
      ],
      "size": [
        341.8740234375,
        106
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            65
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        768,
        512,
        1
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 38,
      "type": "PrimitiveNode",
      "pos": [
        -74.14247131347656,
        188.72134399414062
      ],
      "size": [
        341.8740234375,
        82
      ],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "INT",
          "type": "INT",
          "widget": {
            "name": "end_at_step"
          },
          "links": [
            46,
            47,
            48,
            50
          ]
        }
      ],
      "title": "end_at_step",
      "properties": {
        "Run widget replace on values": false
      },
      "widgets_values": [
        10,
        "fixed"
      ]
    },
    {
      "id": 33,
      "type": "LatentCompositeMasked",
      "pos": [
        833.8609008789062,
        -348.69537353515625
      ],
      "size": [
        270,
        146
      ],
      "flags": {},
      "order": 17,
      "mode": 0,
      "inputs": [
        {
          "name": "destination",
          "type": "LATENT",
          "link": 52
        },
        {
          "name": "source",
          "type": "LATENT",
          "link": 53
        },
        {
          "name": "mask",
          "shape": 7,
          "type": "MASK",
          "link": 54
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            55
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LatentCompositeMasked"
      },
      "widgets_values": [
        0,
        0,
        true
      ]
    },
    {
      "id": 40,
      "type": "LatentCompositeMasked",
      "pos": [
        1161.2132568359375,
        -135.64053344726562
      ],
      "size": [
        270,
        146
      ],
      "flags": {},
      "order": 18,
      "mode": 0,
      "inputs": [
        {
          "name": "destination",
          "type": "LATENT",
          "link": 55
        },
        {
          "name": "source",
          "type": "LATENT",
          "link": 57
        },
        {
          "name": "mask",
          "shape": 7,
          "type": "MASK",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            58,
            69
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LatentCompositeMasked"
      },
      "widgets_values": [
        0,
        0,
        true
      ]
    },
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        2287.659423828125,
        38.063751220703125
      ],
      "size": [
        177.93228149414062,
        46
      ],
      "flags": {},
      "order": 21,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 51
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 13
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            10
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 10,
      "type": "PreviewImage",
      "pos": [
        2515.8095703125,
        38.063751220703125
      ],
      "size": [
        582.5722045898438,
        626.802978515625
      ],
      "flags": {},
      "order": 23,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 10
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    },
    {
      "id": 35,
      "type": "LoadImageMask",
      "pos": [
        837.768310546875,
        -145.2086639404297
      ],
      "size": [
        214.080078125,
        330
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            54
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "banana (2).png",
        "red",
        "image"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 34,
      "type": "LoadImageMask",
      "pos": [
        1166.2705078125,
        73.07683563232422
      ],
      "size": [
        214.080078125,
        330
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            56
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "apple (2).png",
        "red",
        "image"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 45,
      "type": "VAEDecode",
      "pos": [
        1454.0460205078125,
        -376.2972106933594
      ],
      "size": [
        177.93228149414062,
        46
      ],
      "flags": {},
      "order": 20,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 69
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 66
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            67
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 46,
      "type": "PreviewImage",
      "pos": [
        1674.0460205078125,
        -376.2972106933594
      ],
      "size": [
        210,
        258
      ],
      "flags": {},
      "order": 22,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 67
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        -78.64060974121094,
        875.3549194335938
      ],
      "size": [
        341.8740234375,
        104.42913055419922
      ],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 5
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            38,
            41,
            45,
            61
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "text, watermark, low quality"
      ]
    },
    {
      "id": 41,
      "type": "CLIPTextEncode",
      "pos": [
        1516.0802001953125,
        126.6052017211914
      ],
      "size": [
        341.8740234375,
        152.28765869140625
      ],
      "flags": {},
      "order": 13,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 59
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            60
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of a beautiful analog-style photograph,wood table,an apple,a banana"
      ]
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -613.933349609375,
        38.063751220703125
      ],
      "size": [
        315,
        98
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "slot_index": 0,
          "links": [
            39,
            42,
            43,
            62
          ]
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "slot_index": 1,
          "links": [
            3,
            5,
            15,
            21,
            59
          ]
        },
        {
          "name": "VAE",
          "type": "VAE",
          "slot_index": 2,
          "links": []
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CheckpointLoaderSimple",
        "models": [
          {
            "name": "v1-5-pruned-emaonly-fp16.safetensors",
            "url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
            "directory": "checkpoints"
          }
        ]
      },
      "widgets_values": [
        "📷-v1.x\\real-dream-15.safetensors"
      ]
    },
    {
      "id": 36,
      "type": "KSamplerAdvanced",
      "pos": [
        438.809326171875,
        -654.4688720703125
      ],
      "size": [
        304.748046875,
        334
      ],
      "flags": {},
      "order": 16,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 42
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 40
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 41
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 63
        },
        {
          "name": "end_at_step",
          "type": "INT",
          "widget": {
            "name": "end_at_step"
          },
          "link": 48
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            52
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSamplerAdvanced"
      },
      "widgets_values": [
        "enable",
        777,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        0,
        10,
        "enable"
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 32,
      "type": "KSamplerAdvanced",
      "pos": [
        438.809326171875,
        -228.59242248535156
      ],
      "size": [
        304.748046875,
        334
      ],
      "flags": {},
      "order": 14,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 39
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 37
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 38
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 64
        },
        {
          "name": "end_at_step",
          "type": "INT",
          "widget": {
            "name": "end_at_step"
          },
          "link": 46
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            53
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSamplerAdvanced"
      },
      "widgets_values": [
        "enable",
        777,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        0,
        10,
        "enable"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 37,
      "type": "KSamplerAdvanced",
      "pos": [
        760.1702270507812,
        294.8553466796875
      ],
      "size": [
        304.748046875,
        334
      ],
      "flags": {},
      "order": 15,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 43
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 44
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 45
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 65
        },
        {
          "name": "end_at_step",
          "type": "INT",
          "widget": {
            "name": "end_at_step"
          },
          "link": 47
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            57
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSamplerAdvanced"
      },
      "widgets_values": [
        "enable",
        777,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        0,
        10,
        "enable"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 39,
      "type": "KSamplerAdvanced",
      "pos": [
        1953.4892578125,
        38.063751220703125
      ],
      "size": [
        304.748046875,
        334
      ],
      "flags": {},
      "order": 19,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 62
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 60
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 61
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 58
        },
        {
          "name": "start_at_step",
          "type": "INT",
          "widget": {
            "name": "start_at_step"
          },
          "link": 50
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            51
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSamplerAdvanced"
      },
      "widgets_values": [
        "disable",
        777,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        10,
        20,
        "disable"
      ]
    }
  ],
  "links": [
    [
      3,
      4,
      1,
      6,
      0,
      "CLIP"
    ],
    [
      5,
      4,
      1,
      7,
      0,
      "CLIP"
    ],
    [
      10,
      8,
      0,
      10,
      0,
      "IMAGE"
    ],
    [
      13,
      12,
      0,
      8,
      1,
      "VAE"
    ],
    [
      15,
      4,
      1,
      13,
      0,
      "CLIP"
    ],
    [
      21,
      4,
      1,
      17,
      0,
      "CLIP"
    ],
    [
      37,
      6,
      0,
      32,
      1,
      "CONDITIONING"
    ],
    [
      38,
      7,
      0,
      32,
      2,
      "CONDITIONING"
    ],
    [
      39,
      4,
      0,
      32,
      0,
      "MODEL"
    ],
    [
      40,
      17,
      0,
      36,
      1,
      "CONDITIONING"
    ],
    [
      41,
      7,
      0,
      36,
      2,
      "CONDITIONING"
    ],
    [
      42,
      4,
      0,
      36,
      0,
      "MODEL"
    ],
    [
      43,
      4,
      0,
      37,
      0,
      "MODEL"
    ],
    [
      44,
      13,
      0,
      37,
      1,
      "CONDITIONING"
    ],
    [
      45,
      7,
      0,
      37,
      2,
      "CONDITIONING"
    ],
    [
      46,
      38,
      0,
      32,
      4,
      "INT"
    ],
    [
      47,
      38,
      0,
      37,
      4,
      "INT"
    ],
    [
      48,
      38,
      0,
      36,
      4,
      "INT"
    ],
    [
      50,
      38,
      0,
      39,
      4,
      "INT"
    ],
    [
      51,
      39,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      52,
      36,
      0,
      33,
      0,
      "LATENT"
    ],
    [
      53,
      32,
      0,
      33,
      1,
      "LATENT"
    ],
    [
      54,
      35,
      0,
      33,
      2,
      "MASK"
    ],
    [
      55,
      33,
      0,
      40,
      0,
      "LATENT"
    ],
    [
      56,
      34,
      0,
      40,
      2,
      "MASK"
    ],
    [
      57,
      37,
      0,
      40,
      1,
      "LATENT"
    ],
    [
      58,
      40,
      0,
      39,
      3,
      "LATENT"
    ],
    [
      59,
      4,
      1,
      41,
      0,
      "CLIP"
    ],
    [
      60,
      41,
      0,
      39,
      1,
      "CONDITIONING"
    ],
    [
      61,
      7,
      0,
      39,
      2,
      "CONDITIONING"
    ],
    [
      62,
      4,
      0,
      39,
      0,
      "MODEL"
    ],
    [
      63,
      42,
      0,
      36,
      3,
      "LATENT"
    ],
    [
      64,
      43,
      0,
      32,
      3,
      "LATENT"
    ],
    [
      65,
      44,
      0,
      37,
      3,
      "LATENT"
    ],
    [
      66,
      47,
      0,
      45,
      1,
      "VAE"
    ],
    [
      67,
      45,
      0,
      46,
      0,
      "IMAGE"
    ],
    [
      69,
      40,
      0,
      45,
      0,
      "LATENT"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.5290024565035728,
      "offset": [
        713.933349609375,
        754.4688720703125
      ]
    },
    "frontendVersion": "1.24.1",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

1. まず複数の画像（バナナ画像、リンゴ画像、背景画像など）を、サンプリングの途中のステップまで別々に生成する
2. それぞれのlatentを、マスクを使って1枚分のlatentに貼り合わせる
3. その合成されたlatentに対して、残りのサンプリングステップを回す

各オブジェクトを別々の条件で生成でき、最後に「一枚の画像」としてなじませられます。

ただし、これを使うくらいなら複数回Inpaintingして、最後に全体をimage2imageしたほうが確実な場面も多いです。

Latent Couple / Attention Couple

Latent Couple

潜在空間を領域ごとに完全に分割し、それぞれ別々の設定（プロンプト・LoRAなど）で生成してから結合する方法です。

各領域にまったく別の設定を使えるという点では理想的です。
ただし、領域の数だけ画像を生成しているのと同じなので、計算量が大きくなります。

現時点でComfyUIに直接の実装はありません。

Attention Couple

Latent CoupleではUNetを丸々計算していましたが、こちらはCross-Attention層のみを計算します。

その分計算量はだいぶ少なくなりますが、LoRAの領域別指定はできません。

Attention_Couple.json

{
  "id": "bd31dc25-50da-43d7-b1cb-e53f5d146f11",
  "revision": 0,
  "last_node_id": 19,
  "last_link_id": 25,
  "nodes": [
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1573.7244873046875,
        560.4592895507812
      ],
      "size": [
        150.13829040527344,
        46
      ],
      "flags": {},
      "order": 14,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 7
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 25
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            10
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        466.21844482421875,
        52.61830520629883
      ],
      "size": [
        306.9939880371094,
        121.5467300415039
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 3
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            4,
            13
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "A beautiful analog-style photograph,table top"
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 12,
      "type": "SolidMask",
      "pos": [
        466.21844482421875,
        234.42247009277344
      ],
      "size": [
        306.9939880371094,
        106
      ],
      "flags": {},
      "order": 11,
      "mode": 0,
      "inputs": [
        {
          "name": "width",
          "type": "INT",
          "widget": {
            "name": "width"
          },
          "link": 16
        },
        {
          "name": "height",
          "type": "INT",
          "widget": {
            "name": "height"
          },
          "link": 18
        }
      ],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            14
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "SolidMask"
      },
      "widgets_values": [
        1,
        768,
        768
      ],
      "color": "#223",
      "bgcolor": "#335"
    },
    {
      "id": 17,
      "type": "LoadImageMask",
      "pos": [
        466.21844482421875,
        582.4840698242188
      ],
      "size": [
        306.9939880371094,
        330
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            21
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "banana (2).png",
        "red",
        "image"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 15,
      "type": "CLIPTextEncode",
      "pos": [
        466.21844482421875,
        400.6799011230469
      ],
      "size": [
        306.9939880371094,
        121.5467300415039
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 19
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            20
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of a banana"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 18,
      "type": "CLIPTextEncode",
      "pos": [
        466.21844482421875,
        972.7415161132812
      ],
      "size": [
        306.9939880371094,
        121.5467300415039
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 22
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            23
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "RAW photo of an apple"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 14,
      "type": "PrimitiveNode",
      "pos": [
        -6.389673233032227,
        859.0269165039062
      ],
      "size": [
        210,
        82
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "INT",
          "type": "INT",
          "widget": {
            "name": "height"
          },
          "links": [
            17,
            18
          ]
        }
      ],
      "title": "height",
      "properties": {
        "Run widget replace on values": false
      },
      "widgets_values": [
        768,
        "fixed"
      ]
    },
    {
      "id": 16,
      "type": "LoadImageMask",
      "pos": [
        466.21844482421875,
        1154.545654296875
      ],
      "size": [
        306.9939880371094,
        330
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MASK",
          "type": "MASK",
          "links": [
            24
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "LoadImageMask"
      },
      "widgets_values": [
        "apple (2).png",
        "red",
        "image"
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        466.21844482421875,
        1544.8031005859375
      ],
      "size": [
        306.9939880371094,
        89.91178131103516
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 5
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            6
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "low quality, text, watermark"
      ]
    },
    {
      "id": 10,
      "type": "AttentionCouplePPM",
      "pos": [
        986.5342407226562,
        560.4592895507812
      ],
      "size": [
        186.96835327148438,
        186
      ],
      "flags": {},
      "order": 12,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 12
        },
        {
          "name": "base_cond",
          "type": "CONDITIONING",
          "link": 13
        },
        {
          "name": "base_mask",
          "type": "MASK",
          "link": 14
        },
        {
          "name": "cond_1",
          "type": "CONDITIONING",
          "link": 20
        },
        {
          "name": "mask_1",
          "type": "MASK",
          "link": 21
        },
        {
          "name": "cond_2",
          "type": "CONDITIONING",
          "link": 23
        },
        {
          "name": "mask_2",
          "type": "MASK",
          "link": 24
        },
        {
          "name": "cond_3",
          "type": "CONDITIONING",
          "link": null
        },
        {
          "name": "mask_3",
          "type": "MASK",
          "link": null
        }
      ],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            11
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfyui-ppm",
        "ver": "7a28475ba26ebb76687c6cc343437b6deff36232",
        "Node name for S&R": "AttentionCouplePPM"
      },
      "widgets_values": [],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 11,
      "type": "PreviewImage",
      "pos": [
        1766.4737548828125,
        560.4592895507812
      ],
      "size": [
        529.1884155273438,
        593.6753540039062
      ],
      "flags": {},
      "order": 15,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 10
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "PreviewImage"
      },
      "widgets_values": []
    },
    {
      "id": 19,
      "type": "VAELoader",
      "pos": [
        1577.5760498046875,
        560.4592895507812
      ],
      "size": [
        268.0599060058594,
        58.97004699707031
      ],
      "flags": {
        "collapsed": true
      },
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            25
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "sdxl_vae.safetensors"
      ]
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -109.82740020751953,
        560.4592895507812
      ],
      "size": [
        315,
        98
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "slot_index": 0,
          "links": [
            12
          ]
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "slot_index": 1,
          "links": [
            3,
            5,
            19,
            22
          ]
        },
        {
          "name": "VAE",
          "type": "VAE",
          "slot_index": 2,
          "links": []
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "CheckpointLoaderSimple",
        "models": [
          {
            "name": "v1-5-pruned-emaonly-fp16.safetensors",
            "url": "https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors?download=true",
            "directory": "checkpoints"
          }
        ]
      },
      "widgets_values": [
        "📷-XL\\RealVisXL_V4.0.safetensors"
      ]
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [
        1216.113525390625,
        560.4592895507812
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 13,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 11
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 4
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 6
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 2
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            7
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        13022,
        "fixed",
        20,
        8,
        "dpmpp_2m",
        "karras",
        1
      ]
    },
    {
      "id": 5,
      "type": "EmptyLatentImage",
      "pos": [
        946.1537475585938,
        846.269287109375
      ],
      "size": [
        227.35647583007812,
        106
      ],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        {
          "name": "width",
          "type": "INT",
          "widget": {
            "name": "width"
          },
          "link": 15
        },
        {
          "name": "height",
          "type": "INT",
          "widget": {
            "name": "height"
          },
          "link": 17
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            2
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.44",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        768,
        768,
        1
      ]
    },
    {
      "id": 13,
      "type": "PrimitiveNode",
      "pos": [
        -6.389673233032227,
        729.040283203125
      ],
      "size": [
        210,
        82
      ],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "INT",
          "type": "INT",
          "widget": {
            "name": "width"
          },
          "links": [
            15,
            16
          ]
        }
      ],
      "title": "width",
      "properties": {
        "Run widget replace on values": false
      },
      "widgets_values": [
        768,
        "fixed"
      ]
    }
  ],
  "links": [
    [
      2,
      5,
      0,
      3,
      3,
      "LATENT"
    ],
    [
      3,
      4,
      1,
      6,
      0,
      "CLIP"
    ],
    [
      4,
      6,
      0,
      3,
      1,
      "CONDITIONING"
    ],
    [
      5,
      4,
      1,
      7,
      0,
      "CLIP"
    ],
    [
      6,
      7,
      0,
      3,
      2,
      "CONDITIONING"
    ],
    [
      7,
      3,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      10,
      8,
      0,
      11,
      0,
      "IMAGE"
    ],
    [
      11,
      10,
      0,
      3,
      0,
      "MODEL"
    ],
    [
      12,
      4,
      0,
      10,
      0,
      "MODEL"
    ],
    [
      13,
      6,
      0,
      10,
      1,
      "CONDITIONING"
    ],
    [
      14,
      12,
      0,
      10,
      2,
      "MASK"
    ],
    [
      15,
      13,
      0,
      5,
      0,
      "INT"
    ],
    [
      16,
      13,
      0,
      12,
      0,
      "INT"
    ],
    [
      17,
      14,
      0,
      5,
      1,
      "INT"
    ],
    [
      18,
      14,
      0,
      12,
      1,
      "INT"
    ],
    [
      19,
      4,
      1,
      15,
      0,
      "CLIP"
    ],
    [
      20,
      15,
      0,
      10,
      3,
      "CONDITIONING"
    ],
    [
      21,
      17,
      0,
      10,
      4,
      "MASK"
    ],
    [
      22,
      4,
      1,
      18,
      0,
      "CLIP"
    ],
    [
      23,
      18,
      0,
      10,
      5,
      "CONDITIONING"
    ],
    [
      24,
      16,
      0,
      10,
      6,
      "MASK"
    ],
    [
      25,
      19,
      0,
      8,
      1,
      "VAE"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 1.0308761329305212,
      "offset": [
        -632.0231183731564,
        -69.89356683867605
      ]
    },
    "frontendVersion": "1.24.1",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

雑コラのリファイン (おすすめ)

雑なコラージュ画像を作って、それをもとに自然な絵になるように作り直させる方法です。

非常に直感的に位置を指定できますし、生成する物自体も適当なオブジェクトを貼り付けておけばよいので、実際のところかなりおすすめの方法です。

詳しくは → 雑コラのリファイン

領域指定生成