LoRA

LoRAとは？

Textual Inversion は「テキストでは説明しづらい見た目」を 1 つの単語に押し込める技術でしたが、モデルがもともと知らないものをゼロから描けるようにする力はありません。

「モデルが元々描けなかったものも描けるようにしたい！」と思ったとき、従来はモデル全体をファインチューニングする必要がありました。
しかし、モデル全体を学習し直すには、かなりのコストがかかります。

そこで使われるようになったのが、もともと LLM で使われていた LoRA（Low-Rank Adaptation） です。

LoRA は、モデルの重みそのものを書き換えるのではなく、「変更分」だけを小さな追加データとして外部に保存する方式です。
ベースモデルに対して、あとから拡張パックを読み込むような感覚で、新しいスタイルやキャラクターを足せます。

LoRAを適用した text2image

LoRAのダウンロード

今回は例として、ピクセルアート風にする LoRA を使ってみます。

8bitdiffuser 64x

 📂ComfyUI/
   └── 📂models/
       └── 📂loras/
           └── PX64NOCAP_epoch_10.safetensors

workflow

SD1.5_lora.json

{
  "id": "8b9f7796-0873-4025-be3c-0f997f67f866",
  "revision": 0,
  "last_node_id": 11,
  "last_link_id": 15,
  "nodes": [
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1209,
        188
      ],
      "size": [
        210,
        46
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 7
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 10
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            9
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 9,
      "type": "SaveImage",
      "pos": [
        1451,
        189
      ],
      "size": [
        354.2876035004722,
        433.23967321788405
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 9
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33"
      },
      "widgets_values": [
        "ComfyUI"
      ]
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        416.1970166015625,
        392.37848510742185
      ],
      "size": [
        410.75801513671877,
        158.82607910156253
      ],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 14
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            6
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "text, watermark"
      ]
    },
    {
      "id": 5,
      "type": "EmptyLatentImage",
      "pos": [
        582.1350317382813,
        606.5799999999999
      ],
      "size": [
        244.81999999999994,
        106
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            2
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "EmptyLatentImage"
      },
      "widgets_values": [
        512,
        512,
        1
      ]
    },
    {
      "id": 11,
      "type": "LoraLoader",
      "pos": [
        82.16589030803895,
        333.495116453795
      ],
      "size": [
        280.9090909090909,
        126
      ],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 11
        },
        {
          "name": "clip",
          "type": "CLIP",
          "link": 15
        }
      ],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            12
          ]
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            13,
            14
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.76",
        "Node name for S&R": "LoraLoader"
      },
      "widgets_values": [
        "PX64NOCAP_epoch_10.safetensors",
        1,
        1
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        415,
        186
      ],
      "size": [
        411.95503173828126,
        151.0030493164063
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 13
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            4
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "pixel_art,high quality,Illustration of a single red rose in a vase"
      ],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
      "id": 10,
      "type": "VAELoader",
      "pos": [
        896.9256198347109,
        68.77178286934158
      ],
      "size": [
        281.0743801652891,
        58
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            10
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.76",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "vae-ft-mse-840000-ema-pruned.safetensors"
      ]
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -264.15536196608537,
        333.495116453795
      ],
      "size": [
        315,
        98
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "slot_index": 0,
          "links": [
            11
          ]
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "slot_index": 1,
          "links": [
            15
          ]
        },
        {
          "name": "VAE",
          "type": "VAE",
          "slot_index": 2,
          "links": []
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "CheckpointLoaderSimple"
      },
      "widgets_values": [
        "v1-5-pruned-emaonly-fp16.safetensors"
      ]
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [
        863,
        186
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 12
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 4
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 6
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 2
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            7
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.33",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        1234,
        "fixed",
        20,
        8,
        "euler",
        "normal",
        1
      ]
    }
  ],
  "links": [
    [
      2,
      5,
      0,
      3,
      3,
      "LATENT"
    ],
    [
      4,
      6,
      0,
      3,
      1,
      "CONDITIONING"
    ],
    [
      6,
      7,
      0,
      3,
      2,
      "CONDITIONING"
    ],
    [
      7,
      3,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      9,
      8,
      0,
      9,
      0,
      "IMAGE"
    ],
    [
      10,
      10,
      0,
      8,
      1,
      "VAE"
    ],
    [
      11,
      4,
      0,
      11,
      0,
      "MODEL"
    ],
    [
      12,
      11,
      0,
      3,
      0,
      "MODEL"
    ],
    [
      13,
      11,
      1,
      6,
      0,
      "CLIP"
    ],
    [
      14,
      11,
      1,
      7,
      0,
      "CLIP"
    ],
    [
      15,
      4,
      1,
      11,
      1,
      "CLIP"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.8264462809917354,
      "offset": [
        364.15536196608537,
        32.43821713065842
      ]
    },
    "frontendVersion": "1.34.6",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

🟩Load LoRAノードを追加します。
- Load Checkpoint と CLIP Text Encode / KSampler の間に挟む形で接続します。
- MODEL と CLIP の両方を Load LoRA 経由で通す必要があります。
- strength_model / strength_clip : LoRA の適用強度です。基本は 1.0 ですが、効きすぎるときは下げます。
🟨トリガーワード
- LoRA を適用した時点で、内部的にはドット絵を描く能力がベースモデルに上乗せされています。
- ただし、その能力を確実に引き出すには、作者が学習時に使ったワードをプロンプトに含める必要があります。
- これをトリガーワードと呼びます。今回の LoRA では pixel_art がトリガーワードになっています。

Flux.1以降のモデルと LoRA

画像生成AIの設計思想の変更

Stable Diffusion 1.5 や SDXL では、LoRAを学習する際、画像生成の核となる拡散モデルと、プロンプトを解釈するテキストエンコーダの両方を学習対象とするのが一般的でした。

しかし、Flux.1以降のモデルでは、テキストエンコーダにT5やQwenといった大規模な言語モデルが採用されるようになりました。
これらは小さなChatGPTのようなもので、すでに汎用的な言語理解能力を備えています。画像生成のために再学習させるのは非効率であるどころか、性能が落ちる可能性すらあります。

そのため、最新のモデルではテキストエンコーダは固定し、拡散モデル本体だけ学習する設計が主流になっています。

LoRAも追随

LoRAもこれに追随します。

SDXLまでのLoRAは、拡散モデルとテキストエンコーダの両方を学習・適用の対象としていましたが、
Flux.1以降のモデルでは、LoRAの学習・適用も拡散モデルのみが対象になっています。

ComfyUI workflowの変化

Load LoRAノードを使ってもいいんですが、LoRAを適用しないCLIPまでノードに通すのは、あんまり美しくありません。
ということで、代わりに LoraLoaderModelOnlyノードが用意されています。
名前の通り、MODEL（拡散モデル）にのみLoRAを適用するノードです。

Flux.1_lora.json

{
  "id": "18404b37-92b0-4d11-a39c-ae941838eb83",
  "revision": 0,
  "last_node_id": 45,
  "last_link_id": 65,
  "nodes": [
    {
      "id": 35,
      "type": "FluxGuidance",
      "pos": [
        836,
        190
      ],
      "size": [
        211.60000610351562,
        58
      ],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "conditioning",
          "type": "CONDITIONING",
          "link": 56
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            57
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "FluxGuidance"
      },
      "widgets_values": [
        3.5
      ]
    },
    {
      "id": 33,
      "type": "CLIPTextEncode",
      "pos": [
        518,
        378
      ],
      "size": [
        414.71820068359375,
        108.47611236572266
      ],
      "flags": {
        "collapsed": true
      },
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 60
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            55
          ]
        }
      ],
      "title": "CLIP Text Encode (Negative Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        ""
      ]
    },
    {
      "id": 42,
      "type": "DualCLIPLoader",
      "pos": [
        185.0587921142578,
        235.1116485595703
      ],
      "size": [
        270,
        130
      ],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            59,
            60
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "DualCLIPLoader"
      },
      "widgets_values": [
        "clip_l.safetensors",
        "t5xxl_fp8_e4m3fn.safetensors",
        "flux",
        "default"
      ]
    },
    {
      "id": 41,
      "type": "UNETLoader",
      "pos": [
        527.2304526084715,
        34.5730778881735
      ],
      "size": [
        270,
        82
      ],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            63
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "UNETLoader"
      },
      "widgets_values": [
        "Flux\\flux1-dev-fp8.safetensors",
        "default"
      ]
    },
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [
        1408,
        190
      ],
      "size": [
        140,
        46
      ],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 52
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 62
        }
      ],
      "outputs": [
        {
          "name": "IMAGE",
          "type": "IMAGE",
          "slot_index": 0,
          "links": [
            65
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAEDecode"
      },
      "widgets_values": []
    },
    {
      "id": 44,
      "type": "LoraLoaderModelOnly",
      "pos": [
        828.5090970126064,
        34.5730778881735
      ],
      "size": [
        219.09090909090924,
        82
      ],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 63
        }
      ],
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            64
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.76",
        "Node name for S&R": "LoraLoaderModelOnly"
      },
      "widgets_values": [
        "Flux.1\\AWPortrait-FL-lora.safetensors",
        0.8
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 27,
      "type": "EmptySD3LatentImage",
      "pos": [
        795.1570061035156,
        471
      ],
      "size": [
        252.44299999999998,
        108.66200000000003
      ],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            51
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "EmptySD3LatentImage"
      },
      "widgets_values": [
        1024,
        1024,
        1
      ]
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        507,
        190
      ],
      "size": [
        301.84503173828125,
        128.01304626464844
      ],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 59
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "slot_index": 0,
          "links": [
            56
          ]
        }
      ],
      "title": "CLIP Text Encode (Positive Prompt)",
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "Fashion magazine style portrait of a striking young woman with sharp, defined features, confident gaze straight into the camera, minimal but edgy makeup with bold eyeliner and matte lips, sleek blunt bob haircut in deep black, wearing a modern monochrome outfit: structured black blazer over a crisp white top, subtle silver jewelry, standing against a clean architectural background of concrete and glass, slightly off-center composition, shot with an 85mm lens at f/2.0, crisp details on face and clothing, background softly blurred, cool-toned color grading with a hint of teal and orange, high-end editorial lighting with clear contrast and soft shadows, contemporary fashion photography"
      ]
    },
    {
      "id": 31,
      "type": "KSampler",
      "pos": [
        1070,
        190
      ],
      "size": [
        315,
        262
      ],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 64
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 57
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 55
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 51
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "slot_index": 0,
          "links": [
            52
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        1234,
        "fixed",
        20,
        1,
        "euler",
        "normal",
        1
      ]
    },
    {
      "id": 43,
      "type": "VAELoader",
      "pos": [
        1174.5506464243365,
        71.00368181687476
      ],
      "size": [
        210,
        58
      ],
      "flags": {
        "collapsed": false
      },
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            62
          ]
        }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.39",
        "Node name for S&R": "VAELoader"
      },
      "widgets_values": [
        "ae.safetensors"
      ]
    },
    {
      "id": 45,
      "type": "SaveImage",
      "pos": [
        1579.382263188637,
        190
      ],
      "size": [
        375.4432999999999,
        426.65870000000007
      ],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        {
          "name": "images",
          "type": "IMAGE",
          "link": 65
        }
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.76"
      },
      "widgets_values": [
        "ComfyUI"
      ]
    }
  ],
  "links": [
    [
      51,
      27,
      0,
      31,
      3,
      "LATENT"
    ],
    [
      52,
      31,
      0,
      8,
      0,
      "LATENT"
    ],
    [
      55,
      33,
      0,
      31,
      2,
      "CONDITIONING"
    ],
    [
      56,
      6,
      0,
      35,
      0,
      "CONDITIONING"
    ],
    [
      57,
      35,
      0,
      31,
      1,
      "CONDITIONING"
    ],
    [
      59,
      42,
      0,
      6,
      0,
      "CLIP"
    ],
    [
      60,
      42,
      0,
      33,
      0,
      "CLIP"
    ],
    [
      62,
      43,
      0,
      8,
      1,
      "VAE"
    ],
    [
      63,
      41,
      0,
      44,
      0,
      "MODEL"
    ],
    [
      64,
      44,
      0,
      31,
      0,
      "MODEL"
    ],
    [
      65,
      8,
      0,
      45,
      0,
      "IMAGE"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.9090909090909091,
      "offset": [
        -85.05879211425781,
        65.4269221118265
      ]
    },
    "frontendVersion": "1.34.5",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}

新しいモデルでは、このようにLoRAを適用しています。覚えておいてください。

LoRAとは？

LoRAを適用した text2image

LoRAのダウンロード

workflow

Flux.1以降のモデルと LoRA

画像生成AIの設計思想の変更

LoRAも追随

ComfyUI workflowの変化

jsonコピーボタンとは？

修正・誤字報告

記事リクエスト

感想・その他

ありがとうございます