text2imageのように、シンプルにノイズから3Dモデルが現れてくれればよいのですが、動画生成では時間軸方向に次元が1つ増えるのと同様に、3D生成では空間方向に次元が増えるため、簡単には実現できません。
先に伝えておくと、3D生成はまだ、プロユースに耐えるほどの性能には達していません。
しかし、画像からモデルを作り出し、果ては歩き回れる世界を作り出す技術は着々と成長しています。
元々、NeRFのように、画像から3D空間・モデルを作る技術は存在していました。
しかし、NeRFで3Dを構築するには、同じオブジェクトを様々な視点から見た画像が必要になります。
{
"last_node_id": 17,
"last_link_id": 21,
"nodes": [
{
"id": 15,
"type": "CLIPVisionLoader",
"pos": [
90,
350
],
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 0,
"mode": 0,
"outputs": [
{
"name": "CLIP_VISION",
"type": "CLIP_VISION",
"links": [
18
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPVisionLoader"
},
"widgets_values": [
"OpenCLIP-ViT-H-14.safetensors"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 10,
"type": "SV3D_Conditioning",
"pos": [
490,
240
],
"size": {
"0": 315,
"1": 170
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip_vision",
"type": "CLIP_VISION",
"link": 18,
"slot_index": 0
},
{
"name": "init_image",
"type": "IMAGE",
"link": 20,
"slot_index": 1
},
{
"name": "vae",
"type": "VAE",
"link": 16,
"slot_index": 2
}
],
"outputs": [
{
"name": "positive",
"type": "CONDITIONING",
"links": [
11
],
"shape": 3,
"slot_index": 0
},
{
"name": "negative",
"type": "CONDITIONING",
"links": [
12
],
"shape": 3,
"slot_index": 1
},
{
"name": "latent",
"type": "LATENT",
"links": [
13
],
"shape": 3,
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "SV3D_Conditioning"
},
"widgets_values": [
576,
576,
21,
0
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 14,
"type": "VAELoader",
"pos": [
500,
470
],
"size": {
"0": 315,
"1": 58
},
"flags": {
"collapsed": true
},
"order": 1,
"mode": 0,
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
16,
17
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"vae-ft-mse-840000-ema-pruned.safetensors"
]
},
{
"id": 4,
"type": "CheckpointLoaderSimple",
"pos": [
90,
180
],
"size": {
"0": 315,
"1": 98
},
"flags": {},
"order": 2,
"mode": 0,
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
19
],
"slot_index": 0
},
{
"name": "CLIP",
"type": "CLIP",
"links": [],
"slot_index": 1
},
{
"name": "VAE",
"type": "VAE",
"links": [],
"slot_index": 2
}
],
"properties": {
"Node name for S&R": "CheckpointLoaderSimple"
},
"widgets_values": [
"SV3D\\sv3d_p.safetensors"
],
"color": "#232",
"bgcolor": "#353"
},
{
"id": 16,
"type": "LoadImage",
"pos": [
83,
485
],
"size": [
352.31848818847664,
437.0448823632812
],
"flags": {},
"order": 3,
"mode": 0,
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
20
],
"shape": 3
},
{
"name": "MASK",
"type": "MASK",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"ComfyUI_01605_.png",
"image"
]
},
{
"id": 8,
"type": "VAEDecode",
"pos": [
1200,
220
],
"size": [
162.6986083984375,
46
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 7
},
{
"name": "vae",
"type": "VAE",
"link": 17
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
21
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VAEDecode"
}
},
{
"id": 17,
"type": "VHS_VideoCombine",
"pos": [
1394,
225
],
"size": [
492.2207889102224,
704.2207889102224
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 21
},
{
"name": "audio",
"type": "VHS_AUDIO",
"link": null
},
{
"name": "batch_manager",
"type": "VHS_BatchManager",
"link": null
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 8,
"loop_count": 0,
"filename_prefix": "AnimateDiff",
"format": "image/gif",
"pingpong": false,
"save_output": false,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "AnimateDiff_00014.gif",
"subfolder": "",
"type": "temp",
"format": "image/gif"
}
}
}
},
{
"id": 3,
"type": "KSampler",
"pos": [
855,
220
],
"size": {
"0": 315,
"1": 262
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 14
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 11
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 12
},
{
"name": "latent_image",
"type": "LATENT",
"link": 13
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
7
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
12345,
"fixed",
20,
8,
"dpmpp_2m",
"karras",
1
]
},
{
"id": 11,
"type": "VideoTriangleCFGGuidance",
"pos": [
526,
126
],
"size": [
238,
58
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 19
}
],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
14
],
"shape": 3,
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "VideoTriangleCFGGuidance"
},
"widgets_values": [
1
],
"color": "#232",
"bgcolor": "#353"
}
],
"links": [
[
7,
3,
0,
8,
0,
"LATENT"
],
[
11,
10,
0,
3,
1,
"CONDITIONING"
],
[
12,
10,
1,
3,
2,
"CONDITIONING"
],
[
13,
10,
2,
3,
3,
"LATENT"
],
[
14,
11,
0,
3,
0,
"MODEL"
],
[
16,
14,
0,
10,
2,
"VAE"
],
[
17,
14,
0,
8,
1,
"VAE"
],
[
18,
15,
0,
10,
0,
"CLIP_VISION"
],
[
19,
4,
0,
11,
0,
"MODEL"
],
[
20,
16,
0,
10,
1,
"IMAGE"
],
[
21,
8,
0,
17,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"0246.VERSION": [
0,
0,
4
]
},
"version": 0.4
}