How to use this VAE with LCM
Thanks for open-sourcing your work. By the way, I ran this code:
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder, save_image, load_image

pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", custom_pipeline="latent_consistency_txt2img", custom_revision="main", revision="fb9c5d")
decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent").images[0]
latent = latent.unsqueeze(0).to("cuda:0")
sample_consistency = decoder_consistency(latent)
save_image(sample_consistency, "con.png")
I got the image:
What is wrong?
Is this right?
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder
from PIL import Image
import numpy as np

pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32)
# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB

# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 8

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent")
latent = latent.images[0] / 0.18215
latent = latent.unsqueeze(0)
print(latent.size())

with torch.no_grad():
    consistent_latent = decoder_consistency(latent, schedule=[1.0])

image = consistent_latent[0].cpu().numpy()
image = (image + 1.0) * 127.5
image = image.clip(0, 255).astype(np.uint8)
image = Image.fromarray(image.transpose(1, 2, 0))
image.save("con.png")
Not entirely. You're setting up the prompt and pipeline twice! I edited the code, ran it locally, and it worked:
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder
from PIL import Image
import numpy as np

pipe = DiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V5.1_noVAE", torch_dtype=torch.float32)
pipe.to(torch_device="cuda", torch_dtype=torch.float32)  # To save GPU memory, torch.float16 can be used, but it may compromise image quality.

prompt = "masterpiece, best quality, realistic photo of a cat jumping after a string, backlit, 8k"
num_inference_steps = 8  # 1~50 steps, recommended 1~8 steps. LCM supports fast inference even with <= 4 steps.

decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent")
latent = latent.images[0] / 0.18215  # undo the SD VAE scaling factor before decoding
latent = latent.unsqueeze(0)
print(latent.size())

with torch.no_grad():
    consistent_latent = decoder_consistency(latent, schedule=[1.0])

image = consistent_latent[0].cpu().numpy()
image = (image + 1.0) * 127.5
image = image.clip(0, 255).astype(np.uint8)
image = Image.fromarray(image.transpose(1, 2, 0))
image.save("diffused.png")
Thanks.
Any observed quality increase or performance gain when using the consistency decoder instead of LCM's decoder (which is just the SD 1.5 VAE decoder)?
It seems that the text becomes clearer, and the edges become more distinct.
Original Decoder:
Consistency-Decoder:
It seems that the man's wrinkles are very pronounced.
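If anyone wants to reproduce the comparison, something like this should work as a rough sketch. It reuses pipe, latent (already divided by 0.18215), and decoder_consistency from the snippet above, and assumes pipe.vae is the standard SD 1.5 VAE decoder (pipe.vae.decode(...).sample is the usual diffusers AutoencoderKL call):

# Sketch: decode the same unscaled latent with both decoders and save both images.
import torch
from PIL import Image

def to_pil(sample):
    # (1, 3, H, W) tensor in [-1, 1] -> PIL image
    arr = ((sample[0].float().cpu().numpy() + 1.0) * 127.5).clip(0, 255).astype("uint8")
    return Image.fromarray(arr.transpose(1, 2, 0))

with torch.no_grad():
    vae_sample = pipe.vae.decode(latent.to(pipe.vae.dtype)).sample  # original SD 1.5 VAE decoder
    cd_sample = decoder_consistency(latent, schedule=[1.0])         # consistency decoder

to_pil(vae_sample).save("original_decoder.png")
to_pil(cd_sample).save("consistency_decoder.png")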
