How to use this VAE with LCM
Thanks for open-sourcing your work. By the way, I ran this code:
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder, save_image, load_image

pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", custom_pipeline="latent_consistency_txt2img", custom_revision="main", revision="fb9c5d")
decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent").images[0]
latent = latent.unsqueeze(0).to("cuda:0")
sample_consistency = decoder_consistency(latent)
save_image(sample_consistency, "con.png")
I got the image:
What is wrong?
Is this right?
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder
from PIL import Image
import numpy as np

pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32)
# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB

# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"
# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 8

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent")
latent = latent.images[0] / 0.18215
latent = latent.unsqueeze(0)
print(latent.size())

with torch.no_grad():
    consistent_latent = decoder_consistency(latent, schedule=[1.0])

image = consistent_latent[0].cpu().numpy()
image = (image + 1.0) * 127.5
image = image.clip(0, 255).astype(np.uint8)
image = Image.fromarray(image.transpose(1, 2, 0))
image.save("con.png")
Not entirely. You're setting up the prompt and pipeline twice! I edited the code, ran it locally, and it worked:
from diffusers import DiffusionPipeline, StableDiffusionPipeline
import torch
from consistencydecoder import ConsistencyDecoder
from PIL import Image
import numpy as np

pipe = DiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V5.1_noVAE", torch_dtype=torch.float32)
pipe.to(torch_device="cuda", torch_dtype=torch.float32)  # To save GPU memory, torch.float16 can be used, but it may compromise image quality.

prompt = "masterpiece, best quality, realistic photo of a cat jumping after a string, backlit, 8k"
num_inference_steps = 8  # 1~50 steps, recommended 1~8 steps. LCM supports fast inference even with <= 4 steps.

decoder_consistency = ConsistencyDecoder(device="cuda:0")  # Model size: 2.49 GB

latent = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, output_type="latent")
latent = latent.images[0] / 0.18215  # undo the SD VAE scaling factor before decoding
latent = latent.unsqueeze(0)
print(latent.size())

with torch.no_grad():
    consistent_latent = decoder_consistency(latent, schedule=[1.0])

image = consistent_latent[0].cpu().numpy()
image = (image + 1.0) * 127.5
image = image.clip(0, 255).astype(np.uint8)
image = Image.fromarray(image.transpose(1, 2, 0))
image.save("diffused.png")
Thanks.
Any observed quality increase or performance gain when using the consistency decoder instead of LCM's decoder (which is just the SD 1.5 VAE decoder)?
It seems that the text becomes clearer, and the edges become more distinct.
Original Decoder:
Consistency-Decoder:
It seems that the man's wrinkles are very pronounced.
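If anyone wants to reproduce the comparison, something like this should work as a rough sketch. It reuses pipe, latent (already divided by 0.18215), and decoder_consistency from the snippet above, and assumes pipe.vae is the standard SD 1.5 VAE decoder (pipe.vae.decode(...).sample is the usual diffusers AutoencoderKL call):

# Sketch: decode the same unscaled latent with both decoders and save both images.
import torch
from PIL import Image

def to_pil(sample):
    # (1, 3, H, W) tensor in [-1, 1] -> PIL image
    arr = ((sample[0].float().cpu().numpy() + 1.0) * 127.5).clip(0, 255).astype("uint8")
    return Image.fromarray(arr.transpose(1, 2, 0))

with torch.no_grad():
    vae_sample = pipe.vae.decode(latent.to(pipe.vae.dtype)).sample  # original SD 1.5 VAE decoder
    cd_sample = decoder_consistency(latent, schedule=[1.0])         # consistency decoder

to_pil(vae_sample).save("original_decoder.png")
to_pil(cd_sample).save("consistency_decoder.png")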
