how to

unit2-01_finetuning_and_guidance

Jun 10, 2025
notes · julyfun · 技术学习 · diffusion-models-class
1 Minutes
180 Words

Generating process:

x = torch.randn(4, 3, 256, 256).to(device)
for i, t in tqdm(enumerate(scheduler.timesteps)):
    model_input = scheduler.scale_model_input(x, t)
    with torch.no_grad():
        noise_pred = image_pipe.unet(model_input, t)["sample"]
    x = scheduler.step(noise_pred, t, sample=x).prev_sample

Guidance

x = torch.randn(4, 3, 256, 256).to(device)
for i, t in tqdm(enumerate(scheduler.timesteps)):
    x = x.detach().requires_grad_()
    model_input = scheduler.scale_model_input(x, t)
    noise_pred = image_pipe.unet(model_input, t)["sample"]

    x0 = scheduler.step(noise_pred, t, x).pred_original_sample
    loss = <custom_loss>(x0) * <guidance_loss_scale>
    cond_grad = -torch.autograd.grad(loss, x)[0]
    x = x.detach() + cond_grad

    x = scheduler.step(noise_pred, t, x).prev_sample

CLIP Guidance

with torch.no_grad():
    text_features = clip_model.encode_text(text)

for i, t in tqdm(enumerate(scheduler.timesteps)):
    # print(i, t) # (1, tensor(1000)), (2, tensor(980))...
    model_input = scheduler.scale_model_input(x, t)  # DDIM loaded
    with torch.no_grad():
        # image_pipe is loaded by the same name
        noise_pred = image_pipe.unet(model_input, t)["sample"]
    cond_grad = 0
    for cut in range(n_cuts):
        x = x.detach().requires_grad_()
        x0 = scheduler.step(noise_pred, t, sample=x).pred_original_sample
        loss = <clip_loss>(x0, text_features) * guidance_scale
        cond_grad -= torch.autograd.grad(loss, x)[0] / n_cuts

    if i % 25 == 0:
        print(f"Steps {i} loss: {loss.item()}")

    alpha_bar = scheduler.alphas_cumprod[i]
    # `alpha_bar` here is decreasing and works for textures.
    # Can be changed to some increasing coefficients!
    x = x.detach() + cond_grad * alpha_bar.sqrt()
    x = scheduler.step(noise_pred, t, x).prev_sample
Article title:unit2-01_finetuning_and_guidance
Article author:Julyfun
Release time:Jun 10, 2025
Copyright 2025
Sitemap