my-sd/modules_forge/diffusers_patcher.py

import torch
import ldm_patched.modules.ops as ops

from ldm_patched.modules.model_patcher import ModelPatcher
from ldm_patched.modules import model_management
from transformers import modeling_utils


class DiffusersModelPatcher:
    def __init__(self, pipeline_class, dtype=torch.float16, *args, **kwargs):
        load_device = model_management.get_torch_device()
        offload_device = torch.device("cpu")

        # Fall back to fp32 when the device cannot use fp16 reliably.
        if not model_management.should_use_fp16(device=load_device):
            dtype = torch.float32

        self.dtype = dtype

        # Load the diffusers pipeline with patched ops so layers cast their
        # weights on the fly, and skip default weight initialization since
        # from_pretrained overwrites every parameter anyway.
        with ops.use_patched_ops(ops.manual_cast):
            with modeling_utils.no_init_weights():
                self.pipeline = pipeline_class.from_pretrained(*args, **kwargs)

        # Switch the UNet to PyTorch 2.0 scaled-dot-product attention when
        # the pipeline exposes an attention processor hook.
        if hasattr(self.pipeline, 'unet'):
            if hasattr(self.pipeline.unet, 'set_attn_processor'):
                from diffusers.models.attention_processor import AttnProcessor2_0
                self.pipeline.unet.set_attn_processor(AttnProcessor2_0())
                print('Attention optimization applied to DiffusersModelPatcher')

        # Park the pipeline on the offload device until sampling needs it.
        self.pipeline = self.pipeline.to(device=offload_device)

        if self.dtype == torch.float16:
            self.pipeline = self.pipeline.half()

        # The pipeline is only ever used for inference.
        self.pipeline.eval()

        # Wrap the pipeline so model_management can page it between the
        # load and offload devices like any other patched model.
        self.patcher = ModelPatcher(
            model=self.pipeline,
            load_device=load_device,
            offload_device=offload_device)

    def prepare_memory_before_sampling(self, batchsize, latent_width, latent_height):
        # Estimate inference VRAM from the latent area plus a fixed
        # 1024 MiB overhead, expressed in bytes, then ask model_management
        # to free enough memory and load the pipeline onto the GPU.
        area = 2 * batchsize * latent_width * latent_height
        inference_memory = (((area * 0.6) / 0.9) + 1024) * (1024 * 1024)
        model_management.load_models_gpu(
            models=[self.patcher],
            memory_required=inference_memory
        )
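
    # Worked example of the estimate above (hypothetical inputs): with
    # batchsize=1 and a 64x64 latent (a 512x512 image at 8x downscale),
    # area = 2 * 1 * 64 * 64 = 8192, so
    # inference_memory = ((8192 * 0.6) / 0.9 + 1024) * 1 MiB ≈ 6485 MiB.
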
    def move_tensor_to_current_device(self, x):
        # Match the device the patcher currently has the pipeline on, and
        # cast to the pipeline's working dtype.
        return x.to(device=self.patcher.current_device, dtype=self.dtype)
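
# Usage sketch (illustrative only, not part of this module): the pipeline
# class and checkpoint id below are assumptions for the example, not values
# used by my-sd. The checkpoint path is passed as a keyword so it reaches
# from_pretrained via **kwargs instead of binding to the dtype parameter.
#
#     from diffusers import StableDiffusionPipeline
#
#     patcher = DiffusersModelPatcher(
#         StableDiffusionPipeline,
#         pretrained_model_name_or_path='runwayml/stable-diffusion-v1-5')
#     patcher.prepare_memory_before_sampling(
#         batchsize=1, latent_width=64, latent_height=64)
#     noise = patcher.move_tensor_to_current_device(torch.randn(1, 4, 64, 64))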