2024-01-25 14:11:24 +00:00
|
|
|
from modules.sd_hijack_clip import FrozenCLIPEmbedderWithCustomWords
|
2024-01-25 14:53:58 +00:00
|
|
|
from ldm_patched.modules import model_management
|
2024-01-25 18:48:00 +00:00
|
|
|
from modules.shared import opts
|
2024-01-25 14:11:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
class CLIP_SD_15_L(FrozenCLIPEmbedderWithCustomWords):
    """Text encoder wrapper whose output layer follows ``opts.CLIP_stop_at_last_layers``."""

    def encode_with_transformers(self, tokens):
        """Run the wrapped transformer on ``tokens`` and return the selected hidden state.

        When ``opts.CLIP_stop_at_last_layers`` is greater than 1, the hidden
        state that many entries from the end of ``hidden_states`` is taken and
        passed through the text model's ``final_layer_norm``. Otherwise the
        transformer's ``last_hidden_state`` is returned unchanged.
        """
        # NOTE(review): presumably makes sure the CLIP weights are on the
        # compute device before the forward pass -- confirm in model_management.
        model_management.load_model_gpu(self.forge_objects.clip.patcher)

        # Read the option once so the flag and the index below always agree.
        stop_at = opts.CLIP_stop_at_last_layers

        # Hidden states are only read when an earlier layer is requested, so
        # request them with an explicit boolean instead of a negated integer
        # (which was truthy even for a value of 1).
        use_earlier_layer = stop_at > 1

        outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=use_earlier_layer)

        if not use_earlier_layer:
            return outputs.last_hidden_state

        z = outputs.hidden_states[-stop_at]
        return self.wrapped.transformer.text_model.final_layer_norm(z)
|
2024-01-25 14:11:24 +00:00
|
|
|
|
|
|
|
|
2024-01-25 15:33:55 +00:00
|
|
|
class CLIP_SD_21_H(FrozenCLIPEmbedderWithCustomWords):
    """Text encoder wrapper that picks its output layer from ``wrapped.layer``."""

    def __init__(self, wrapped, hijack):
        """Initialise the base class, normalise the layer choice, set token ids."""
        super().__init__(wrapped, hijack)

        # Express "penultimate" as an explicit hidden-state index so that
        # encode_with_transformers only has to handle "last" vs. an index.
        if self.wrapped.layer == "penultimate":
            self.wrapped.layer, self.wrapped.layer_idx = "hidden", -2

        # Start / end / padding token ids.
        # NOTE(review): presumably consumed by the base class when building
        # token chunks -- confirm against FrozenCLIPEmbedderWithCustomWords.
        self.id_start, self.id_end, self.id_pad = 49406, 49407, 0

    def encode_with_transformers(self, tokens):
        """Encode ``tokens`` and return the hidden state selected by ``wrapped.layer``.

        For ``"last"`` the transformer's ``last_hidden_state`` is returned as
        is. For any other value the hidden state at ``wrapped.layer_idx`` is
        taken and passed through the text model's ``final_layer_norm``.
        """
        # NOTE(review): presumably makes sure the CLIP weights are on the
        # compute device before the forward pass -- confirm in model_management.
        model_management.load_model_gpu(self.forge_objects.clip.patcher)

        encoder = self.wrapped
        transformer = encoder.transformer
        result = transformer(tokens, output_hidden_states=encoder.layer == "hidden")

        if encoder.layer == "last":
            return result.last_hidden_state

        selected = result.hidden_states[encoder.layer_idx]
        return encoder.transformer.text_model.final_layer_norm(selected)
|
2024-01-25 14:51:38 +00:00
|
|
|
|
2024-01-25 14:11:24 +00:00
|
|
|
|
|
|
|
class CLIP_SD_XL_L(FrozenCLIPEmbedderWithCustomWords):
    """Text encoder wrapper returning the raw hidden state chosen by ``wrapped.layer``."""

    def __init__(self, wrapped, hijack):
        """Delegate construction to the base class; no extra setup is done here."""
        super().__init__(wrapped, hijack)

    def encode_with_transformers(self, tokens):
        """Encode ``tokens`` and return the selected hidden state.

        ``"last"`` yields ``last_hidden_state``; any other value yields
        ``hidden_states[wrapped.layer_idx]``. No layer norm is applied here.
        """
        encoder = self.wrapped
        transformer = encoder.transformer
        result = transformer(tokens, output_hidden_states=encoder.layer == "hidden")

        if encoder.layer == "last":
            return result.last_hidden_state
        return result.hidden_states[encoder.layer_idx]
|
|
|
|
|
|
|
|
|
|
|
|
class CLIP_SD_XL_G(FrozenCLIPEmbedderWithCustomWords):
    """Text encoder wrapper that also attaches a projected pooled output to its result."""

    def __init__(self, wrapped, hijack):
        """Initialise the base class, normalise the layer choice, set token ids."""
        super().__init__(wrapped, hijack)

        # Express "penultimate" as an explicit hidden-state index so that
        # encode_with_transformers only has to handle "last" vs. an index.
        if self.wrapped.layer == "penultimate":
            self.wrapped.layer, self.wrapped.layer_idx = "hidden", -2

        # Start / end / padding token ids.
        # NOTE(review): presumably consumed by the base class when building
        # token chunks -- confirm against FrozenCLIPEmbedderWithCustomWords.
        self.id_start, self.id_end, self.id_pad = 49406, 49407, 0

    def encode_with_transformers(self, tokens):
        """Encode ``tokens``; return the selected hidden state with ``.pooled`` set.

        ``"last"`` selects ``last_hidden_state``; any other value selects
        ``hidden_states[wrapped.layer_idx]``. In both cases the transformer's
        ``pooler_output`` is multiplied by ``wrapped.text_projection`` and the
        product is stored on the returned object as the ``pooled`` attribute.
        """
        encoder = self.wrapped
        transformer = encoder.transformer
        result = transformer(tokens, output_hidden_states=encoder.layer == "hidden")

        z = (
            result.last_hidden_state
            if encoder.layer == "last"
            else result.hidden_states[encoder.layer_idx]
        )

        raw_pooled = result.pooler_output
        projection = encoder.text_projection

        # Both operands go through .float() and the pooled output is moved to
        # the projection's device before the matrix product.
        # NOTE(review): assumes both are torch tensors -- confirm.
        z.pooled = raw_pooled.float().to(projection.device) @ projection.float()

        return z
|