From 85fcccc105aa50f1d78de559233eaa9f384608b5 Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Wed, 26 Oct 2022 22:24:33 +0900
Subject: [PATCH 1/3] Squashed commit of fixing dropout silently

fix dropouts for future hypernetworks

add kwargs for Hypernetwork class

hypernet UI for gradio input

add recommended options

remove as options

revert adding options in ui
---
 modules/hypernetworks/hypernetwork.py | 25 +++++++++++++++++--------
 modules/ui.py                         |  4 ++--
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 2997cead..dd921153 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -34,7 +34,8 @@ class HypernetworkModule(torch.nn.Module):
     }
     activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
 
-    def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False, activate_output=False):
+    def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
+                 add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
         super().__init__()
 
         assert layer_structure is not None, "layer_structure must not be None"
@@ -60,7 +61,7 @@ class HypernetworkModule(torch.nn.Module):
                 linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
 
             # Add dropout except last layer
-            if use_dropout and i < len(layer_structure) - 3:
+            if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2:
                 linears.append(torch.nn.Dropout(p=0.3))
 
         self.linear = torch.nn.Sequential(*linears)
@@ -126,7 +127,7 @@ class Hypernetwork:
     filename = None
     name = None
 
-    def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False):
+    def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
         self.filename = None
         self.name = name
         self.layers = {}
@@ -139,11 +140,14 @@ class Hypernetwork:
         self.add_layer_norm = add_layer_norm
         self.use_dropout = use_dropout
         self.activate_output = activate_output
+        self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True
 
         for size in enable_sizes or []:
             self.layers[size] = (
-                HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
-                HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
+                HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+                                   self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
+                HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
+                                   self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
             )
 
     def weights(self):
@@ -172,7 +176,8 @@ class Hypernetwork:
         state_dict['sd_checkpoint'] = self.sd_checkpoint
         state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
         state_dict['activate_output'] = self.activate_output
-
+        state_dict['last_layer_dropout'] = self.last_layer_dropout
+
         torch.save(state_dict, filename)
 
     def load(self, filename):
@@ -193,12 +198,16 @@ class Hypernetwork:
         self.use_dropout = state_dict.get('use_dropout', False)
         print(f"Dropout usage is set to {self.use_dropout}" )
         self.activate_output = state_dict.get('activate_output', True)
+        print(f"Activate last layer is set to {self.activate_output}")
+        self.last_layer_dropout = state_dict.get('last_layer_dropout', False)
 
         for size, sd in state_dict.items():
             if type(size) == int:
                 self.layers[size] = (
-                    HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
-                    HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output),
+                    HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init,
+                                       self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
+                    HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init,
+                                       self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
                 )
 
         self.name = state_dict.get('name', self.name)
diff --git a/modules/ui.py b/modules/ui.py
index 0a63e357..55cbe859 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1238,8 +1238,8 @@ def create_ui(wrap_gradio_gpu_call):
                     new_hypernetwork_name = gr.Textbox(label="Name")
                     new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
                     new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
-                    new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=modules.hypernetworks.ui.keys)
-                    new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. relu-like - Kaiming, sigmoid-like - Xavier is recommended", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
+                    new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork. Recommended : Swish / Linear(none)", choices=modules.hypernetworks.ui.keys)
+                    new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. Normal is default, for experiments, relu-like - Kaiming, sigmoid-like - Xavier is recommended", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
                     new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization")
                     new_hypernetwork_use_dropout = gr.Checkbox(label="Use dropout")
                     overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork")

From cc56df996e95c2c82295ab7b9928da2544791220 Mon Sep 17 00:00:00 2001
From: guaneec
Date: Wed, 26 Oct 2022 23:51:51 +0800
Subject: [PATCH 2/3] Fix dropout logic

---
 modules/hypernetworks/hypernetwork.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index dd921153..b17598fe 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -35,7 +35,7 @@ class HypernetworkModule(torch.nn.Module):
     activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
 
     def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
-                 add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs):
+                 add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True):
         super().__init__()
 
         assert layer_structure is not None, "layer_structure must not be None"
@@ -61,7 +61,7 @@ class HypernetworkModule(torch.nn.Module):
                 linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
 
             # Add dropout except last layer
-            if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2:
+            if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2):
                 linears.append(torch.nn.Dropout(p=0.3))
 
         self.linear = torch.nn.Sequential(*linears)

From 029d7c75436558f1e884bb127caed73caaecb83a Mon Sep 17 00:00:00 2001
From: AngelBottomless <35677394+aria1th@users.noreply.github.com>
Date: Thu, 27 Oct 2022 14:44:53 +0900
Subject: [PATCH 3/3] Revert unresolved changes in Bias initialization

it should be zeros_ or parameterized in future properly.
---
 modules/hypernetworks/hypernetwork.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index b17598fe..25427a37 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -75,7 +75,7 @@ class HypernetworkModule(torch.nn.Module):
                 w, b = layer.weight.data, layer.bias.data
                 if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm:
                     normal_(w, mean=0.0, std=0.01)
-                    normal_(b, mean=0.0, std=0.005)
+                    normal_(b, mean=0.0, std=0)
                 elif weight_init == 'XavierUniform':
                     xavier_uniform_(w)
                     zeros_(b)
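
Net effect of the dropout changes above: after PATCH 2, a Dropout layer follows every Linear except the output one, and when last_layer_dropout is False the Linear feeding the output layer is skipped as well. The following is a minimal standalone sketch of just that placement rule, using a hypothetical build_linears helper (not part of the patches); activation functions, layer norm, and weight initialization from the real HypernetworkModule are omitted.

    import torch

    def build_linears(dim, layer_structure, use_dropout=True, last_layer_dropout=True):
        # Sketch of the Linear/Dropout assembly from HypernetworkModule.__init__
        # as it stands after PATCH 2 (dropout placement only).
        linears = []
        for i in range(len(layer_structure) - 1):
            linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i + 1])))
            # Dropout after every Linear except the output one; when
            # last_layer_dropout is False, the second-to-last Linear is skipped too.
            if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2):
                linears.append(torch.nn.Dropout(p=0.3))
        return torch.nn.Sequential(*linears)

    print(build_linears(320, [1, 2, 2, 1], last_layer_dropout=True))   # Dropout after Linear 0 and Linear 1
    print(build_linears(320, [1, 2, 2, 1], last_layer_dropout=False))  # Dropout after Linear 0 only

With the default structure "1, 2, 1" this means dropout is only ever inserted when last_layer_dropout is enabled, which is why older hypernetworks trained before these patches silently ran without dropout even when "Use dropout" was checked.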