Release/3.1.0 (#4397)

## What type of PR is this? (check all applicable)

This is the 3.1.0 release candidate. Minor bug fixes will be applied here during testing and then merged into main upon release.
invoke-ai · Aug 31, 2023 · a74e210
2 parents 2bd3cf2 + ca5689d
commit a74e210
Show file tree

Hide file tree

Showing 30 changed files with 794 additions and 784 deletions.
diff --git a/installer/create_installer.sh b/installer/create_installer.sh
@@ -46,6 +46,7 @@ if [[ $(python -c 'from importlib.util import find_spec; print(find_spec("build"
     pip install --user build
 fi
 
+rm -rf ../build  # -f: a missing ../build (e.g. fresh checkout) must not make the cleanup step fail
 python -m build --wheel --outdir dist/ ../.
 
 # ----------------------

diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py
@@ -302,6 +302,29 @@ def invoke(self, context: InvocationContext) -> ConditioningOutput:
 
         add_time_ids = torch.tensor([original_size + crop_coords + target_size])
 
+        # [1, 77, 768], [1, 154, 1280]
+        if c1.shape[1] < c2.shape[1]:
+            c1 = torch.cat(
+                [
+                    c1,
+                    torch.zeros(
+                        (c1.shape[0], c2.shape[1] - c1.shape[1], c1.shape[2]), device=c1.device, dtype=c1.dtype
+                    ),
+                ],
+                dim=1,
+            )
+
+        elif c1.shape[1] > c2.shape[1]:
+            c2 = torch.cat(
+                [
+                    c2,
+                    torch.zeros(
+                        (c2.shape[0], c1.shape[1] - c2.shape[1], c2.shape[2]), device=c2.device, dtype=c2.dtype
+                    ),
+                ],
+                dim=1,
+            )
+
         conditioning_data = ConditioningFieldData(
             conditionings=[
                 SDXLConditioningInfo(

diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
@@ -445,8 +445,14 @@ def invoke(self, context: InvocationContext) -> LatentsOutput:
                 latents = context.services.latents.get(self.latents.latents_name)
                 if seed is None:
                     seed = self.latents.seed
-            else:
+
+                if noise is not None and noise.shape[1:] != latents.shape[1:]:
+                    raise Exception(f"Incompatable 'noise' and 'latents' shapes: {latents.shape=} {noise.shape=}")
+
+            elif noise is not None:
                 latents = torch.zeros_like(noise)
+            else:
+                raise Exception("'latents' or 'noise' must be provided!")
 
             if seed is None:
                 seed = 0

diff --git a/invokeai/app/invocations/primitives.py b/invokeai/app/invocations/primitives.py
@@ -109,7 +109,7 @@ class IntegerCollectionInvocation(BaseInvocation):
     """A collection of integer primitive values"""
 
     collection: list[int] = InputField(
-        default=0, description="The collection of integer values", ui_type=UIType.IntegerCollection
+        default_factory=list, description="The collection of integer values", ui_type=UIType.IntegerCollection
     )
 
     def invoke(self, context: InvocationContext) -> IntegerCollectionOutput:
@@ -261,7 +261,7 @@ class ImageCollectionInvocation(BaseInvocation):
     """A collection of image primitive values"""
 
     collection: list[ImageField] = InputField(
-        default=0, description="The collection of image values", ui_type=UIType.ImageCollection
+        default_factory=list, description="The collection of image values", ui_type=UIType.ImageCollection
     )
 
     def invoke(self, context: InvocationContext) -> ImageCollectionOutput:
@@ -451,7 +451,9 @@ class ConditioningCollectionInvocation(BaseInvocation):
     """A collection of conditioning tensor primitive values"""
 
     collection: list[ConditioningField] = InputField(
-        default=0, description="The collection of conditioning tensors", ui_type=UIType.ConditioningCollection
+        default_factory=list,
+        description="The collection of conditioning tensors",
+        ui_type=UIType.ConditioningCollection,
     )
 
     def invoke(self, context: InvocationContext) -> ConditioningCollectionOutput:

diff --git a/invokeai/app/services/config/__init__.py b/invokeai/app/services/config/__init__.py
@@ -6,3 +6,4 @@
     InvokeAIAppConfig,
     get_invokeai_config,
 )
+from .base import PagingArgumentParser  # noqa F401
diff --git a/invokeai/backend/install/migrate_to_3.py b/invokeai/backend/install/migrate_to_3.py
@@ -492,10 +492,10 @@ def _parse_legacy_yamlfile(root: Path, initfile: Path) -> ModelPaths:
     loras = paths.get("lora_dir", "loras")
     controlnets = paths.get("controlnet_dir", "controlnets")
     return ModelPaths(
-        models=root / models,
-        embeddings=root / embeddings,
-        loras=root / loras,
-        controlnets=root / controlnets,
+        models=root / models if models else None,
+        embeddings=root / embeddings if embeddings else None,
+        loras=root / loras if loras else None,
+        controlnets=root / controlnets if controlnets else None,
     )
 
 

diff --git a/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py b/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py
@@ -265,7 +265,7 @@ def einsum_op_mps_v1(self, q, k, v):
         if q.shape[1] <= 4096:  # (512x512) max q.shape[1]: 4096
             return self.einsum_lowest_level(q, k, v, None, None, None)
         else:
-            slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1]))
+            slice_size = math.floor(2 ** 30 / (q.shape[0] * q.shape[1]))
             return self.einsum_op_slice_dim1(q, k, v, slice_size)
 
     def einsum_op_mps_v2(self, q, k, v):

diff --git a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py
@@ -215,10 +215,7 @@ def do_controlnet_step(
                                 dim=0,
                             ),
                         }
-                    (
-                        encoder_hidden_states,
-                        encoder_attention_mask,
-                    ) = self._concat_conditionings_for_batch(
+                    (encoder_hidden_states, encoder_attention_mask,) = self._concat_conditionings_for_batch(
                         conditioning_data.unconditioned_embeddings.embeds,
                         conditioning_data.text_embeddings.embeds,
                     )
@@ -280,32 +277,23 @@ def do_unet_step(
         wants_cross_attention_control = len(cross_attention_control_types_to_do) > 0
 
         if wants_cross_attention_control:
-            (
-                unconditioned_next_x,
-                conditioned_next_x,
-            ) = self._apply_cross_attention_controlled_conditioning(
+            (unconditioned_next_x, conditioned_next_x,) = self._apply_cross_attention_controlled_conditioning(
                 sample,
                 timestep,
                 conditioning_data,
                 cross_attention_control_types_to_do,
                 **kwargs,
             )
         elif self.sequential_guidance:
-            (
-                unconditioned_next_x,
-                conditioned_next_x,
-            ) = self._apply_standard_conditioning_sequentially(
+            (unconditioned_next_x, conditioned_next_x,) = self._apply_standard_conditioning_sequentially(
                 sample,
                 timestep,
                 conditioning_data,
                 **kwargs,
             )
 
         else:
-            (
-                unconditioned_next_x,
-                conditioned_next_x,
-            ) = self._apply_standard_conditioning(
+            (unconditioned_next_x, conditioned_next_x,) = self._apply_standard_conditioning(
                 sample,
                 timestep,
                 conditioning_data,

diff --git a/invokeai/backend/stable_diffusion/image_degradation/bsrgan.py b/invokeai/backend/stable_diffusion/image_degradation/bsrgan.py
@@ -395,7 +395,7 @@ def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
         D = np.diag(np.random.rand(3))
         U = orth(np.random.rand(3, 3))
         conv = np.dot(np.dot(np.transpose(U), D), U)
-        img = img + np.random.multivariate_normal([0, 0, 0], np.abs(L**2 * conv), img.shape[:2]).astype(np.float32)
+        img = img + np.random.multivariate_normal([0, 0, 0], np.abs(L ** 2 * conv), img.shape[:2]).astype(np.float32)
     img = np.clip(img, 0.0, 1.0)
     return img
 
@@ -413,7 +413,7 @@ def add_speckle_noise(img, noise_level1=2, noise_level2=25):
         D = np.diag(np.random.rand(3))
         U = orth(np.random.rand(3, 3))
         conv = np.dot(np.dot(np.transpose(U), D), U)
-        img += img * np.random.multivariate_normal([0, 0, 0], np.abs(L**2 * conv), img.shape[:2]).astype(np.float32)
+        img += img * np.random.multivariate_normal([0, 0, 0], np.abs(L ** 2 * conv), img.shape[:2]).astype(np.float32)
     img = np.clip(img, 0.0, 1.0)
     return img
 

diff --git a/invokeai/backend/stable_diffusion/image_degradation/bsrgan_light.py b/invokeai/backend/stable_diffusion/image_degradation/bsrgan_light.py
@@ -399,7 +399,7 @@ def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
         D = np.diag(np.random.rand(3))
         U = orth(np.random.rand(3, 3))
         conv = np.dot(np.dot(np.transpose(U), D), U)
-        img = img + np.random.multivariate_normal([0, 0, 0], np.abs(L**2 * conv), img.shape[:2]).astype(np.float32)
+        img = img + np.random.multivariate_normal([0, 0, 0], np.abs(L ** 2 * conv), img.shape[:2]).astype(np.float32)
     img = np.clip(img, 0.0, 1.0)
     return img
 
@@ -417,7 +417,7 @@ def add_speckle_noise(img, noise_level1=2, noise_level2=25):
         D = np.diag(np.random.rand(3))
         U = orth(np.random.rand(3, 3))
         conv = np.dot(np.dot(np.transpose(U), D), U)
-        img += img * np.random.multivariate_normal([0, 0, 0], np.abs(L**2 * conv), img.shape[:2]).astype(np.float32)
+        img += img * np.random.multivariate_normal([0, 0, 0], np.abs(L ** 2 * conv), img.shape[:2]).astype(np.float32)
     img = np.clip(img, 0.0, 1.0)
     return img
 

diff --git a/invokeai/backend/stable_diffusion/image_degradation/utils_image.py b/invokeai/backend/stable_diffusion/image_degradation/utils_image.py
@@ -562,14 +562,18 @@ def rgb2ycbcr(img, only_y=True):
     if only_y:
         rlt = np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
     else:
-        rlt = np.matmul(
-            img,
-            [
-                [65.481, -37.797, 112.0],
-                [128.553, -74.203, -93.786],
-                [24.966, 112.0, -18.214],
-            ],
-        ) / 255.0 + [16, 128, 128]
+        rlt = (
+            np.matmul(
+                img,
+                [
+                    [65.481, -37.797, 112.0],
+                    [128.553, -74.203, -93.786],
+                    [24.966, 112.0, -18.214],
+                ],
+            )
+            / 255.0
+            + [16, 128, 128]
+        )
     if in_img_type == np.uint8:
         rlt = rlt.round()
     else:
@@ -588,14 +592,18 @@ def ycbcr2rgb(img):
     if in_img_type != np.uint8:
         img *= 255.0
     # convert
-    rlt = np.matmul(
-        img,
-        [
-            [0.00456621, 0.00456621, 0.00456621],
-            [0, -0.00153632, 0.00791071],
-            [0.00625893, -0.00318811, 0],
-        ],
-    ) * 255.0 + [-222.921, 135.576, -276.836]
+    rlt = (
+        np.matmul(
+            img,
+            [
+                [0.00456621, 0.00456621, 0.00456621],
+                [0, -0.00153632, 0.00791071],
+                [0.00625893, -0.00318811, 0],
+            ],
+        )
+        * 255.0
+        + [-222.921, 135.576, -276.836]
+    )
     if in_img_type == np.uint8:
         rlt = rlt.round()
     else:
@@ -618,14 +626,18 @@ def bgr2ycbcr(img, only_y=True):
     if only_y:
         rlt = np.dot(img, [24.966, 128.553, 65.481]) / 255.0 + 16.0
     else:
-        rlt = np.matmul(
-            img,
-            [
-                [24.966, 112.0, -18.214],
-                [128.553, -74.203, -93.786],
-                [65.481, -37.797, 112.0],
-            ],
-        ) / 255.0 + [16, 128, 128]
+        rlt = (
+            np.matmul(
+                img,
+                [
+                    [24.966, 112.0, -18.214],
+                    [128.553, -74.203, -93.786],
+                    [65.481, -37.797, 112.0],
+                ],
+            )
+            / 255.0
+            + [16, 128, 128]
+        )
     if in_img_type == np.uint8:
         rlt = rlt.round()
     else:
@@ -716,11 +728,11 @@ def ssim(img1, img2):
 
     mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]  # valid
     mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
-    mu1_sq = mu1**2
-    mu2_sq = mu2**2
+    mu1_sq = mu1 ** 2
+    mu2_sq = mu2 ** 2
     mu1_mu2 = mu1 * mu2
-    sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
-    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
+    sigma1_sq = cv2.filter2D(img1 ** 2, -1, window)[5:-5, 5:-5] - mu1_sq
+    sigma2_sq = cv2.filter2D(img2 ** 2, -1, window)[5:-5, 5:-5] - mu2_sq
     sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
 
     ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
@@ -737,8 +749,8 @@ def ssim(img1, img2):
 # matlab 'imresize' function, now only support 'bicubic'
 def cubic(x):
     absx = torch.abs(x)
-    absx2 = absx**2
-    absx3 = absx**3
+    absx2 = absx ** 2
+    absx3 = absx ** 3
     return (1.5 * absx3 - 2.5 * absx2 + 1) * ((absx <= 1).type_as(absx)) + (
         -0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2
     ) * (((absx > 1) * (absx <= 2)).type_as(absx))

diff --git a/invokeai/backend/training/textual_inversion_training.py b/invokeai/backend/training/textual_inversion_training.py
@@ -475,10 +475,7 @@ def __getitem__(self, i):
 
         if self.center_crop:
             crop = min(img.shape[0], img.shape[1])
-            (
-                h,
-                w,
-            ) = (
+            (h, w,) = (
                 img.shape[0],
                 img.shape[1],
             )

diff --git a/invokeai/backend/util/mps_fixes.py b/invokeai/backend/util/mps_fixes.py
@@ -203,7 +203,7 @@ def get_attention_scores_chunked(self, attn, query, key, attention_mask, hidden_
         if attn.upcast_attention:
             out_item_size = 4
 
-        chunk_size = 2**29
+        chunk_size = 2 ** 29
 
         out_size = query.shape[1] * key.shape[1] * out_item_size
         chunks_count = min(query.shape[1], math.ceil((out_size - 1) / chunk_size))

diff --git a/invokeai/backend/util/util.py b/invokeai/backend/util/util.py
@@ -207,7 +207,7 @@ def parallel_data_prefetch(
         return gather_res
 
 
-def rand_perlin_2d(shape, res, device, fade=lambda t: 6 * t**5 - 15 * t**4 + 10 * t**3):
+def rand_perlin_2d(shape, res, device, fade=lambda t: 6 * t ** 5 - 15 * t ** 4 + 10 * t ** 3):
     delta = (res[0] / shape[0], res[1] / shape[1])
     d = (shape[0] // res[0], shape[1] // res[1])
 

diff --git a/invokeai/configs/INITIAL_MODELS.yaml b/invokeai/configs/INITIAL_MODELS.yaml
@@ -4,34 +4,34 @@ sd-1/main/stable-diffusion-v1-5:
    repo_id: runwayml/stable-diffusion-v1-5
    recommended: True
    default: True
-sd-1/main/stable-diffusion-inpainting:
+sd-1/main/stable-diffusion-v1-5-inpainting:
    description: RunwayML SD 1.5 model optimized for inpainting, diffusers version (4.27 GB)
    repo_id: runwayml/stable-diffusion-inpainting
    recommended: True
 sd-2/main/stable-diffusion-2-1:
    description: Stable Diffusion version 2.1 diffusers model, trained on 768 pixel images (5.21 GB)
    repo_id: stabilityai/stable-diffusion-2-1
-   recommended: True
+   recommended: False
 sd-2/main/stable-diffusion-2-inpainting:
    description: Stable Diffusion version 2.0 inpainting model (5.21 GB)
    repo_id: stabilityai/stable-diffusion-2-inpainting
    recommended: False
 sdxl/main/stable-diffusion-xl-base-1-0:
    description: Stable Diffusion XL base model (12 GB)
    repo_id: stabilityai/stable-diffusion-xl-base-1.0
-   recommended: False
+   recommended: True
 sdxl-refiner/main/stable-diffusion-xl-refiner-1-0:
    description: Stable Diffusion XL refiner model (12 GB)
    repo_id: stabilityai/stable-diffusion-xl-refiner-1.0
-   recommended: false
+   recommended: False
 sdxl/vae/sdxl-1-0-vae-fix:
    description: Fine tuned version of the SDXL-1.0 VAE
    repo_id: madebyollin/sdxl-vae-fp16-fix
-   recommended: true
+   recommended: True
 sd-1/main/Analog-Diffusion:
    description: An SD-1.5 model trained on diverse analog photographs (2.13 GB)
    repo_id: wavymulder/Analog-Diffusion
-   recommended: false
+   recommended: False
 sd-1/main/Deliberate:
    description: Versatile model that produces detailed images up to 768px (4.27 GB)
    repo_id: XpucT/Deliberate