This commit is contained in:
YaoFANGUK
2023-10-25 16:38:16 +08:00
commit 2b9360c299
602 changed files with 152490 additions and 0 deletions

View File

@@ -0,0 +1 @@
from .utils import *

View File

@@ -0,0 +1,101 @@
import cv2
import numpy as np
from typing import Tuple
def resize_and_pad(image: np.ndarray, mask: np.ndarray, target_size: int = 512) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int, int, int]]:
    """
    Resize an image and its mask so the longer side equals `target_size`, then zero-pad both to a
    (target_size, target_size) square with the content centred.

    Args:
        image: Image array whose first two axes are (height, width); a trailing channel axis is not padded.
        mask: Mask array; it is resized to the same (height, width) as the image.
        target_size: Side length of the square output.
    Returns:
        A 3-tuple `(image_padded, mask_padded, padding_factors)` where `padding_factors` is
        `(top_pad, bottom_pad, left_pad, right_pad)`, the number of padded pixels on each side.
        This is the value `recover_size` expects as its `padding_factors` argument.
    """
    height, width = image.shape[:2]
    scale = target_size / max(height, width)
    # int() truncates; for extreme aspect ratios the short side could become 0, which
    # cv2.resize rejects, so keep at least one pixel.
    new_height = max(1, int(height * scale))
    new_width = max(1, int(width * scale))
    image_resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    # NOTE: the mask is resized with the same bilinear interpolation as the image.
    mask_resized = cv2.resize(mask, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    pad_height = target_size - new_height
    pad_width = target_size - new_width
    # Split the padding as evenly as possible; the extra pixel (if any) goes to bottom/right.
    top_pad = pad_height // 2
    bottom_pad = pad_height - top_pad
    left_pad = pad_width // 2
    right_pad = pad_width - left_pad
    spatial_pad = ((top_pad, bottom_pad), (left_pad, right_pad))
    # Only the two spatial axes are padded; any remaining axes (channels) get (0, 0).
    image_padded = np.pad(image_resized, spatial_pad + ((0, 0),) * (image_resized.ndim - 2), mode='constant')
    mask_padded = np.pad(mask_resized, spatial_pad + ((0, 0),) * (mask_resized.ndim - 2), mode='constant')
    return image_padded, mask_padded, (top_pad, bottom_pad, left_pad, right_pad)
def recover_size(image_padded: np.ndarray, mask_padded: np.ndarray, orig_size: Tuple[int, int],
                 padding_factors: Tuple[int, int, int, int]) -> Tuple[np.ndarray, np.ndarray]:
    """
    Strip the padding from a padded image and mask and resize both back to the original size.

    Args:
        image_padded: The padded and resized image; the first two axes are (height, width).
        mask_padded: The padded and resized mask.
        orig_size: `(height, width)` of the image before resizing and padding.
        padding_factors: `(top_pad, bottom_pad, left_pad, right_pad)`, the number of padded pixels
            on each side that must be removed before resizing.
    Returns:
        A tuple `(image, mask)` resized to `orig_size`.
    """
    # Only the spatial extent is needed, so images without a channel axis are accepted too.
    h, w = image_padded.shape[:2]
    top_pad, bottom_pad, left_pad, right_pad = padding_factors
    image = image_padded[top_pad:h - bottom_pad, left_pad:w - right_pad]
    mask = mask_padded[top_pad:h - bottom_pad, left_pad:w - right_pad]
    # cv2.resize takes the target size as (width, height), hence the reversal.
    target_wh = tuple(orig_size[::-1])
    image_resized = cv2.resize(image, target_wh, interpolation=cv2.INTER_LINEAR)
    mask_resized = cv2.resize(mask, target_wh, interpolation=cv2.INTER_LINEAR)
    return image_resized, mask_resized
if __name__ == '__main__':
    # Demo: pad a sample image/mask pair to 512x512, then restore the original size.
    # Other sample pairs that can be substituted for the two paths below:
    #   example/boat.jpg             / example/boat_mask_2.png
    #   example/groceries.jpg        / example/groceries_mask_2.png
    #   example/bridge.jpg           / example/bridge_mask_2.png
    #   example/person_umbrella.jpg  / example/person_umbrella_mask_2.png
    #   example/hippopotamus.jpg     / example/hippopotamus_mask_1.png
    image_file = '/data1/yutao/projects/IAM/Inpaint-Anything/example/fill-anything/sample5.jpeg'
    mask_file = '/data1/yutao/projects/IAM/Inpaint-Anything/example/fill-anything/sample5/mask.png'
    image = cv2.imread(image_file)
    mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if image is None:
        raise FileNotFoundError(f'could not read image: {image_file}')
    if mask is None:
        raise FileNotFoundError(f'could not read mask: {mask_file}')
    print(image.shape)
    print(mask.shape)
    cv2.imwrite('original_image.jpg', image)
    cv2.imwrite('original_mask.jpg', mask)

    # resize and pad the image and mask
    image_padded, mask_padded, padding_factors = resize_and_pad(image, mask)
    cv2.imwrite('padded_image.png', image_padded)
    cv2.imwrite('padded_mask.png', mask_padded)
    print(image_padded.shape, mask_padded.shape, padding_factors)

    # ^ ------------------------------------------------------------------------------------
    # ^ Please conduct inpainting or filling here on the padded image with the padded mask
    # ^ ------------------------------------------------------------------------------------

    # recover the image and mask to the original size
    height, width, _ = image.shape
    image_resized, mask_resized = recover_size(image_padded, mask_padded, (height, width), padding_factors)

    # save the resized-and-padded and the recovered image and mask
    cv2.imwrite('resized_and_padded_image.png', image_padded)
    cv2.imwrite('resized_and_padded_mask.png', mask_padded)
    cv2.imwrite('recovered_image.png', image_resized)
    cv2.imwrite('recovered_mask.png', mask_resized)

View File

@@ -0,0 +1,35 @@
import cv2
import imageio
import os
import shutil
from tqdm import tqdm
from glob import glob
from os import path as osp
def frames2video(frames_list, video_path, fps=30, remove_tmp=False):
    """Encode a sequence of frames into a video file.

    Args:
        frames_list: Either a directory path (str), in which case every ``*.jpg`` inside it is used
            in sorted filename order, or an iterable whose items are image file paths (str) or
            in-memory frames. In-memory frames are converted with ``cv2.COLOR_BGR2RGB`` before writing.
        video_path: Path of the video file to write. Its parent directory is created if needed.
        fps: Frames per second passed to the writer.
        remove_tmp: When True and `frames_list` was given as a directory path, that directory is
            deleted with ``shutil.rmtree`` after the video has been written.
    """
    # Remember the directory before `frames_list` is rebound to the list of files;
    # otherwise the clean-up at the end can never tell that a directory was passed.
    frames_dir = None
    if isinstance(frames_list, str):
        frames_dir = frames_list
        # glob returns files in arbitrary order; sort so frames are written in filename order.
        frames_list = sorted(glob(f'{frames_dir}/*.jpg'))
    video_dir = os.path.dirname(video_path)
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    if video_dir:
        os.makedirs(video_dir, exist_ok=True)
    # The writer must target the video file itself, not its parent directory.
    writer = imageio.get_writer(video_path, fps=fps, plugin='ffmpeg')
    try:
        for frame in tqdm(frames_list, 'Export video'):
            if isinstance(frame, str):
                frame = imageio.imread(frame)
            else:
                # in-memory frames are treated as BGR (OpenCV order) and converted to RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = imageio.core.util.Array(frame)
            writer.append_data(frame)
    finally:
        # Close the writer even if reading or encoding a frame fails.
        writer.close()
    print(f'find video at {video_path}.')
    if remove_tmp and frames_dir is not None:
        shutil.rmtree(frames_dir)
if __name__ == '__main__':
    video_path = './demo/soccerball/original_video.mp4'
    frame_path = '/data0/datasets/davis/JPEGImages/480p/soccerball'
    fps = 30
    # remove_tmp stays False here: frame_path points at dataset frames, not a temporary
    # directory, and remove_tmp=True would delete it.
    frames2video(frame_path, video_path, fps, False)

View File

@@ -0,0 +1,31 @@
import os
import imageio
from imageio_ffmpeg import write_frames
def write_frames(frame_path, fps, size, codec='libx264', quality=8):
    """Yield each ``.jpg`` file in `frame_path` as a loaded image, in sorted filename order.

    Entries whose name does not end with ``.jpg`` are skipped.

    NOTE(review): `fps`, `size`, `codec` and `quality` are accepted but not used by this body,
    and this definition shadows the `write_frames` name imported from `imageio_ffmpeg`.
    """
    jpg_names = (name for name in sorted(os.listdir(frame_path)) if name.endswith('.jpg'))
    for name in jpg_names:
        full_path = os.path.join(frame_path, name)
        yield imageio.imread(full_path)
def frames2video(frame_path, video_path, fps, show_progress=False, codec='libx264', quality=8):
    """Encode the ``.jpg`` frames found in `frame_path` into the video file `video_path`.

    Args:
        frame_path: Directory containing the frames; files are used in sorted filename order.
        video_path: Output video file. Its parent directory is created if needed.
        fps: Frames per second passed to the writer.
        show_progress: When True, print a line after each frame is written.
        codec: Codec name passed to the writer.
        quality: Quality setting passed to the writer.
    Raises:
        ValueError: If `frame_path` contains no ``.jpg`` file.
    """
    # Pick the sample from the same files that will be encoded. os.listdir order is
    # arbitrary and its first entry may not be a frame at all.
    frame_names = sorted(name for name in os.listdir(frame_path) if name.endswith('.jpg'))
    if not frame_names:
        raise ValueError(f'no .jpg frames found in {frame_path}')
    sample_frame = imageio.imread(os.path.join(frame_path, frame_names[0]))
    height, width = sample_frame.shape[:2]
    size = (width, height)
    video_dir = os.path.dirname(video_path)
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    if video_dir:
        os.makedirs(video_dir, exist_ok=True)
    writer = imageio.get_writer(video_path, fps=fps, codec=codec, quality=quality)
    try:
        frames = write_frames(frame_path, fps=fps, size=size, codec=codec, quality=quality)
        # Count frames locally rather than asking the writer for its length.
        # NOTE(review): get_length() appears to be a reader-side method in imageio - confirm.
        for written, frame in enumerate(frames, start=1):
            writer.append_data(frame)
            if show_progress:
                print(f'Frame {written} written')
    finally:
        # Close the writer even if reading or encoding a frame fails.
        writer.close()
if __name__ == '__main__':
    # Demo: encode the frames under frame_path into video_path at 30 fps,
    # without per-frame progress output.
    frame_path = "/data0/datasets/davis/JPEGImages/480p/dog-gooses/"
    video_path = "./demo/dog-gooses/original_video.mp4"
    fps = 30
    frames2video(frame_path, video_path, fps, show_progress=False)

View File

@@ -0,0 +1,12 @@
import cv2
def click_event(event, x, y, flags, param):
    """Mouse callback: print the (x, y) position of every left-button press; ignore other events."""
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    message = "Point coordinates ({}, {})".format(x, y)
    print(message)
# Show the sample image and report clicked coordinates until a key is pressed.
img = cv2.imread("./example/remove-anything/dog.jpg")
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside cv2.imshow.
if img is None:
    raise FileNotFoundError("could not read ./example/remove-anything/dog.jpg")
cv2.imshow("Image", img)
cv2.setMouseCallback("Image", click_event)
cv2.waitKey(0)
cv2.destroyAllWindows()

View File

@@ -0,0 +1,160 @@
import cv2
from matplotlib import pyplot as plt
import PIL.Image as Image
import numpy as np
def _upscale_short_side(image, mask, crop_size):
    """Resize image and mask proportionally so the shorter side equals `crop_size` when it is smaller."""
    height, width = image.shape[:2]
    if min(height, width) >= crop_size:
        return image, mask
    aspect_ratio = float(width) / float(height)
    if height < width:
        new_height = crop_size
        new_width = int(new_height * aspect_ratio)
    else:
        new_width = crop_size
        new_height = int(new_width / aspect_ratio)
    image = cv2.resize(image, (new_width, new_height))
    mask = cv2.resize(mask, (new_width, new_height))
    return image, mask


def _pad_to_square(image, mask):
    """Zero-pad the shorter axis symmetrically; return (image, mask, padding, padding_side)."""
    height, width = image.shape[:2]
    if height < width:
        padding = width - height
        padding_side = 'h'
        spatial_pad = ((padding // 2, padding - padding // 2), (0, 0))
    else:
        padding = height - width
        padding_side = 'w'
        spatial_pad = ((0, 0), (padding // 2, padding - padding // 2))
    # Only the two spatial axes are padded; a channel axis, if present, gets (0, 0).
    image = np.pad(image, spatial_pad + ((0, 0),) * (image.ndim - 2), 'constant')
    mask = np.pad(mask, spatial_pad, 'constant')
    return image, mask, padding, padding_side


def _prepare_for_crop(image, mask, crop_size):
    """Apply the shared pre-processing and locate the crop window.

    Both public functions call this so that they always agree on the crop position.

    Returns:
        (image, mask, crop_x, crop_y, size_before_padding, pad_info) where
        `size_before_padding` is (height, width) after the optional upscale and `pad_info` is
        None, or (padding, padding_side, resize_factor) when the pad-and-shrink step was applied.
    """
    image, mask = _upscale_short_side(image, mask, crop_size)
    size_before_padding = image.shape[:2]
    # Find the bounding box of the mask
    x, y, w, h = cv2.boundingRect(mask)
    pad_info = None
    # If the crop_size square cannot cover the entire mask, pad to square and shrink
    # so that the longer side of the mask's bounding box becomes crop_size.
    if w > crop_size or h > crop_size:
        image, mask, padding, padding_side = _pad_to_square(image, mask)
        resize_factor = crop_size / max(w, h)
        image = cv2.resize(image, (0, 0), fx=resize_factor, fy=resize_factor)
        mask = cv2.resize(mask, (0, 0), fx=resize_factor, fy=resize_factor)
        x, y, w, h = cv2.boundingRect(mask)
        pad_info = (padding, padding_side, resize_factor)
    # Clamp against the CURRENT size (after any padding/shrinking) so the window stays
    # inside the array, and never let the upper bound go negative.
    cur_height, cur_width = image.shape[:2]
    crop_x = min(max(x + w // 2 - crop_size // 2, 0), max(cur_width - crop_size, 0))
    crop_y = min(max(y + h // 2 - crop_size // 2, 0), max(cur_height - crop_size, 0))
    return image, mask, crop_x, crop_y, size_before_padding, pad_info


def crop_for_filling_pre(image: np.ndarray, mask: np.ndarray, crop_size: int = 512):
    """Cut a `crop_size` x `crop_size` window centred on the mask's bounding box.

    The image is first upscaled if its shorter side is below `crop_size`. If the mask's bounding
    box is larger than the window, image and mask are padded to a square and shrunk until the
    box fits.

    Args:
        image: Image array; the first two axes are (height, width).
        mask: 2-D mask array with the same height and width as `image`.
        crop_size: Side length of the crop window.
    Returns:
        `(cropped_image, cropped_mask)`.
    """
    image, mask, crop_x, crop_y, _, _ = _prepare_for_crop(image, mask, crop_size)
    # Crop the image
    cropped_image = image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size]
    cropped_mask = mask[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size]
    return cropped_image, cropped_mask
def crop_for_filling_post(
    image: np.ndarray,
    mask: np.ndarray,
    filled_image: np.ndarray,
    crop_size: int = 512,
):
    """Paste a filled crop back into the full image.

    `image`, `mask` and `crop_size` must be the values that were passed to
    `crop_for_filling_pre`, so that the same crop window is recomputed.

    Args:
        image: The original image. It is not modified.
        mask: The original mask; pixels equal to 255 select what is taken from the filled result.
        filled_image: The processed crop, with the shape `crop_for_filling_pre` returned.
        crop_size: Side length of the crop window.
    Returns:
        A copy of `image` in which the pixels where `mask == 255` come from the filled result
        mapped back to the original resolution.
    """
    image_copy = image.copy()
    mask_copy = mask.copy()
    height_ori, width_ori = image.shape[:2]
    image, mask, crop_x, crop_y, size_before_padding, pad_info = _prepare_for_crop(image, mask, crop_size)
    if pad_info is None:
        # Without padding `image` may still be the caller's array; copy it so the
        # assignment below does not write into the input.
        image = image.copy()
    # Fill the image
    image[crop_y:crop_y + crop_size, crop_x:crop_x + crop_size] = filled_image
    if pad_info is not None:
        padding, padding_side, resize_factor = pad_info
        # Undo the shrink, then strip the padding. The padding was added at the size the image
        # had just before padding (after the optional upscale), so that size is used here,
        # not the original one.
        image = cv2.resize(image, (0, 0), fx=1 / resize_factor, fy=1 / resize_factor)
        height, width = size_before_padding
        if padding_side == 'h':
            image = image[padding // 2:padding // 2 + height, :]
        else:
            image = image[:, padding // 2:padding // 2 + width]
    # Back to the original resolution (this also undoes the optional upscale).
    image = cv2.resize(image, (width_ori, height_ori))
    image_copy[mask_copy == 255] = image[mask_copy == 255]
    return image_copy
if __name__ == '__main__':
    # Demo on the groceries sample. Other image / mask pairs that can be used instead:
    #   example/boat.jpg             / example/boat_mask_2.png
    #   example/bridge.jpg           / example/bridge_mask_2.png
    #   example/person_umbrella.jpg  / example/person_umbrella_mask_2.png
    #   example/hippopotamus.jpg     / example/hippopotamus_mask_1.png
    image = cv2.imread('./example/groceries.jpg')
    mask = cv2.imread('example/groceries_mask_2.png', cv2.IMREAD_GRAYSCALE)

    cropped_image, cropped_mask = crop_for_filling_pre(image, mask)

    # ^ ------------------------------------------------------------------------------------
    # ^ Please conduct inpainting or filling here on the cropped image with the cropped mask
    # ^ ------------------------------------------------------------------------------------
    # e.g.
    # cropped_image[cropped_mask==255] = 0

    # Save both crops first, then report their shapes.
    crops = (('cropped_image.jpg', cropped_image), ('cropped_mask.jpg', cropped_mask))
    for out_name, crop in crops:
        cv2.imwrite(out_name, crop)
    for _, crop in crops:
        print(crop.shape)

    # Paste the (here unmodified) crop back and save the result.
    image = crop_for_filling_post(image, mask, cropped_image)
    cv2.imwrite('filled_image.jpg', image)
    print(image.shape)

View File

@@ -0,0 +1,50 @@
import cv2
import numpy as np
def paste_object(source, source_mask, target, target_coords, resize_scale=1):
    """Paste the masked object from `source` into `target`, centred at `target_coords`.

    The object is the bounding box of the non-zero region of `source_mask`. Only pixels whose
    mask value is exactly 255 are copied. Parts of the object that fall outside `target` are
    clipped.

    Args:
        source: Source image.
        source_mask: 2-D mask for `source`.
        target: Target image. It is modified in place and also returned.
        target_coords: `(x, y)` centre position in `target`; must lie inside the target.
        resize_scale: Scale factor applied to the object before pasting.
    Returns:
        `(target, target_mask)` where `target_mask` has the target's height and width and is 255
        at every pasted pixel and 0 elsewhere.
    """
    assert target_coords[0] < target.shape[1] and target_coords[1] < target.shape[0]
    # Find the bounding box of the source_mask
    x, y, w, h = cv2.boundingRect(source_mask)
    assert h < source.shape[0] and w < source.shape[1]
    target_mask = np.zeros(target.shape[:2], dtype=target.dtype)
    obj = source[y:y + h, x:x + w]
    obj_msk = source_mask[y:y + h, x:x + w]
    if obj_msk.size == 0:
        # Empty mask: nothing to paste (and cv2.resize would reject an empty array).
        return target, target_mask
    if resize_scale != 1:
        obj = cv2.resize(obj, (0, 0), fx=resize_scale, fy=resize_scale)
        obj_msk = cv2.resize(obj_msk, (0, 0), fx=resize_scale, fy=resize_scale)
    # Use the array shape, not a fresh boundingRect of the mask. The slices below must
    # match the array sizes exactly or the boolean indexing fails.
    h, w = obj_msk.shape[:2]
    # Where the object's top-left corner would be if nothing were clipped.
    left = target_coords[0] - w // 2
    top = target_coords[1] - h // 2
    # Intersection of the object's rectangle with the target, in target coordinates.
    x0, y0 = max(left, 0), max(top, 0)
    x1 = min(left + w, target.shape[1])
    y1 = min(top + h, target.shape[0])
    if x1 <= x0 or y1 <= y0:
        return target, target_mask
    # The same intersection in object coordinates.
    obj = obj[y0 - top:y1 - top, x0 - left:x1 - left]
    obj_msk = obj_msk[y0 - top:y1 - top, x0 - left:x1 - left]
    selected = obj_msk == 255
    target[y0:y1, x0:x1][selected] = obj[selected]
    target_mask[y0:y1, x0:x1][selected] = 255
    return target, target_mask
if __name__ == '__main__':
    # Demo: paste the boat from one sample image into the hippopotamus sample.
    source = cv2.imread('example/boat.jpg')
    source_mask = cv2.imread('example/boat_mask_1.png', cv2.IMREAD_GRAYSCALE)
    target = cv2.imread('example/hippopotamus.jpg')
    print(source.shape, source_mask.shape, target.shape)

    target_coords = (700, 400)  # (x, y) centre of the pasted object
    resize_scale = 1
    target, target_mask = paste_object(
        source, source_mask, target, target_coords, resize_scale
    )

    cv2.imwrite('target_pasted.png', target)
    cv2.imwrite('target_mask.png', target_mask)
    print(target.shape, target_mask.shape)

View File

@@ -0,0 +1,85 @@
import cv2
import numpy as np
from PIL import Image
from typing import Any, Dict, List
def load_img_to_array(img_p):
    """Load the image at `img_p` into a numpy array.

    RGBA images are converted to RGB first (the alpha channel is dropped); every other mode is
    returned as PIL decodes it.
    """
    # The context manager closes the underlying file as soon as the pixels have been
    # copied into the array, instead of leaving that to garbage collection.
    with Image.open(img_p) as img:
        if img.mode == "RGBA":
            img = img.convert("RGB")
        return np.array(img)
def save_array_to_img(img_arr, img_p):
    """Cast `img_arr` to uint8 and save it as an image at `img_p`."""
    pixels = img_arr.astype(np.uint8)
    picture = Image.fromarray(pixels)
    picture.save(img_p)
def dilate_mask(mask, dilate_factor=15):
    """Return `mask` cast to uint8 and dilated once with a square kernel of ones.

    `dilate_factor` is the side length of the kernel.
    """
    kernel = np.ones((dilate_factor, dilate_factor), np.uint8)
    mask_u8 = mask.astype(np.uint8)
    return cv2.dilate(mask_u8, kernel, iterations=1)
def erode_mask(mask, dilate_factor=15):
    """Return `mask` cast to uint8 and eroded once with a square kernel of ones.

    Despite its name, `dilate_factor` is the side length of the erosion kernel.
    """
    kernel = np.ones((dilate_factor, dilate_factor), np.uint8)
    mask_u8 = mask.astype(np.uint8)
    return cv2.erode(mask_u8, kernel, iterations=1)
def show_mask(ax, mask: np.ndarray, random_color=False):
    """Draw `mask` on `ax` as a semi-transparent RGBA overlay.

    The mask is cast to uint8 and, if its maximum is 255, scaled to the 0..1 range. The overlay
    colour is a fixed blue, or a random RGB when `random_color` is True; alpha is 0.6 either way.
    The overlay takes its height and width from the last two axes of the mask.
    """
    binary = mask.astype(np.uint8)
    scaled = binary / 255 if np.max(binary) == 255 else binary
    if random_color:
        rgb = np.random.random(3)
    else:
        rgb = np.array([30 / 255, 144 / 255, 255 / 255])
    rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    rows, cols = scaled.shape[-2:]
    # Broadcast (rows, cols, 1) against (1, 1, 4) to get an RGBA image.
    overlay = scaled.reshape(rows, cols, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(ax, coords: List[List[float]], labels: List[int], size=375):
    """Scatter the labelled points on `ax` as white-edged stars.

    Points with label 0 are drawn in red first, then points with label 1 in green. `coords`
    holds one (x, y) pair per point and `labels` the matching label; `size` is the marker size.
    """
    pts = np.array(coords)
    tags = np.array(labels)
    for tag, colour in ((0, 'red'), (1, 'green')):
        chosen = pts[tags == tag]
        xs, ys = chosen[:, 0], chosen[:, 1]
        ax.scatter(xs, ys, color=colour, marker='*',
                   s=size, edgecolor='white', linewidth=1.25)
def get_clicked_point(img_path):
    """Show the image at `img_path` and return the last left-clicked point.

    Each left click marks the new point in red and repaints the previous one in black. A right
    click ends the interaction. Returns ``[x, y]`` of the last left click, or an empty list if
    there was none.
    """
    img = cv2.imread(img_path)
    cv2.namedWindow("image")
    cv2.imshow("image", img)
    # Mutable state shared with the callback.
    state = {"point": [], "running": True}

    def mouse_callback(event, x, y, flags, param):
        if event == cv2.EVENT_RBUTTONDOWN:
            state["running"] = False
            return
        if event != cv2.EVENT_LBUTTONDOWN:
            return
        if state["point"]:
            # paint over the previously selected point
            cv2.circle(img, tuple(state["point"]), 5, (0, 0, 0), -1)
        state["point"] = [x, y]
        cv2.circle(img, tuple(state["point"]), 5, (0, 0, 255), -1)
        cv2.imshow("image", img)

    cv2.setMouseCallback("image", mouse_callback)
    # Pump GUI events until a right click clears the flag.
    while state["running"]:
        cv2.waitKey(1)
    cv2.destroyAllWindows()
    return state["point"]

View File

@@ -0,0 +1,25 @@
import cv2
import imageio
import os
import shutil
from tqdm import tqdm
from glob import glob
from os import path as osp
def video2frames(video_path, frame_path):
    """Extract every frame of a video into numbered JPEG files.

    Frames are written to `frame_path` (created if missing) as ``00000.jpg``, ``00001.jpg``, ...

    Args:
        video_path: Path of the video to read.
        frame_path: Directory that receives the frame images.
    Returns:
        `(fps, initial_img)`: the frame rate reported by the capture and a copy of the first
        frame (None when the reported frame count is 0).
    Raises:
        AssertionError: If a frame within the reported frame count cannot be read.
    """
    video = cv2.VideoCapture(video_path)
    try:
        os.makedirs(frame_path, exist_ok=True)
        frame_num = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        initial_img = None
        for idx in tqdm(range(frame_num), 'Extract frames'):
            success, image = video.read()
            # Check success before touching `image`. Raised explicitly so the check is
            # not stripped under `python -O`.
            if not success:
                raise AssertionError('extract the {}th frame in video {} failed!'.format(idx, video_path))
            if idx == 0:
                initial_img = image.copy()
            cv2.imwrite("{}/{:05d}.jpg".format(frame_path, idx), image)
    finally:
        # Release the capture handle even when extraction fails part-way.
        video.release()
    return fps, initial_img
if __name__ == '__main__':
    # Demo: dump the frames of the breakdance-flare sample video next to it.
    video_path = './example/remove-anything-video/breakdance-flare/original_video.mp4'
    frame_path = './example/remove-anything-video/breakdance-flare/frames/'
    extraction_result = video2frames(video_path, frame_path)
    fps, initial_img = extraction_result

View File

@@ -0,0 +1,14 @@
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import cv2
# Draw a red bounding box over a sample image and save the figure as bbox.png.
img = plt.imread('../example/fill-anything/sample1.png')
fig, ax = plt.subplots(1)
ax.imshow(img)

# Box corners in pixel coordinates: (x1, y1) top-left, (x2, y2) bottom-right.
x1, y1, x2, y2 = 230, 283, 352, 407
rect = patches.Rectangle(
    (x1, y1),
    x2-x1,
    y2-y1,
    linewidth=2,
    edgecolor='r',
    facecolor='none',
)
ax.add_patch(rect)
plt.savefig('bbox.png')

View File

@@ -0,0 +1,59 @@
import sys
import argparse
import numpy as np
from pathlib import Path
from matplotlib import pyplot as plt
import glob
from backend.inpaint.utils import load_img_to_array, show_mask
def setup_args(parser):
    """Register the three required string options on `parser`.

    The options are ``--input_img``, ``--input_mask_glob`` and ``--output_dir``.
    """
    option_help = (
        ("--input_img", "Path to a single input img"),
        ("--input_mask_glob", "Glob to input masks"),
        ("--output_dir", "Output path to the directory with results."),
    )
    for flag, help_text in option_help:
        parser.add_argument(flag, type=str, required=True, help=help_text)
if __name__ == "__main__":
    # Example usage:
    #   python visual_mask_on_img.py \
    #       --input_img FA_demo/FA1_dog.png \
    #       --input_mask_glob "results/FA1_dog/mask*.png" \
    #       --output_dir results
    parser = argparse.ArgumentParser()
    setup_args(parser)
    args = parser.parse_args(sys.argv[1:])

    img = load_img_to_array(args.input_img)
    img_stem = Path(args.input_img).stem
    mask_ps = sorted(glob.glob(args.input_mask_glob))

    # Results go to <output_dir>/<image stem>/with_<mask file name>.
    out_dir = Path(args.output_dir) / img_stem
    out_dir.mkdir(parents=True, exist_ok=True)

    for mask_p in mask_ps:
        mask = load_img_to_array(mask_p).astype(np.uint8)
        img_mask_p = out_dir / f"with_{Path(mask_p).name}"

        # Size the figure from the image's pixel size and the current figure dpi.
        dpi = plt.rcParams['figure.dpi']
        height, width = img.shape[:2]
        fig_size = (width/dpi/0.77, height/dpi/0.77)
        plt.figure(figsize=fig_size)
        plt.imshow(img)
        plt.axis('off')
        show_mask(plt.gca(), mask, random_color=False)
        # save the masked image
        plt.savefig(img_mask_p, bbox_inches='tight', pad_inches=0)
        plt.close()