In this case study, I tackled a unique challenge in video processing: replacing a green screen with a custom image. The project was implemented in Python using the MoviePy library together with OpenCV.
The green screen replacement posed the biggest challenge due to its dynamic nature. The program had to accurately detect the green screen and replace it with a custom image.
1. Import necessary libraries and define the green screen color range
After importing the necessary libraries and saving the image path to a variable, the first thing to do was to detect the green screen in the video clip. To do that, I needed to find the range of green pixel values in the video clip:
from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip
import numpy as np
import cv2
# Load the base video that contains the green screen.
video = VideoFileClip("./video.mp4")

# Lower/upper HSV bounds used by cv2.inRange to pick out the green screen.
# (OpenCV stores 8-bit hue in the 0-179 range, so green sits around 60.)
lower_green = np.array([43, 180, 180], dtype=np.uint8)
upper_green = np.array([63, 255, 255], dtype=np.uint8)

# Load the replacement image using OpenCV (channel order is BGR).
background_image_bgr = cv2.imread("./image.jpg")
if background_image_bgr is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface later as an obscure error
    # inside cv2.resize.
    raise FileNotFoundError("Could not read background image: ./image.jpg")

# Minimum bounding-box area (in pixels) for a green region to be replaced;
# smaller regions are treated as noise.
min_green_area_size = 50  # Adjust this value as needed
The green screen color range was defined in HSV format, which is more suitable for color detection than RGB. The background image was loaded using OpenCV, and a minimum size for green screen detection was set to ignore very small areas.
2. Detecting, Masking, Cleaning and Replacing
Next step was to detect the green screen in the video clip, create a mask for the green areas, find contours of the green areas, clean up the mask to remove any unwanted areas and replace the frame with respect to mask.
def replace_green_with_dynamic_background(frame):
    """Replace every sufficiently large green region of a frame with the background image.

    For each external contour of the green mask, the background image is
    resized to the contour's bounding rectangle and copied onto the pixels of
    that rectangle that are marked green in the mask.

    Reads the module-level names ``lower_green``, ``upper_green``,
    ``background_image_bgr`` and ``min_green_area_size``.

    Args:
        frame: RGB frame as handed over by MoviePy's ``fl_image``.

    Returns:
        A modified copy of ``frame``; the input array is left untouched.
    """
    frame_copy = frame.copy()  # Work on a copy; the original frame must not be modified
    hsv_frame = cv2.cvtColor(frame_copy, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv_frame, lower_green, upper_green)  # 255 where the pixel is green
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Use the shared threshold instead of a second hard-coded copy of it,
        # so tuning min_green_area_size actually has an effect.
        if w * h < min_green_area_size:
            continue

        # Resize the background to the bounding box, then convert it to RGB
        # (OpenCV loads images as BGR, MoviePy frames are RGB).
        resized_bgr = cv2.resize(background_image_bgr, (w, h), interpolation=cv2.INTER_AREA)
        resized_rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)

        # Only overwrite the pixels inside the box that are actually green.
        green_here = mask[y:y + h, x:x + w] != 0
        frame_copy[y:y + h, x:x + w][green_here] = resized_rgb[green_here]

    return frame_copy
I was able to achieve all of these steps by defining a function that takes a frame as input. I just needed to call this function on each frame of the video clip to replace the green screen with the custom image. In this case, we used this function with fl_image.
# Apply the green screen replacement with dynamic scaling to the video.
# fl_image builds a new clip whose frames are produced by passing each frame
# of `video` through replace_green_with_dynamic_background.
video_with_dynamic_background = video.fl_image(replace_green_with_dynamic_background)
# Write the final output video to localized_output.mp4, encoded with libx264.
video_with_dynamic_background.write_videofile("localized_output.mp4", codec="libx264")
3. Finding a new Solution
The first solution replaced the green screen perfectly, without any green-screen noise or other unwanted areas. However, it had flaws, such as rendering the entire image onto whatever part of the green screen was currently visible...
So I decided to detect the circle in the green screen and replace the green screen with the custom image. This was achieved by using the Hough Circle Transform algorithm in OpenCV.
def process_frame(frame):
    """Detect the circular green screen area and replace it with the background image.

    The green mask is blurred and fed to the Hough circle transform; the first
    detected circle is assumed to be the green screen. The background image is
    resized to the circle's bounding square and copied onto the pixels that
    are both green and inside the circle.

    Reads the module-level names ``lower_green``, ``upper_green`` and
    ``background_img_cv``.
    NOTE(review): ``background_img_cv`` is copied into an RGB frame without a
    colour conversion, so it presumably has to be an RGB image - confirm.

    Args:
        frame: RGB frame as handed over by MoviePy.

    Returns:
        A new frame with the replacement applied, or the unmodified input
        frame when no usable circle is found.
    """
    h, w = frame.shape[:2]

    # Convert straight to HSV for colour segmentation (no BGR round trip needed).
    hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv, lower_green, upper_green)

    # Reduce noise in the mask before circle detection.
    mask_blurred = cv2.GaussianBlur(mask, (9, 9), 2)
    circles = cv2.HoughCircles(
        mask_blurred,
        cv2.HOUGH_GRADIENT,
        dp=1.5,
        minDist=h // 4,
        param1=50,
        param2=30,
        minRadius=50,
        maxRadius=h // 2,
    )
    if circles is None:
        print("No circle detected in this frame.")
        return frame

    # Assume the first detected circle is the green screen. Convert to plain
    # Python ints: keeping the values as np.uint16 makes `center - radius`
    # wrap around to a huge positive number whenever the circle crosses the
    # left or top edge, which defeated the clipping logic below.
    center_x, center_y, radius = (int(round(float(v))) for v in circles[0][0])

    if radius <= 0:
        print("Invalid circle radius detected.")
        return frame
    if not (0 <= center_x < w and 0 <= center_y < h):
        print("Circle center is outside frame boundaries.")
        return frame

    # Pixels that are green AND inside the detected circle.
    rows, cols = np.ogrid[:h, :w]
    inside_circle = (cols - center_x) ** 2 + (rows - center_y) ** 2 <= radius ** 2
    green_mask = (mask > 0) & inside_circle

    # Bounding square of the circle, before and after clipping to the frame.
    # With the centre inside the frame and radius >= 1 the clipped square is
    # never empty, so no further validity checks are required.
    x_start, y_start = center_x - radius, center_y - radius
    x_start_frame, y_start_frame = max(x_start, 0), max(y_start, 0)
    x_end_frame, y_end_frame = min(center_x + radius, w), min(center_y + radius, h)

    # Resize the background to the full square, then crop away the part that
    # falls outside the frame so the image does not shift when clipped.
    diameter = 2 * radius
    bg_resized = cv2.resize(background_img_cv, (diameter, diameter))
    x_start_bg, y_start_bg = x_start_frame - x_start, y_start_frame - y_start
    bg_region = bg_resized[
        y_start_bg:y_start_bg + (y_end_frame - y_start_frame),
        x_start_bg:x_start_bg + (x_end_frame - x_start_frame),
    ]

    # Copy the background onto the masked pixels inside the clipped square.
    output_frame = frame.copy()
    roi_mask = green_mask[y_start_frame:y_end_frame, x_start_frame:x_end_frame]
    output_frame[y_start_frame:y_end_frame, x_start_frame:x_end_frame][roi_mask] = bg_region[roi_mask]
    return output_frame
The flaw in this approach was that the algorithm was utterly dependent on the parameters of HoughCircles, which needed detailed fine-tuning. It was not a reliable solution: the green screen was not always a perfect circle, and the replaced image shook a lot because the detected circle jittered from frame to frame, which looked very artificial. I even took a weighted average of the detected circle positions and placed the image accordingly, but this produced a zoom-in/zoom-out effect, which was another obstacle for this solution.
4. Stabilizing the Algorithm
This again had some flaws: the green screen replacement was not stable and the custom image was not placed correctly. To stabilize the algorithm, I needed to implement a method that could precalculate the green screen area and adjust the custom image accordingly.
To improve on this solution, i needed to implement a more advanced algorithm that could handle the green screen replacement more accurately. This involved using a more sophisticated method to detect the green screen, clean up the mask, and replace the green screen with the custom image.
My approach this time was to combine both methods with an additional feature to stabilize the algorithm. I determined the frame range in which the green screen was fully visible and used it to divide the algorithm into two parts. The first part precalculates the green screen area using the Hough circle transform, and the second part detects it using bounding-rectangle detection.
def _replace_green_bounding_boxes(frame, background_image_bgr, min_green_area_size):
    """Fill each large green region's bounding box with the resized background image."""
    frame_copy = frame.copy()  # Work on a copy; the original frame must not be modified
    hsv_frame = cv2.cvtColor(frame_copy, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv_frame, lower_green, upper_green)
    mask = cv2.dilate(mask, np.ones((8, 8), np.uint8), iterations=1)  # Grow the mask outwards
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        if box_w * box_h < min_green_area_size:  # Ignore noise / small green areas
            continue
        # Resize to the box, then convert BGR (OpenCV) to RGB (MoviePy frame).
        resized_bgr = cv2.resize(background_image_bgr, (box_w, box_h), interpolation=cv2.INTER_AREA)
        resized_rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)
        green_here = mask[y:y + box_h, x:x + box_w] != 0
        frame_copy[y:y + box_h, x:x + box_w][green_here] = resized_rgb[green_here]
    return frame_copy


def _paste_background_in_circle(frame, mask, circle, background_img_cv):
    """Copy the background onto pixels that are set in ``mask`` and lie inside ``circle``."""
    frame_h, frame_w = frame.shape[:2]
    center_x, center_y, radius = (int(v) for v in circle)
    if radius <= 0:
        # cv2.resize cannot produce an empty image; nothing sensible to paste.
        return frame

    # Bounding square of the circle, before and after clipping to the frame.
    x0, y0 = center_x - radius, center_y - radius
    x_start, y_start = max(0, x0), max(0, y0)
    x_end, y_end = min(frame_w, center_x + radius), min(frame_h, center_y + radius)
    if x_end <= x_start or y_end <= y_start:
        return frame

    rows, cols = np.ogrid[:frame_h, :frame_w]
    inside_circle = (cols - center_x) ** 2 + (rows - center_y) ** 2 <= radius ** 2
    green_mask = (mask > 0) & inside_circle

    bg_resized = cv2.resize(background_img_cv, (2 * radius, 2 * radius))
    # When the square is clipped at the left/top edge the crop has to start at
    # the clipped offset, otherwise the image slides relative to the circle.
    bg_x, bg_y = x_start - x0, y_start - y0
    bg_region = bg_resized[bg_y:bg_y + (y_end - y_start), bg_x:bg_x + (x_end - x_start)]

    output_frame = frame.copy()
    roi_mask = green_mask[y_start:y_end, x_start:x_end]
    output_frame[y_start:y_end, x_start:x_end][roi_mask] = bg_region[roi_mask]
    return output_frame


def process_frame(frame, frame_number, background_img_cv, video_clip, min_green_area_size, background_image_bgr):
    """Replace the green screen in one frame, choosing the strategy by frame number.

    * Frames inside the module-level ``frame_range`` (inclusive) use plain
      bounding-box replacement of the green mask.
    * Frames before 45 or after 245 are returned unchanged.
    * The remaining frames detect the circular screen with the Hough circle
      transform, smooth it over ``circle_history`` and paste the background
      inside the smoothed circle.

    Reads the module-level names ``frame_range``, ``lower_green``,
    ``upper_green``, ``circle_history`` and ``history_length`` and calls
    ``remove_small_contours``, ``find_future_circle`` and
    ``weighted_exponential_average_circle``. ``circle_history`` is mutated.

    Args:
        frame: RGB frame.
        frame_number: Index of ``frame`` within the video.
        background_img_cv: Background image pasted on the circle path; it is
            copied into the RGB frame without a colour conversion.
        video_clip: Source clip, used to look ahead when no circle is found.
        min_green_area_size: Minimum bounding-box area (pixels) on the
            bounding-box path.
        background_image_bgr: Background image in BGR order, used on the
            bounding-box path.

    Returns:
        The processed frame, or ``frame`` itself when nothing was replaced.
    """
    if frame_range[0] <= frame_number <= frame_range[1]:
        return _replace_green_bounding_boxes(frame, background_image_bgr, min_green_area_size)

    # Outside the window in which any processing is applied at all.
    if frame_number < 45 or frame_number > 245:
        return frame

    frame_h = frame.shape[0]

    # Build the green mask and clean it up before circle detection.
    hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv, lower_green, upper_green)
    mask = remove_small_contours(mask, 100)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((9, 9), np.uint8))
    mask = cv2.dilate(mask, np.ones((9, 9), np.uint8), iterations=1)  # Grow the mask outwards
    mask_blurred = cv2.GaussianBlur(mask, (9, 9), 2)

    circles = cv2.HoughCircles(mask_blurred, cv2.HOUGH_GRADIENT, dp=1.5, minDist=frame_h // 6,
                               param1=80, param2=35, minRadius=40, maxRadius=frame_h // 3)
    if circles is not None:
        detected_circle = np.uint16(np.around(circles))[0][0]
    else:
        # No circle here: look a few frames ahead, then fall back to the most
        # recent known circle.
        detected_circle = find_future_circle(video_clip, frame_number)
        if detected_circle is None:
            if not circle_history:
                return frame
            detected_circle = circle_history[-1]

    circle_history.append(detected_circle)
    # Trim on every path so the history cannot grow without bound while
    # detection keeps failing.
    while len(circle_history) > history_length:
        circle_history.pop(0)

    smoothed_circle = weighted_exponential_average_circle(circle_history)
    if not smoothed_circle:
        return frame
    return _paste_background_in_circle(frame, mask, smoothed_circle, background_img_cv)
def process_video_frames(video_clip, background_img_cv, text_to_add, min_green_area_size, background_image_bgr):
    """Build a clip with the green screen replaced and the caption text added.

    Args:
        video_clip: Source MoviePy clip.
        background_img_cv: Background image forwarded to ``process_frame``.
        text_to_add: Caption forwarded to ``add_text_style_to_video``.
        min_green_area_size: Forwarded to ``process_frame``.
        background_image_bgr: Forwarded to ``process_frame``.

    Returns:
        The clip returned by ``add_text_style_to_video``; frames are processed
        lazily when the clip is rendered.
    """
    # Start every video with an empty smoothing history; circle_history is a
    # module-level list and would otherwise carry circles over from the
    # previously rendered video. Cleared in place so the global binding stays.
    circle_history.clear()

    fps = video_clip.fps

    def transform(get_frame, t):
        # Derive the frame index from the timestamp instead of counting calls:
        # a call counter drifts as soon as MoviePy requests a frame outside of
        # strict playback order (for example when it probes a frame while a
        # clip is being constructed).
        frame_number = int(round(t * fps))
        return process_frame(get_frame(t), frame_number, background_img_cv, video_clip,
                             min_green_area_size, background_image_bgr)

    return add_text_style_to_video(video_clip.fl(transform), text_to_add)
This approach worked like a charm because it:
- Detected green circular screen beforehand and took advantage of it by placing the image with respect to this info
- Switched to simple replacement of the green screen with the image, which stabilized the image and greatly improved performance
- Returned the frame immediately if the processed frame number is out of specified range which also made huge impact on the performance
5. Rest of the Code
- Libraries to add, path variables initializations, directory check
import os
import random
import numpy as np
import cv2
import pandas as pd
from PIL import Image
from moviepy.editor import VideoFileClip, TextClip, AudioFileClip, CompositeVideoClip
import pillow_avif
# Paths to the video, image, and text files
video_path = './case/video.mp4'
images_directory = './case/images/'
music_directory = './case/musics/'
text_excel_path = './case/texts.xlsx'
output_directory = './output_videos/'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Load the input video
video = VideoFileClip(video_path)

# Get all image file paths from the images directory.
# Sorted because os.listdir returns entries in arbitrary order, while main()
# pairs image i with music i and text i - sorting keeps that pairing stable.
image_files = sorted(
    os.path.join(images_directory, img)
    for img in os.listdir(images_directory)
    if img.endswith(('.avif', '.jpg', '.png', '.jpeg', '.webp'))
)

# Get all music file paths from the music directory (sorted for the same reason)
music_files = sorted(
    os.path.join(music_directory, music)
    for music in os.listdir(music_directory)
    if music.endswith(('.mp3', '.wav'))
)

# Load the caption texts: first column, DataFrame positions 1..14.
# NOTE(review): position 0 is skipped and the count is fixed at 14 - confirm
# this matches the spreadsheet layout and the number of images.
df = pd.read_excel(text_excel_path)
text_to_add = [df.iloc[i, 0] for i in range(1, 15)]

# History of detected circles for averaging
circle_history = []

# Maximum number of circles kept in the history for smoothing
history_length = 10
# Base of the exponential weights used by weighted_exponential_average_circle
decay_factor = 0.2

# Green color range in HSV
lower_green = np.array([40, 150, 150])
upper_green = np.array([80, 255, 255])

# Hardcoded frame range for specific processing
frame_range = (70, 221)
- Convert AVIF to JPEG (the helper is named `convert_avif_to_png`, but it writes a JPEG file)
def convert_avif_to_png(avif_file):
    """Convert a .avif image to JPEG and return the path to the new file.

    Despite its name, this function writes a JPEG file; the name is kept so
    existing callers keep working.

    Args:
        avif_file: Path of the image to convert.

    Returns:
        Path of the written file: ``avif_file`` with its extension replaced
        by ``.jpeg``.
    """
    # Swap only the extension; str.replace would also rewrite ".avif"
    # occurring anywhere else in the path (e.g. in a directory name).
    base_path, _ = os.path.splitext(avif_file)
    new_file_path = base_path + '.jpeg'
    with Image.open(avif_file) as img:
        # JPEG cannot store alpha or palette modes, so normalise to RGB first.
        img.convert("RGB").save(new_file_path, format="JPEG")
    return new_file_path
- Remove Small Contours
def remove_small_contours(mask, min_contour_area):
    """Keep only the external contours whose area reaches ``min_contour_area``.

    The mask is cleared and redrawn in place with the surviving contours
    filled with 255; the same array is returned.
    """
    found, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    survivors = []
    for candidate in found:
        if cv2.contourArea(candidate) >= min_contour_area:
            survivors.append(candidate)

    # Wipe the mask, then paint the surviving contours back as filled shapes.
    mask.fill(0)
    cv2.drawContours(mask, survivors, -1, 255, thickness=cv2.FILLED)
    return mask
- Weighted Exponential Average Circle
def weighted_exponential_average_circle(circles, decay=None):
    """Return the exponentially weighted average of a sequence of circles.

    The most recent circle (the last element) gets weight 1, the one before
    it ``decay``, then ``decay ** 2`` and so on.

    Args:
        circles: Sequence of ``(x, y, radius)`` triples, oldest first.
        decay: Base of the exponential weights. Defaults to the module-level
            ``decay_factor`` when omitted.

    Returns:
        ``[x, y, radius]`` as ints (truncated), or ``None`` when ``circles``
        is empty.
    """
    if not circles:
        return None
    if decay is None:
        decay = decay_factor

    x_sum = y_sum = radius_sum = weight_sum = 0
    for age, circle in enumerate(reversed(circles)):  # age 0 == most recent circle
        weight = decay ** age
        x_sum += circle[0] * weight
        y_sum += circle[1] * weight
        radius_sum += circle[2] * weight
        weight_sum += weight

    return [int(x_sum / weight_sum), int(y_sum / weight_sum), int(radius_sum / weight_sum)]
- Find Future Circles
def find_future_circle(video_clip, frame_number, max_search=5):
    """Search the next ``max_search`` frames for a detectable circle.

    Args:
        video_clip: Clip to read the upcoming frames from.
        frame_number: Index of the current frame; the search starts at the
            following frame.
        max_search: Maximum number of upcoming frames to inspect.

    Returns:
        The first circle found as a ``np.uint16`` array ``[x, y, radius]``,
        or ``None`` when no inspected frame yields a circle.
    """
    frame_count = int(video_clip.fps * video_clip.duration)  # Total frames in the video
    # "+ 1" so that exactly max_search frames are inspected; the previous
    # upper bound stopped one frame early.
    last_exclusive = min(frame_number + max_search + 1, frame_count)

    for future_frame_number in range(frame_number + 1, last_exclusive):
        future_frame = video_clip.get_frame(future_frame_number / video_clip.fps)
        future_hsv = cv2.cvtColor(future_frame, cv2.COLOR_RGB2HSV)
        # NOTE(review): these HSV bounds are hard-coded here and are stricter
        # than the module-level lower_green/upper_green, and the mask is not
        # cleaned with morphology as in process_frame - confirm this is intended.
        future_mask = cv2.inRange(future_hsv, np.array([40, 180, 180]), np.array([80, 255, 255]))
        future_mask_blurred = cv2.GaussianBlur(future_mask, (9, 9), 2)

        frame_h = future_frame.shape[0]
        future_circles = cv2.HoughCircles(future_mask_blurred, cv2.HOUGH_GRADIENT, dp=1.5,
                                          minDist=frame_h // 6, param1=80, param2=35,
                                          minRadius=40, maxRadius=frame_h // 3)
        if future_circles is not None:
            # Stop at the first frame that yields a circle and use its first hit.
            return np.uint16(np.around(future_circles))[0][0]

    return None
- Add Text Style to Video
def add_text_style_to_video(video_clip, text_to_add):
    """Overlay a styled caption on the video for its whole duration.

    Args:
        video_clip: Clip to caption.
        text_to_add: Caption text; non-string values are converted with
            ``str`` first.

    Returns:
        A ``CompositeVideoClip`` of the video with the caption on top.
    """
    # Values read from a spreadsheet are not guaranteed to be strings
    # (e.g. numeric cells), so coerce before handing them to TextClip.
    caption = str(text_to_add)
    # Pixel sizes should be whole numbers; w * 0.92 is a float.
    caption_width = int(video_clip.w * 0.92)

    txt_clip = TextClip(caption, fontsize=70, font='Arial-Bold', color='white',
                        stroke_color='black', stroke_width=4, align='center',
                        method='caption', size=(caption_width, None))
    # Horizontally centred, 250 px from the top, shown for the full clip.
    txt_clip = txt_clip.set_position(('center', 250)).set_duration(video_clip.duration)
    return CompositeVideoClip([video_clip, txt_clip])
- Add Background Music
def add_background_music(i, video_clip):
    """Attach the i-th music file to the video, cut to the video's duration.

    The track is taken from the start of the audio file. When ``music_files``
    has no entry at index ``i`` the video is returned unchanged.
    """
    # Guard clause: nothing to attach for this index.
    if i >= len(music_files):
        return video_clip

    track = AudioFileClip(music_files[i])
    # Take the audio from 0 up to the length of the video.
    trimmed_track = track.subclip(0, 0 + video_clip.duration)
    return video_clip.set_audio(trimmed_track)
- Main Code
def main():
    """Render one output video per background image.

    For every entry of ``image_files`` the green screen of ``video`` is
    replaced with that image, the i-th caption and the i-th music track are
    added, and the result is written to ``output_directory``.
    """
    for i, image_file in enumerate(image_files):
        if image_file.endswith(".avif"):
            image_file = convert_avif_to_png(image_file)

        # Decode the image once; it is needed in both channel orders.
        background_image_bgr = cv2.imread(image_file)
        if background_image_bgr is None:
            # cv2.imread returns None for unreadable files instead of raising.
            print(f"Could not read background image, skipping: {image_file}")
            continue
        background_img_cv = cv2.cvtColor(background_image_bgr, cv2.COLOR_BGR2RGB)

        min_green_area_size = 1000  # Minimum size to avoid noise
        result = process_video_frames(video, background_img_cv, text_to_add[i],
                                      min_green_area_size, background_image_bgr)

        # Add background music to the video
        result_with_music = add_background_music(i, result)

        image_name = os.path.splitext(os.path.basename(image_file))[0]
        output_video_path = os.path.join(output_directory, f'localized_output_{image_name}.mp4')
        result_with_music.write_videofile(output_video_path, codec='libx264', threads=4)


if __name__ == '__main__':
    main()
Comments
Be the first one to comment!