Automatically localizing video clips with MoviePy

Multimedia Processing 2024-10-23 Python 34 min read

In this case, I tackled a unique challenge in video processing: replacing a green screen with a custom image. The project was implemented using the MoviePy library in Python.

The green screen replacement posed the biggest challenge due to its dynamic nature. The program had to accurately detect the green screen and replace it with a custom image.

1. Import necessary libraries and define the green screen color range

After importing the necessary libraries and saving the image path to a variable, the first thing to do was to detect the green screen in the video clip. To do that, I needed to find the green pixel values in the video clip:

from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip
import numpy as np
import cv2

# Load the base video whose green screen will be replaced
video = VideoFileClip("./video.mp4")

# Lower and upper bounds of the green color, expressed as HSV triples.
# They are passed to cv2.inRange together with an HSV-converted frame.
lower_green = np.array([43, 180, 180], dtype=np.uint8)
upper_green = np.array([63, 255, 255], dtype=np.uint8)

# Load the background image using OpenCV. It is kept in BGR order here and
# converted to RGB only after it has been resized for a frame.
background_image_bgr = cv2.imread("./image.jpg")

# Minimum size for green screen detection (ignore very small areas).
# Intended as a bounding-box area (width * height) threshold.
min_green_area_size = 50  # Adjust this value as needed

The green screen color range was defined in HSV format, which is more suitable for color detection than RGB. The background image was loaded using OpenCV, and a minimum size for green screen detection was set to ignore very small areas.

2. Detecting, Masking, Cleaning and Replacing

The next step was to detect the green screen in the video clip, create a mask for the green areas, find the contours of the green areas, clean up the mask to remove any unwanted areas, and replace the pixels of the frame according to the mask.

def replace_green_with_dynamic_background(frame):
    """Replace every sufficiently large green region of ``frame`` with the background image.

    The frame is thresholded in HSV space with ``lower_green``/``upper_green``.
    For each external contour of the resulting mask, the background image is
    resized to the contour's bounding box and copied over the pixels of that
    box that are set in the mask.

    Args:
        frame: Frame as handed over by MoviePy; it is treated as RGB by the
            colour conversions below.

    Returns:
        A modified copy of ``frame``; the input array is never written to.
    """
    frame_copy = frame.copy()  # Work on a copy since we can't modify the original
    hsv_frame = cv2.cvtColor(frame_copy, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv_frame, lower_green, upper_green)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Use the module-level threshold instead of a second hard-coded copy,
        # so adjusting min_green_area_size actually changes the behaviour.
        if w * h < min_green_area_size:
            continue

        # Resize the background to the bounding box, then convert it to RGB
        # (OpenCV loaded it as BGR) to match the frame's channel order.
        resized_bgr = cv2.resize(background_image_bgr, (w, h), interpolation=cv2.INTER_AREA)
        resized_rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)

        # Only the green pixels inside the bounding box are overwritten.
        box_is_green = mask[y:y + h, x:x + w] != 0
        frame_copy[y:y + h, x:x + w][box_is_green] = resized_rgb[box_is_green]

    return frame_copy

I was able to achieve all of these steps by defining a function that takes a frame as input. I just needed to call this function on each frame of the video clip to replace the green screen with the custom image. In this case, I used this function with fl_image.

# Apply the green screen replacement with dynamic scaling to the video.
# fl_image builds a new clip whose frames are the function's return values.
video_with_dynamic_background = video.fl_image(replace_green_with_dynamic_background)

# Write the final output video, encoded with the libx264 codec
video_with_dynamic_background.write_videofile("localized_output.mp4", codec="libx264")

3. Finding a new Solution

The first solution replaced the green screen perfectly, without any green screen noise or other unwanted areas. However, it had flaws, such as rendering the whole image onto whatever part of the green screen was visible...

So I decided to detect the circle in the green screen and replace the green screen with the custom image. This was achieved by using the Hough Circle Transform algorithm in OpenCV.

def process_frame(frame):
    """Detect the circular green screen in ``frame`` and replace it with the background image.

    The frame is thresholded in HSV space, the blurred mask is searched with
    the Hough circle transform, and the first reported circle is taken as the
    green screen. The background image is resized to the circle's bounding
    square and copied over the pixels that are both green and inside the circle.

    Reads ``lower_green``, ``upper_green`` and ``background_img_cv`` from the
    enclosing scope.
    NOTE(review): ``background_img_cv`` is pasted without a colour conversion,
    so it presumably has to be in the frame's (RGB) channel order - confirm.

    Args:
        frame: Frame as handed over by MoviePy; treated as RGB.

    Returns:
        A modified copy of ``frame``, or ``frame`` itself when no usable
        circle was found.
    """
    h, w = frame.shape[:2]

    # Convert to HSV color space for better color segmentation. Converting
    # straight from RGB gives the same result as going through BGR first.
    hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)

    # Create a mask for the green color
    mask = cv2.inRange(hsv, lower_green, upper_green)

    # Reduce noise in the mask
    mask_blurred = cv2.GaussianBlur(mask, (9, 9), 2)

    # Detect circles using Hough Circle Transform
    circles = cv2.HoughCircles(
        mask_blurred, cv2.HOUGH_GRADIENT, dp=1.5, minDist=h // 4,
        param1=50, param2=30, minRadius=50, maxRadius=h // 2,
    )
    if circles is None:
        # If no circle is detected, return the original frame
        print("No circle detected in this frame.")
        return frame

    # Assume the first detected circle is the green screen. The values are
    # turned into plain Python ints: with unsigned 16-bit values the
    # "center - radius" computed below wraps around instead of going
    # negative, which silently disables the off-frame clipping.
    center_x, center_y, circle_radius = (int(value) for value in np.around(circles[0][0]))

    # Validate circle parameters
    if circle_radius <= 0:
        print("Invalid circle radius detected.")
        return frame

    # Ensure circle is within frame boundaries
    if not (0 <= center_x < w and 0 <= center_y < h):
        print("Circle center is outside frame boundaries.")
        return frame

    # Pixels inside the detected circle (squared distances avoid the sqrt)
    Y, X = np.ogrid[:h, :w]
    circular_mask = (X - center_x) ** 2 + (Y - center_y) ** 2 <= circle_radius ** 2

    # Green pixels within the circular area
    green_mask = (mask > 0) & circular_mask

    # Resize the background image to the circle's bounding square
    bg_diameter = 2 * circle_radius
    bg_resized = cv2.resize(background_img_cv, (bg_diameter, bg_diameter))

    # Bounding square of the circle; it may extend beyond the frame
    x_start, y_start = center_x - circle_radius, center_y - circle_radius
    x_end, y_end = center_x + circle_radius, center_y + circle_radius

    # Part of the square that lies inside the frame
    x_start_frame, y_start_frame = max(x_start, 0), max(y_start, 0)
    x_end_frame, y_end_frame = min(x_end, w), min(y_end, h)
    if x_end_frame <= x_start_frame or y_end_frame <= y_start_frame:
        print("Invalid region dimensions; skipping this frame.")
        return frame

    # Matching part of the resized background: skip whatever was cut off at
    # the left/top edge of the frame.
    x_start_bg, y_start_bg = x_start_frame - x_start, y_start_frame - y_start
    x_end_bg = x_start_bg + (x_end_frame - x_start_frame)
    y_end_bg = y_start_bg + (y_end_frame - y_start_frame)
    bg_region = bg_resized[y_start_bg:y_end_bg, x_start_bg:x_end_bg]

    # Copy the background over the green pixels of that region only. Pixels
    # on the circle's rim that fall outside the square are left untouched
    # rather than being filled from an all-zero placeholder image.
    output_frame = frame.copy()
    region_mask = green_mask[y_start_frame:y_end_frame, x_start_frame:x_end_frame]
    output_frame[y_start_frame:y_end_frame, x_start_frame:x_end_frame][region_mask] = bg_region[region_mask]

    return output_frame

The flaw in this approach was that the algorithm was utterly dependent on the parameters of HoughCircles, which needed detailed fine-tuning. It was not a reliable solution: the green screen was not always a perfect circle, and the replaced image shook a lot because of the circle detection, which looked very artificial. I even took a weighted average of the position of the detected circles and placed the image accordingly, but this produced a zoom-in and zoom-out effect, which was an obstacle for this solution.

4. Stabilizing the Algorithm

This again had some flaws such as the green screen replacement was not stable and the custom image was not placed correctly. To stabilize the algorithm, i needed to implement a method that could precalculate the green screen area and adjust the custom image accordingly.

To improve on this solution, i needed to implement a more advanced algorithm that could handle the green screen replacement more accurately. This involved using a more sophisticated method to detect the green screen, clean up the mask, and replace the green screen with the custom image.

My approach this time was to combine both methods, with an additional feature to stabilize the algorithm. I determined the frame range where the green screen was fully visible in order to divide the algorithm into two parts. The first part precalculates the green screen area using the Hough circle transform, and the second part detects it using rectangle detection.

def process_frame(frame, frame_number, background_img_cv, video_clip, min_green_area_size, background_image_bgr):
    """Replace the green screen in one frame, choosing the method by frame number.

    * Frames inside ``frame_range``: every green contour whose bounding box
      is at least ``min_green_area_size`` is filled with the background image
      resized to that bounding box.
    * Other frames between 45 and 245: the green screen is located as a
      circle (Hough transform, smoothed over ``circle_history``) and the
      green pixels inside it are replaced.
    * All remaining frames are returned untouched.

    Reads ``frame_range``, ``lower_green``, ``upper_green`` and
    ``history_length`` from module scope and mutates ``circle_history``.

    Args:
        frame: Frame handed over by MoviePy; treated as RGB.
        frame_number: Index of ``frame`` within the video.
        background_img_cv: Background image pasted as-is in the circle branch.
        video_clip: Source clip, used to look ahead when no circle is found.
        min_green_area_size: Minimum bounding-box area (w * h) of a contour.
        background_image_bgr: Background image in BGR order, converted to RGB
            in the contour branch.

    Returns:
        A modified copy of ``frame``, or ``frame`` itself when nothing was replaced.
    """
    h, w = frame.shape[:2]

    # --- Frames inside frame_range: contour-based replacement ---------------
    if frame_range[0] <= frame_number <= frame_range[1]:
        frame_copy = frame.copy()  # Work on a copy since we can't modify the original
        hsv_frame = cv2.cvtColor(frame_copy, cv2.COLOR_RGB2HSV)
        mask = cv2.inRange(hsv_frame, lower_green, upper_green)
        mask = cv2.dilate(mask, np.ones((8, 8), np.uint8), iterations=1)  # Expand the mask
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            # Separate names so the frame size (h, w) is not overwritten
            x, y, box_w, box_h = cv2.boundingRect(contour)
            if box_w * box_h < min_green_area_size:  # Ignore noise / small green areas
                continue
            resized_bgr = cv2.resize(background_image_bgr, (box_w, box_h), interpolation=cv2.INTER_AREA)
            resized_rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)
            box_is_green = mask[y:y + box_h, x:x + box_w] != 0
            frame_copy[y:y + box_h, x:x + box_w][box_is_green] = resized_rgb[box_is_green]
        return frame_copy

    # --- Frames before 45 or after 245 are passed through -------------------
    if frame_number < 45 or frame_number > 245:
        return frame

    # --- Remaining frames: circle-based replacement -------------------------
    hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
    mask = cv2.inRange(hsv, lower_green, upper_green)

    # Remove small artifacts, close gaps, then expand the mask
    mask = remove_small_contours(mask, 100)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((9, 9), np.uint8))
    mask = cv2.dilate(mask, np.ones((9, 9), np.uint8), iterations=1)

    # Gaussian blur smooths the mask before circle detection
    mask_blurred = cv2.GaussianBlur(mask, (9, 9), 2)

    circles = cv2.HoughCircles(mask_blurred, cv2.HOUGH_GRADIENT, dp=1.5, minDist=h // 6,
                               param1=80, param2=35, minRadius=40, maxRadius=h // 3)

    if circles is not None:
        detected_circle = np.uint16(np.around(circles))[0][0]
    else:
        # Nothing found here: look ahead, then fall back to the last known circle
        detected_circle = find_future_circle(video_clip, frame_number)
        if detected_circle is None:
            if not circle_history:
                return frame
            detected_circle = circle_history[-1]

    # Record the circle and keep the history bounded on every path (the
    # fallback path used to append without ever trimming).
    circle_history.append(detected_circle)
    overflow = len(circle_history) - history_length
    if overflow > 0:
        del circle_history[:overflow]

    smoothed_circle = weighted_exponential_average_circle(circle_history)
    if not smoothed_circle:
        return frame

    center_x, center_y, circle_radius = (int(value) for value in smoothed_circle)
    if circle_radius <= 0:
        # A zero-sized target would make cv2.resize fail
        return frame

    # Green pixels that lie inside the smoothed circle
    Y, X = np.ogrid[:h, :w]
    circular_mask = (X - center_x) ** 2 + (Y - center_y) ** 2 <= circle_radius ** 2
    green_mask = (mask > 0) & circular_mask

    # Resize the background image to the circle's bounding square
    bg_resized = cv2.resize(background_img_cv, (2 * circle_radius, 2 * circle_radius))

    # Bounding square of the circle, clipped to the frame
    x_origin, y_origin = center_x - circle_radius, center_y - circle_radius
    x_start, y_start = max(0, x_origin), max(0, y_origin)
    x_end, y_end = min(w, center_x + circle_radius), min(h, center_y + circle_radius)
    if x_end <= x_start or y_end <= y_start:
        return frame

    # Crop the matching part of the background. The offsets skip what was
    # clipped at the left/top edge, so the image does not slide when the
    # circle is partly outside the frame.
    bg_x, bg_y = x_start - x_origin, y_start - y_origin
    bg_region = bg_resized[bg_y:bg_y + (y_end - y_start), bg_x:bg_x + (x_end - x_start)]

    # Copy the background over the green pixels of that region only; rim
    # pixels outside the square stay as they are instead of turning black.
    output_frame = frame.copy()
    region_mask = green_mask[y_start:y_end, x_start:x_end]
    output_frame[y_start:y_end, x_start:x_end][region_mask] = bg_region[region_mask]
    return output_frame

def process_video_frames(video_clip, background_img_cv, text_to_add, min_green_area_size, background_image_bgr):
    """Build the processed clip: green screen replaced on every frame, text on top.

    Args:
        video_clip: Source clip.
        background_img_cv: Background image forwarded to ``process_frame``.
        text_to_add: Text forwarded to ``add_text_style_to_video``.
        min_green_area_size: Minimum green area forwarded to ``process_frame``.
        background_image_bgr: BGR background image forwarded to ``process_frame``.

    Returns:
        The clip returned by ``add_text_style_to_video``.
    """
    # Each video starts with an empty smoothing history; otherwise circles
    # from the end of the previously rendered video are averaged into the
    # first detections of this one.
    circle_history.clear()

    fps = video_clip.fps

    def transform(get_frame, t):
        # Derive the frame index from the timestamp rather than from a call
        # counter: the frame function is also called once when the clip is
        # built, which shifts a counter by one and makes results depend on
        # how often frames are requested.
        frame_number = int(round(t * fps))
        return process_frame(get_frame(t), frame_number, background_img_cv, video_clip,
                             min_green_area_size, background_image_bgr)

    return add_text_style_to_video(video_clip.fl(transform), text_to_add)

This approach worked like a charm since it;

  1. Detected green circular screen beforehand and took advantage of it by placing the image with respect to this info
  2. Switched to simple replacement of green screen with the image which stabilized the image and greatly improved the performance
  3. Returned the frame immediately if the processed frame number is out of specified range which also made huge impact on the performance

5. Rest of the Code

  • Libraries to add, path variables initializations, directory check

import os
import random
import numpy as np
import cv2
import pandas as pd
from PIL import Image
from moviepy.editor import VideoFileClip, TextClip, AudioFileClip, CompositeVideoClip
import pillow_avif

# Paths to the video, image, and text files
video_path = './case/video.mp4'
images_directory = './case/images/'
music_directory = './case/musics/'
text_excel_path = './case/texts.xlsx'
output_directory = './output_videos/'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Load the input video
video = VideoFileClip(video_path)

# Get all image file paths from the images directory. The list is sorted
# because os.listdir returns entries in arbitrary order and the i-th image is
# later paired with the i-th text and the i-th music file.
image_files = sorted(
    os.path.join(images_directory, img)
    for img in os.listdir(images_directory)
    if img.endswith(('.avif', '.jpg', '.png', '.jpeg', '.webp'))
)
# Get all music file paths from the music directory (sorted for the same reason)
music_files = sorted(
    os.path.join(music_directory, music)
    for music in os.listdir(music_directory)
    if music.endswith(('.mp3', '.wav'))
)

# Load the texts: rows at positions 1-14 of the first column of the Excel
# sheet. Slicing (instead of indexing each row) tolerates shorter sheets.
df = pd.read_excel(text_excel_path)
text_to_add = df.iloc[1:15, 0].tolist()

# History of detected circles for averaging
circle_history = []

# Maximum number of frames to average for smoothing
history_length = 10
# Weight ratio between consecutive history entries (most recent weighs most)
decay_factor = 0.2

# Green color range in HSV
lower_green = np.array([40, 150, 150])
upper_green = np.array([80, 255, 255])

# Hardcoded frame range for specific processing
frame_range = (70, 221)

  • Convert AVIF to JPEG

def convert_avif_to_png(avif_file):
    """Convert a .avif image to a JPEG file and return the path to the new file.

    Despite the function's name, the output is written as JPEG: the new file
    sits next to the source and has its extension replaced by ``.jpeg``.

    Args:
        avif_file: Path to the source image.

    Returns:
        Path of the JPEG file that was written.
    """
    # Swap only the extension; str.replace would also rewrite any ".avif"
    # that appears elsewhere in the path (e.g. in a directory name).
    base_path, _ = os.path.splitext(avif_file)
    new_file_path = base_path + '.jpeg'

    # The context manager closes the source file once the copy is saved.
    with Image.open(avif_file) as img:
        # JPEG cannot store an alpha channel or palette, so normalize to RGB.
        img.convert('RGB').save(new_file_path, format="JPEG")
    return new_file_path

  • Remove Small Contours

def remove_small_contours(mask, min_contour_area):
    """Keep only the blobs of ``mask`` whose contour area is at least ``min_contour_area``.

    The mask is rewritten in place (cleared, then the surviving external
    contours are drawn filled with 255) and is also returned.
    """
    found, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    survivors = []
    for candidate in found:
        if cv2.contourArea(candidate) >= min_contour_area:
            survivors.append(candidate)

    # Wipe the mask before redrawing the contours that passed the filter
    mask.fill(0)
    cv2.drawContours(mask, survivors, -1, 255, thickness=cv2.FILLED)
    return mask

  • Weighted Exponential Average Circle

def weighted_exponential_average_circle(circles, decay=None):
    """Return the exponentially weighted average of a sequence of circles.

    The last circle in ``circles`` gets weight 1, the one before it
    ``decay``, then ``decay ** 2`` and so on.

    Args:
        circles: Sequence of ``(x, y, radius)`` triples, oldest first.
        decay: Weight ratio between consecutive circles. Defaults to the
            module-level ``decay_factor``.

    Returns:
        ``[x, y, radius]`` as ints rounded to the nearest pixel, or ``None``
        when ``circles`` is empty.
    """
    if not circles:
        return None
    if decay is None:
        decay = decay_factor

    x_sum = y_sum = radius_sum = weight_sum = 0.0
    for age, circle in enumerate(reversed(circles)):  # age 0 is the most recent circle
        weight = decay ** age
        # float() keeps the accumulation out of small fixed-width integer types
        x_sum += float(circle[0]) * weight
        y_sum += float(circle[1]) * weight
        radius_sum += float(circle[2]) * weight
        weight_sum += weight

    # Round instead of truncating: floating-point noise can turn the average
    # of identical circles into e.g. 99.999..., which int() would cut to 99
    # and make the overlay jitter by a pixel.
    return [
        int(round(x_sum / weight_sum)),
        int(round(y_sum / weight_sum)),
        int(round(radius_sum / weight_sum)),
    ]

  • Find Future Circles

def find_future_circle(video_clip, frame_number, max_search=5):
    """Search the frames after ``frame_number`` for a circular green area.

    Up to ``max_search`` following frames are examined, never going past the
    last frame of the clip; the search stops at the first frame in which the
    Hough transform reports a circle.

    Args:
        video_clip: Clip providing ``fps``, ``duration`` and ``get_frame``.
        frame_number: Index of the current frame; the search starts after it.
        max_search: Maximum number of future frames to examine.

    Returns:
        The first circle of the first matching frame as an ``np.uint16``
        array ``(x, y, radius)``, or ``None`` when no circle was found.
    """
    fps = video_clip.fps
    frame_count = int(fps * video_clip.duration)  # Total frames in the video

    # NOTE(review): these bounds are stricter than the module-level
    # lower_green and no mask clean-up is applied here, so this detection is
    # not identical to the one in process_frame - confirm this is intended.
    lower_bound = np.array([40, 180, 180])
    upper_bound = np.array([80, 255, 255])

    # Inclusive upper end: frame_number + 1 .. frame_number + max_search
    last_frame_number = min(frame_number + max_search, frame_count - 1)

    for future_frame_number in range(frame_number + 1, last_frame_number + 1):
        # Extract the future frame by its timestamp
        future_frame = video_clip.get_frame(future_frame_number / fps)
        future_hsv = cv2.cvtColor(future_frame, cv2.COLOR_RGB2HSV)

        future_mask = cv2.inRange(future_hsv, lower_bound, upper_bound)
        future_mask_blurred = cv2.GaussianBlur(future_mask, (9, 9), 2)

        # Attempt circle detection
        frame_height = future_frame.shape[0]
        future_circles = cv2.HoughCircles(future_mask_blurred, cv2.HOUGH_GRADIENT, dp=1.5,
                                          minDist=frame_height // 6, param1=80, param2=35,
                                          minRadius=40, maxRadius=frame_height // 3)

        if future_circles is not None:
            # Stop at the first frame with a detection; take its first circle
            return np.uint16(np.around(future_circles))[0][0]

    return None

  • Add Text Style to Video

def add_text_style_to_video(video_clip, text_to_add):
    """Overlay ``text_to_add`` as a styled caption for the whole duration of ``video_clip``."""
    # Caption box: 92% of the video width, height left unspecified
    caption_size = (video_clip.w * 0.92, None)

    caption = TextClip(
        text_to_add,
        fontsize=70,
        font='Arial-Bold',
        color='white',
        stroke_color='black',
        stroke_width=4,
        align='center',
        method='caption',
        size=caption_size,
    )

    # Horizontally centered, 250 from the top, shown as long as the video lasts
    caption = caption.set_position(('center', 250))
    caption = caption.set_duration(video_clip.duration)

    return CompositeVideoClip([video_clip, caption])

  • Add Background Music

def add_background_music(i, video_clip):
    """Attach the ``i``-th music file to the video, cut to the video's length.

    The music is taken from its beginning. When it is shorter than the video
    it is used in full, so the end of the video has no music.

    Args:
        i: Index into ``music_files``.
        video_clip: Clip that receives the audio track.

    Returns:
        ``video_clip`` with the music set as its audio, or ``video_clip``
        unchanged when there is no music file at index ``i``.
    """
    if i >= len(music_files):
        return video_clip  # No music file for this index

    audio_background = AudioFileClip(music_files[i])

    # Never ask for more audio than the file contains
    end_time = min(audio_background.duration, video_clip.duration)
    audio_background = audio_background.subclip(0, end_time)

    # Set the audio to the video
    return video_clip.set_audio(audio_background)

  • Main Code

def main():
    """Render one localized video per background image in ``image_files``.

    The i-th image is combined with the i-th text and, when available, the
    i-th music file. Each result is written to ``output_directory`` as
    ``localized_output_<image name>.mp4``.
    """
    min_green_area_size = 1000  # Minimum size to avoid noise

    for i, image_file in enumerate(image_files):
        if i >= len(text_to_add):
            # Every remaining image would lack a text as well
            print(f"No text available for image #{i} ({image_file}); stopping.")
            break

        if image_file.endswith(".avif"):
            image_file = convert_avif_to_png(image_file)

        # Read the image once: BGR for the contour branch, RGB for the circle branch
        background_image_bgr = cv2.imread(image_file)
        if background_image_bgr is None:
            # cv2.imread reports an unreadable file by returning None
            print(f"Could not read background image {image_file}; skipping.")
            continue
        background_img_cv = cv2.cvtColor(background_image_bgr, cv2.COLOR_BGR2RGB)

        result = process_video_frames(video, background_img_cv, text_to_add[i], min_green_area_size, background_image_bgr)

        # Add background music to the video
        result_with_music = add_background_music(i, result)

        image_name = os.path.splitext(os.path.basename(image_file))[0]
        output_video_path = os.path.join(output_directory, f'localized_output_{image_name}.mp4')

        result_with_music.write_videofile(output_video_path, codec='libx264', threads=4)


if __name__ == '__main__':
    main()

Comments

Be the first one to comment!