0% found this document useful (0 votes)

18 views35 pages

F) Maybe Is Full Script Complete

The document outlines a comprehensive project for removing text from manga and comics using multiple detection methods and inpainting techniques, fully compatible with Google Colab. It includes installation instructions for necessary dependencies, setup for various text detection models such as EasyOCR and PaddleOCR, and advanced detection methods tailored for manga-specific features. The project aims to provide a robust solution for detecting and removing text in comic images effectively.

Uploaded by

bobsviking22

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

18 views35 pages

F) Maybe Is Full Script Complete

Uploaded by

bobsviking22

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 35

# Complete Manga/Comic Text Removal Project

# Advanced solution with multiple detection methods and inpainting techniques

# Fully compatible with Google Colab

import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
import torch
import torchvision.transforms as transforms
from pathlib import Path
import requests
import zipfile
import gdown
from typing import List, Tuple, Optional, Dict
import warnings
import json
import time
from tqdm import tqdm
import gc
warnings.filterwarnings('ignore')

# ======================= INSTALLATION SETUP =======================

def install_all_dependencies():
    """Install every package the project needs (intended for Google Colab).

    Each requirement is installed by running ``<python> -m pip install -q``
    in its own subprocess, so one failing package does not stop the others.
    Failed installs are reported individually and in a final summary.

    Returns:
        None. Progress and warnings are printed to stdout.
    """
    # Local imports: nothing else in the file needs these modules.
    import shlex
    import subprocess

    print("Installing all required packages... This may take a few minutes.")

    has_cuda = torch.cuda.is_available()

    # Core packages
    packages = [
        "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118",
        "opencv-python-headless",
        "pillow>=9.0.0",
        "numpy>=1.21.0",
        "matplotlib>=3.5.0",
        "tqdm",
        "scipy",
        "scikit-image",
        "scikit-learn",
    ]

    # OCR packages
    ocr_packages = [
        "easyocr",
        "paddlepaddle-gpu" if has_cuda else "paddlepaddle",
        "paddleocr>=2.6.0",
    ]

    # AI/ML packages
    ai_packages = [
        "transformers>=4.20.0",
        "diffusers>=0.21.0",
        "accelerate>=0.20.0",
        "controlnet-aux",
        "xformers" if has_cuda else "",  # GPU-only; empty entries are skipped
        "segment-anything",
        "ultralytics>=8.0.0",
    ]

    # Additional utilities
    util_packages = [
        "imageio",
        "imageio-ffmpeg",
        "gradio",
        "ipywidgets",
    ]

    all_packages = packages + ocr_packages + ai_packages + util_packages

    failed = []
    for package in all_packages:
        if not package:  # Skip empty strings
            continue

        print(f"Installing {package}...")
        # An argument list without a shell is required here: through a shell,
        # a specifier such as "pillow>=9.0.0" is parsed as "pip install pillow"
        # with stdout redirected to a file named "=9.0.0".
        command = [sys.executable, "-m", "pip", "install", "-q",
                   *shlex.split(package)]
        try:
            completed = subprocess.run(command, check=False)
        except OSError as e:
            print(f"Warning: Could not install {package}: {e}")
            failed.append(package)
            continue

        if completed.returncode != 0:
            print(f"Warning: Could not install {package}: "
                  f"pip exited with code {completed.returncode}")
            failed.append(package)

    # Additional setup for specific packages
    try:
        import nltk
        nltk.download('punkt', quiet=True)
    except ImportError:
        # nltk is not in the install list above, so its absence is expected.
        pass
    except Exception as e:
        print(f"Warning: NLTK data download failed: {e}")

    if failed:
        print(f"⚠️ Finished with {len(failed)} failed package(s): {', '.join(failed)}")
    else:
        print("✅ All dependencies installed successfully!")

# ======================= ADVANCED TEXT DETECTION =======================

class AdvancedTextDetector:
    """Multi-method text detection with manga/comic specialization.

    Learned detectors (EasyOCR, PaddleOCR, EAST) are combined with OpenCV
    heuristics (MSER, contours, speech bubbles, sound effects, gradient
    based "handwritten" regions). Every detection is a dict with the keys
    ``'bbox'`` (``(x1, y1, x2, y2)``), ``'confidence'``, ``'method'``,
    ``'text'`` and ``'polygon'``.
    """

    # Language sets tried in order when building the EasyOCR reader.
    # EasyOCR has no 'zh' code and only lets CJK/Thai models be paired with
    # English, so a single reader cannot load en+ja+ko+zh+th+vi at once.
    EASYOCR_LANGUAGE_SETS = (('ja', 'en'), ('en',))

    def __init__(self):
        self.detection_cache = {}
        self.setup_all_detectors()

    # ------------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------------

    def setup_all_detectors(self):
        """Initialize all available text detection methods.

        Populates ``self.detectors``. A backend that fails to load is
        reported and skipped; it never aborts the remaining setup.
        """
        print("🔧 Setting up text detection models...")

        # OCR Readers
        self.detectors = {}
        use_gpu = torch.cuda.is_available()

        # EasyOCR setup
        try:
            import easyocr
        except Exception as e:
            print(f"⚠️ EasyOCR failed: {e}")
        else:
            last_error = None
            for languages in self.EASYOCR_LANGUAGE_SETS:
                try:
                    self.detectors['easyocr'] = easyocr.Reader(
                        list(languages), gpu=use_gpu
                    )
                    print("✅ EasyOCR initialized")
                    break
                except Exception as e:
                    last_error = e
            else:
                # No language set could be loaded.
                print(f"⚠️ EasyOCR failed: {last_error}")

        # PaddleOCR setup
        try:
            from paddleocr import PaddleOCR
            self.detectors['paddle_en'] = PaddleOCR(
                use_angle_cls=True,
                lang='en',
                show_log=False,
                use_gpu=use_gpu
            )
            self.detectors['paddle_ch'] = PaddleOCR(
                use_angle_cls=True,
                lang='ch',
                show_log=False,
                use_gpu=use_gpu
            )
            print("✅ PaddleOCR initialized")
        except Exception as e:
            print(f"⚠️ PaddleOCR failed: {e}")

        # CRAFT Text Detection (if available)
        try:
            self.setup_craft_detector()
        except Exception:
            print("⚠️ CRAFT detector not available")

        # OpenCV-based detectors
        self.setup_opencv_detectors()

        print(f"✅ Text detection setup complete! Available methods: {list(self.detectors.keys())}")

    @staticmethod
    def _has_model_file(path: str) -> bool:
        """Return True when *path* is an existing, non-empty file."""
        return os.path.isfile(path) and os.path.getsize(path) > 0

    @staticmethod
    def _download_file(url: str, path: str) -> None:
        """Download *url* to *path*.

        Data is written to a ``.part`` file first and renamed on success, so
        an interrupted download never leaves a truncated model that a later
        existence check would mistake for a usable one.

        Raises:
            OSError: if the download or the rename fails.
        """
        import urllib.request

        partial = path + ".part"
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, path)
        finally:
            if os.path.exists(partial):
                os.remove(partial)

    def setup_craft_detector(self):
        """Setup CRAFT text detector for better comic text detection.

        Only the weights are fetched; the detector itself is a placeholder,
        so ``self.detectors['craft']`` is registered as ``None``.
        """
        try:
            # Download CRAFT model if not exists
            craft_path = "/content/craft_mlt_25k.pth"
            if not self._has_model_file(craft_path):
                print("Downloading CRAFT model...")
                url = "https://github.com/clovaai/CRAFT-pytorch/releases/download/v1.0/craft_mlt_25k.pth"
                self._download_file(url, craft_path)

            # Note: Full CRAFT implementation would go here
            # For now, we'll use a placeholder
            self.detectors['craft'] = None

        except Exception as e:
            print(f"CRAFT setup failed: {e}")

    def setup_opencv_detectors(self):
        """Setup OpenCV-based text detection methods (EAST network)."""
        # EAST Text Detector
        try:
            east_path = "/content/frozen_east_text_detection.pb"
            if not self._has_model_file(east_path):
                print("Downloading EAST model...")
                url = "https://github.com/opencv/opencv_extra/raw/master/testdata/dnn/frozen_east_text_detection.pb"
                self._download_file(url, east_path)
            self.detectors['east'] = cv2.dnn.readNet(east_path)
            print("✅ EAST detector initialized")
        except Exception as e:
            print(f"⚠️ EAST detector failed: {e}")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a grayscale version of *image* (2-D input is returned as is)."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    @staticmethod
    def _box_detection(x, y, w, h, confidence: float, method: str) -> Dict:
        """Build a detection dict for an axis-aligned box given as x, y, w, h."""
        # Plain ints keep bboxes usable with cv2 drawing calls and JSON.
        x1, y1, x2, y2 = int(x), int(y), int(x + w), int(y + h)
        return {
            'bbox': (x1, y1, x2, y2),
            'confidence': confidence,
            'method': method,
            'text': '',
            'polygon': [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
        }

    @staticmethod
    def _polygon_detection(polygon, text: str, confidence, method: str) -> Dict:
        """Build a detection dict from an OCR polygon (list of [x, y] points)."""
        points = np.array(polygon, dtype=np.int32)
        x_min, y_min = (int(v) for v in points.min(axis=0))
        x_max, y_max = (int(v) for v in points.max(axis=0))
        return {
            'bbox': (x_min, y_min, x_max, y_max),
            'confidence': float(confidence),
            'method': method,
            'text': text,
            'polygon': polygon
        }

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def detect_text_comprehensive(self, image: np.ndarray,
                                  min_confidence: float = 0.3) -> List[Dict]:
        """
        Comprehensive text detection using all available methods

        Args:
            image: Input image as read by ``cv2.imread`` (BGR) or grayscale.
            min_confidence: Detections below this confidence are dropped.

        Returns:
            List of detection dictionaries with bbox, confidence, method, text
        """
        results = []

        # Method 1: EasyOCR
        if 'easyocr' in self.detectors:
            results.extend(self._detect_with_easyocr(image, min_confidence))

        # Method 2: PaddleOCR
        if 'paddle_en' in self.detectors:
            results.extend(self._detect_with_paddle(image, min_confidence))

        # Method 3: EAST
        if 'east' in self.detectors:
            results.extend(self._detect_with_east(image, min_confidence))

        # Method 4: OpenCV methods
        results.extend(self._detect_with_opencv(image, min_confidence))

        # Method 5: Manga-specific detection
        results.extend(self._detect_manga_specific(image, min_confidence))

        # Merge and filter results
        return self._merge_detections(results)

    # ------------------------------------------------------------------
    # Learned detectors
    # ------------------------------------------------------------------

    def _detect_with_easyocr(self, image: np.ndarray,
                             min_confidence: float) -> List[Dict]:
        """EasyOCR detection"""
        results = []
        try:
            detections = self.detectors['easyocr'].readtext(image)
            for bbox, text, confidence in detections:
                if confidence >= min_confidence:
                    results.append(
                        self._polygon_detection(bbox, text, confidence, 'easyocr')
                    )
        except Exception as e:
            print(f"EasyOCR detection error: {e}")
        return results

    def _detect_with_paddle(self, image: np.ndarray,
                            min_confidence: float) -> List[Dict]:
        """PaddleOCR detection (runs every loaded PaddleOCR language model)."""
        results = []

        for lang in ['paddle_en', 'paddle_ch']:
            if lang not in self.detectors:
                continue

            try:
                ocr_results = self.detectors[lang].ocr(image, cls=True)
                # The first element is empty/None when nothing was found.
                if ocr_results and ocr_results[0]:
                    for item in ocr_results[0]:
                        bbox, (text, confidence) = item
                        if confidence >= min_confidence:
                            results.append(
                                self._polygon_detection(bbox, text, confidence, lang)
                            )
            except Exception as e:
                print(f"{lang} detection error: {e}")

        return results

    def _detect_with_east(self, image: np.ndarray,
                          min_confidence: float) -> List[Dict]:
        """EAST detector"""
        results = []
        try:
            if 'east' not in self.detectors:
                return results

            net = self.detectors['east']
            height, width = image.shape[:2]

            # Prepare image for EAST (fixed 320x320 network input)
            new_height, new_width = 320, 320
            ratio_h, ratio_w = height / new_height, width / new_width

            blob = cv2.dnn.blobFromImage(image, 1.0, (new_width, new_height),
                                         (123.68, 116.78, 103.94), swapRB=True,
                                         crop=False)

            net.setInput(blob)
            scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid',
                                            'feature_fusion/concat_3'])

            # Decode predictions
            boxes, confidences = self._decode_east_predictions(
                scores, geometry, min_confidence)
            if not boxes:
                return results

            # Apply NMS
            indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)

            # np.array(...).flatten() copes with every return shape NMSBoxes
            # has used (empty tuple, Nx1 array, flat array).
            for i in np.array(indices).flatten():
                x, y, w, h = boxes[i]
                # Scale back to original image
                results.append(self._box_detection(
                    int(x * ratio_w), int(y * ratio_h),
                    int(w * ratio_w), int(h * ratio_h),
                    confidences[i], 'east'))

        except Exception as e:
            print(f"EAST detection error: {e}")

        return results

    def _decode_east_predictions(self, scores, geometry, min_confidence):
        """Decode EAST model predictions.

        Returns:
            ``(boxes, confidences)`` where each box is ``[x, y, w, h]`` in the
            coordinates of the network input.
        """
        boxes = []
        confidences = []

        height, width = scores.shape[2:4]

        for y in range(height):
            scores_data = scores[0, 0, y]
            x_data0 = geometry[0, 0, y]
            x_data1 = geometry[0, 1, y]
            x_data2 = geometry[0, 2, y]
            x_data3 = geometry[0, 3, y]
            angles_data = geometry[0, 4, y]

            for x in range(width):
                if scores_data[x] < min_confidence:
                    continue

                # Each cell of the score map is mapped back with a factor of 4.
                offset_x, offset_y = x * 4.0, y * 4.0

                angle = angles_data[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                h = x_data0[x] + x_data2[x]
                w = x_data1[x] + x_data3[x]

                end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
                end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
                start_x = int(end_x - w)
                start_y = int(end_y - h)

                boxes.append([start_x, start_y, int(w), int(h)])
                confidences.append(float(scores_data[x]))

        return boxes, confidences

    # ------------------------------------------------------------------
    # Heuristic detectors
    # ------------------------------------------------------------------

    def _detect_with_opencv(self, image: np.ndarray,
                            min_confidence: float) -> List[Dict]:
        """OpenCV-based text detection methods (MSER and contours).

        These heuristics assign fixed confidences (0.6 for MSER, 0.5 for
        contours); results below ``min_confidence`` are dropped.
        """
        results = []

        try:
            gray = self._to_gray(image)

            # Method 1: MSER (Maximally Stable Extremal Regions)
            # Arguments are positional because the keyword names differ
            # between OpenCV releases. Order: delta, min_area, max_area,
            # max_variation, min_diversity, max_evolution, area_threshold,
            # min_margin, edge_blur_size.
            mser = cv2.MSER_create(2, 30, 8000, 0.25, 0.2, 200, 1.01, 0.003, 5)

            regions, _ = mser.detectRegions(gray)
            for region in regions:
                if len(region) > 10:
                    x, y, w, h = cv2.boundingRect(region)
                    aspect_ratio = w / h if h > 0 else 0
                    area = w * h

                    if (0.1 < aspect_ratio < 20 and 100 < area < 10000 and
                            w > 15 and h > 8):
                        results.append(
                            self._box_detection(x, y, w, h, 0.6, 'mser'))

            # Method 2: Contour-based detection
            # Apply multiple preprocessing techniques
            preprocessed = [
                cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY_INV, 11, 2),
                cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                      cv2.THRESH_BINARY_INV, 15, 4),
                cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
            ]

            # Morphological operations (kernel is loop-invariant)
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
            for thresh in preprocessed:
                processed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

                contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)

                for contour in contours:
                    area = cv2.contourArea(contour)
                    if 50 < area < 5000:
                        x, y, w, h = cv2.boundingRect(contour)
                        aspect_ratio = w / h if h > 0 else 0

                        if 0.2 < aspect_ratio < 15 and w > 10 and h > 8:
                            results.append(
                                self._box_detection(x, y, w, h, 0.5, 'contour'))

        except Exception as e:
            print(f"OpenCV detection error: {e}")

        return [d for d in results if d['confidence'] >= min_confidence]

    def _detect_manga_specific(self, image: np.ndarray,
                               min_confidence: float) -> List[Dict]:
        """Manga/comic specific text detection.

        Combines speech-bubble, sound-effect and handwritten-text heuristics
        and drops results below ``min_confidence``.
        """
        results = []

        try:
            gray = self._to_gray(image)

            # Speech bubble detection
            results.extend(self._detect_speech_bubbles(gray))

            # Sound effect detection (often has different characteristics)
            results.extend(self._detect_sound_effects(gray))

            # Handwritten text detection
            results.extend(self._detect_handwritten_text(gray))

        except Exception as e:
            print(f"Manga-specific detection error: {e}")

        return [d for d in results if d['confidence'] >= min_confidence]

    def _detect_speech_bubbles(self, gray: np.ndarray) -> List[Dict]:
        """Detect speech bubbles and text within them"""
        results = []

        try:
            # Use HoughCircles to detect circular/oval speech bubbles
            circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 50,
                                       param1=50, param2=30, minRadius=20,
                                       maxRadius=200)
            if circles is not None:
                img_h, img_w = gray.shape[:2]
                for cx, cy, r in np.round(circles[0, :]).astype("int"):
                    cx, cy, r = int(cx), int(cy), int(r)
                    # Bounding box around the circle, clipped to the image
                    x1, y1 = max(0, cx - r), max(0, cy - r)
                    x2, y2 = min(img_w, cx + r), min(img_h, cy + r)
                    results.append(self._box_detection(
                        x1, y1, x2 - x1, y2 - y1, 0.4, 'speech_bubble'))

            # Detect rectangular speech bubbles
            # Apply edge detection
            edges = cv2.Canny(gray, 50, 150, apertureSize=3)
            kernel = np.ones((3, 3), np.uint8)
            edges = cv2.dilate(edges, kernel, iterations=1)

            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 500 < area < 20000:  # Size filter for speech bubbles
                    # Approximate contour
                    epsilon = 0.02 * cv2.arcLength(contour, True)
                    approx = cv2.approxPolyDP(contour, epsilon, True)

                    # Any polygon with four or more vertices is accepted.
                    if len(approx) >= 4:
                        x, y, w, h = cv2.boundingRect(contour)
                        aspect_ratio = w / h if h > 0 else 0

                        if 0.3 < aspect_ratio < 5:  # Reasonable aspect ratio
                            results.append(self._box_detection(
                                x, y, w, h, 0.5, 'rect_bubble'))

        except Exception as e:
            print(f"Speech bubble detection error: {e}")

        return results

    def _detect_sound_effects(self, gray: np.ndarray) -> List[Dict]:
        """Detect sound effects text (often stylized)"""
        results = []

        try:
            # Sound effects often have bold, stylized text
            kernel_large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))

            # Apply tophat transform to detect bright text on dark background
            tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel_large)

            # Apply blackhat transform to detect dark text on bright background
            blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel_large)

            # Combine both
            combined = cv2.add(tophat, blackhat)

            # Threshold
            _, thresh = cv2.threshold(combined, 10, 255, cv2.THRESH_BINARY)

            # Find contours
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 100 < area < 8000:
                    x, y, w, h = cv2.boundingRect(contour)
                    aspect_ratio = w / h if h > 0 else 0

                    # Sound effects can have more varied aspect ratios
                    if 0.1 < aspect_ratio < 20 and w > 20 and h > 15:
                        results.append(self._box_detection(
                            x, y, w, h, 0.4, 'sound_effect'))

        except Exception as e:
            print(f"Sound effect detection error: {e}")

        return results

    def _detect_handwritten_text(self, gray: np.ndarray) -> List[Dict]:
        """Detect handwritten text areas"""
        results = []

        try:
            # Handwritten text often has more irregular patterns
            # Use gradient-based detection
            grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
            grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

            magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)

            peak = magnitude.max()
            if peak == 0:
                # Flat image: no gradients, and normalising would divide by 0.
                return results
            magnitude = np.uint8(magnitude / peak * 255)

            # Apply threshold
            _, thresh = cv2.threshold(magnitude, 30, 255, cv2.THRESH_BINARY)

            # Morphological operations
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 200 < area < 5000:
                    x, y, w, h = cv2.boundingRect(contour)
                    aspect_ratio = w / h if h > 0 else 0

                    if 0.3 < aspect_ratio < 8 and w > 25 and h > 15:
                        results.append(self._box_detection(
                            x, y, w, h, 0.35, 'handwritten'))

        except Exception as e:
            print(f"Handwritten text detection error: {e}")

        return results

    # ------------------------------------------------------------------
    # Merging
    # ------------------------------------------------------------------

    def _merge_detections(self, detections: List[Dict]) -> List[Dict]:
        """Merge overlapping detections from different methods.

        Detections are visited from highest to lowest confidence. Each one
        absorbs every not-yet-used detection whose IoU with it exceeds 0.3.
        The merged entry keeps the text of the most confident detection, the
        union bounding box, the highest confidence and a ``'+'``-joined,
        alphabetically sorted list of contributing methods.

        The input list is not modified.
        """
        if not detections:
            return []

        # sorted() rather than list.sort(): do not reorder the caller's list.
        ranked = sorted(detections, key=lambda d: d['confidence'], reverse=True)

        merged = []
        used = set()

        for i, detection in enumerate(ranked):
            if i in used:
                continue

            current = detection.copy()
            current_bbox = detection['bbox']

            # Find overlapping detections among the lower-ranked ones
            overlaps = [
                j for j in range(i + 1, len(ranked))
                if j not in used
                and self._calculate_iou(current_bbox, ranked[j]['bbox']) > 0.3
            ]

            if overlaps:
                group = [ranked[j] for j in overlaps]

                x1, y1, x2, y2 = self._merge_bboxes(
                    [current_bbox] + [d['bbox'] for d in group])
                current['bbox'] = (x1, y1, x2, y2)
                current['polygon'] = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]

                # Sorted so the combined label is deterministic.
                methods = {current['method']} | {d['method'] for d in group}
                current['method'] = '+'.join(sorted(methods))

                # Use highest confidence
                current['confidence'] = max(
                    [current['confidence']] + [d['confidence'] for d in group])

                used.update(overlaps)

            merged.append(current)
            used.add(i)

        return merged

    def _calculate_iou(self, bbox1: Tuple, bbox2: Tuple) -> float:
        """Calculate Intersection over Union of two ``(x1, y1, x2, y2)`` boxes."""
        x1_1, y1_1, x2_1, y2_1 = bbox1
        x1_2, y1_2, x2_2, y2_2 = bbox2

        # Calculate intersection
        x1_int = max(x1_1, x1_2)
        y1_int = max(y1_1, y1_2)
        x2_int = min(x2_1, x2_2)
        y2_int = min(y2_1, y2_2)

        if x2_int <= x1_int or y2_int <= y1_int:
            return 0.0

        intersection = (x2_int - x1_int) * (y2_int - y1_int)

        # Calculate union
        area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
        area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0

    def _merge_bboxes(self, bboxes: List[Tuple]) -> Tuple:
        """Merge multiple bounding boxes into the smallest box containing all."""
        x1_min = min(bbox[0] for bbox in bboxes)
        y1_min = min(bbox[1] for bbox in bboxes)
        x2_max = max(bbox[2] for bbox in bboxes)
        y2_max = max(bbox[3] for bbox in bboxes)

        return (x1_min, y1_min, x2_max, y2_max)

# ======================= ADVANCED INPAINTING =======================

class AdvancedInpainter:
    """Multi-method inpainting with quality optimization.

    Available back-ends are stored in ``self.inpainters``; OpenCV inpainting
    is always available and is the fallback for every other method.
    """

    def __init__(self):
        # The device must exist before the models are set up, because the
        # setup code moves the pipeline onto it.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.setup_inpainting_models()

    def setup_inpainting_models(self):
        """Setup all available inpainting methods.

        A model that fails to load is reported and skipped.
        """
        print("🔧 Setting up inpainting models...")

        self.inpainters = {}

        # Stable Diffusion Inpainting
        try:
            from diffusers import StableDiffusionInpaintPipeline

            use_cuda = torch.cuda.is_available()
            model_id = "runwayml/stable-diffusion-inpainting"
            pipe = StableDiffusionInpaintPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16 if use_cuda else torch.float32,
                variant="fp16" if use_cuda else None,
                use_safetensors=True
            )

            if use_cuda:
                # Enable optimizations. Model CPU offload manages device
                # placement itself, so the pipeline is not moved to the GPU
                # by hand first.
                pipe.enable_attention_slicing()
                pipe.enable_model_cpu_offload()
                try:
                    pipe.enable_xformers_memory_efficient_attention()
                except Exception:
                    pass  # xformers is optional
            else:
                pipe = pipe.to(self.device)

            self.inpainters['sd'] = pipe
            print("✅ Stable Diffusion inpainting loaded")

        except Exception as e:
            print(f"⚠️ Stable Diffusion loading failed: {e}")

        # MAT (Mask-Aware Transformer) - if available
        try:
            self.setup_mat_inpainter()
        except Exception:
            print("⚠️ MAT inpainter not available")

        # LaMa (Large Mask Inpainting) - if available
        try:
            self.setup_lama_inpainter()
        except Exception:
            print("⚠️ LaMa inpainter not available")

        print(f"✅ Inpainting setup complete! Available methods: {list(self.inpainters.keys())}")

    def setup_mat_inpainter(self):
        """Setup MAT (Mask-Aware Transformer) inpainter"""
        # Placeholder for MAT implementation
        # Would require downloading MAT model weights
        pass

    def setup_lama_inpainter(self):
        """Setup LaMa (Large Mask Inpainting) inpainter"""
        # Placeholder for LaMa implementation
        # Would require downloading LaMa model weights
        pass

    def inpaint_comprehensive(self, image: np.ndarray, mask: np.ndarray,
                              method: str = 'auto') -> np.ndarray:
        """
        Comprehensive inpainting using multiple methods

        Args:
            image: Input image (H, W, 3)
            mask: Binary mask (H, W) where 255 = inpaint area
            method: 'auto', 'sd', 'opencv', 'telea', 'ns', 'edge_connect'
                or 'patch_match'. Unknown names, and 'sd' when the model is
                not loaded, fall back to OpenCV Telea.

        Returns:
            Inpainted image
        """
        # Normalise to a strict 0/255 uint8 mask: cv2 and PIL both expect it,
        # while callers may pass bool or 0/1 masks.
        mask = np.where(np.asarray(mask) > 0, 255, 0).astype(np.uint8)

        if not mask.any():
            # Nothing to inpaint.
            return image.copy()

        if method == 'auto':
            # Choose best method based on mask characteristics
            method = self._choose_best_method(image, mask)

        print(f"Using inpainting method: {method}")

        if method == 'sd' and 'sd' in self.inpainters:
            return self._inpaint_with_sd(image, mask)
        if method in ('opencv', 'telea', 'ns'):
            return self._inpaint_with_opencv(image, mask, method)
        if method == 'edge_connect':
            return self._inpaint_with_edge_connect(image, mask)
        if method == 'patch_match':
            return self._inpaint_with_patch_match(image, mask)

        # Fallback to OpenCV
        return self._inpaint_with_opencv(image, mask, 'telea')

    def _choose_best_method(self, image: np.ndarray, mask: np.ndarray) -> str:
        """Choose best inpainting method based on image and mask characteristics.

        The decision uses the masked fraction of the image and the number of
        separate masked regions.
        """
        total_area = mask.shape[0] * mask.shape[1]
        if total_area == 0:
            return 'telea'
        mask_ratio = np.count_nonzero(mask) / total_area

        # Analyze mask complexity
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        num_regions = len(contours)

        if mask_ratio > 0.3:  # Large areas
            return 'sd' if 'sd' in self.inpainters else 'edge_connect'
        if num_regions > 10:  # Many small regions
            return 'telea'
        if mask_ratio > 0.1:  # Medium areas
            return 'sd' if 'sd' in self.inpainters else 'ns'
        return 'telea'  # Small areas

    def _inpaint_with_sd(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
        """Inpaint using Stable Diffusion.

        The model output is copied into the original only where the mask is
        set, so unmasked pixels are returned untouched. Falls back to OpenCV
        Telea if anything fails.
        """
        try:
            # Convert to PIL
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            pil_mask = Image.fromarray(mask)

            # Resize if too large
            max_size = 512
            if max(pil_image.size) > max_size:
                ratio = max_size / max(pil_image.size)
                new_size = (int(pil_image.width * ratio),
                            int(pil_image.height * ratio))
                pil_image = pil_image.resize(new_size, Image.LANCZOS)
                pil_mask = pil_mask.resize(new_size, Image.NEAREST)

            # Generate prompt for manga/comic style
            prompt = "high quality manga artwork, clean background, detailed illustration, professional comic art"
            negative_prompt = "text, letters, words, writing, low quality, blurry, distorted"

            # Inpaint
            result = self.inpainters['sd'](
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=pil_image,
                mask_image=pil_mask,
                num_inference_steps=25,
                guidance_scale=7.5,
                strength=0.8
            ).images[0]

            # Convert back to numpy
            result_np = np.array(result)

            # Resize back if needed
            if result_np.shape[:2] != image.shape[:2]:
                result_np = cv2.resize(result_np, (image.shape[1], image.shape[0]))

            generated = cv2.cvtColor(result_np, cv2.COLOR_RGB2BGR)

            # Keep the original pixels outside the mask: the diffusion output
            # has been resized and re-encoded, which degrades untouched areas.
            composite = image.copy()
            fill = mask > 0
            composite[fill] = generated[fill]
            return composite

        except Exception as e:
            print(f"SD inpainting failed: {e}")
            return self._inpaint_with_opencv(image, mask, 'telea')

    def _inpaint_with_opencv(self, image: np.ndarray, mask: np.ndarray,
                             method: str = 'telea') -> np.ndarray:
        """Inpaint using OpenCV methods.

        ``'ns'`` selects Navier-Stokes; every other value uses Telea. On
        failure the input image is returned unchanged.
        """
        try:
            flag = cv2.INPAINT_NS if method == 'ns' else cv2.INPAINT_TELEA
            return cv2.inpaint(image, mask, 3, flag)
        except Exception as e:
            print(f"OpenCV inpainting failed: {e}")
            return image

    def _inpaint_with_edge_connect(self, image: np.ndarray,
                                   mask: np.ndarray) -> np.ndarray:
        """Blend of two OpenCV inpainting passes.

        NOTE(review): despite the name no edge information is used. The
        previous edge-map computation never fed into the result and was
        removed; the output is a 60/40 blend of Telea (radius 3) and
        Navier-Stokes (radius 7).
        """
        try:
            # Apply multiple OpenCV inpainting methods and blend
            inpaint1 = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
            inpaint2 = cv2.inpaint(image, mask, 7, cv2.INPAINT_NS)

            # Blend results
            alpha = 0.6
            return cv2.addWeighted(inpaint1, alpha, inpaint2, 1 - alpha, 0)

        except Exception as e:
            print(f"Edge connect inpainting failed: {e}")
            return self._inpaint_with_opencv(image, mask, 'telea')

    def _inpaint_with_patch_match(self, image: np.ndarray,
                                  mask: np.ndarray) -> np.ndarray:
        """Patch-based inpainting.

        Each masked region is filled independently, searching for patches
        only inside that region's bounding box. This is an exhaustive search
        and is slow on large regions.
        """
        try:
            # Simple patch-based inpainting implementation
            result = image.copy()

            # Find mask boundaries
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                # Get bounding box
                x, y, w, h = cv2.boundingRect(contour)

                # Extract region
                region = image[y:y + h, x:x + w]
                region_mask = mask[y:y + h, x:x + w]

                # Simple patch-based filling, then write back
                result[y:y + h, x:x + w] = self._fill_region_with_patches(
                    region, region_mask)

            return result

        except Exception as e:
            print(f"Patch match inpainting failed: {e}")
            return self._inpaint_with_opencv(image, mask, 'telea')

    def _fill_region_with_patches(self, region: np.ndarray, mask: np.ndarray,
                                  patch_size: int = 9) -> np.ndarray:
        """Fill masked region using patch matching.

        Returns a copy of *region* in which every masked pixel that has a
        matching patch is replaced by the centre pixel of that patch. Pixels
        without a match keep their original value.
        """
        result = region.copy()

        # Find pixels to fill
        rows, cols = np.where(mask > 0)

        for y, x in zip(rows, cols):
            # Find best matching patch
            best_patch = self._find_best_patch(region, mask, x, y, patch_size)

            if best_patch is not None:
                # Fill the pixel
                result[y, x] = best_patch

        return result

    def _find_best_patch(self, image: np.ndarray, mask: np.ndarray,
                         x: int, y: int, patch_size: int) -> Optional[np.ndarray]:
        """Find best matching patch for a pixel.

        Compares the unmasked pixels around ``(x, y)`` against every
        full-size patch whose centre is unmasked, using the sum of squared
        differences.

        Returns:
            The centre pixel of the best candidate, or ``None`` when the
            target patch has no unmasked pixels or no candidate has the same
            shape (e.g. the target patch is clipped by the image border).
        """
        half_size = patch_size // 2

        # Get patch around target pixel
        y1, y2 = max(0, y - half_size), min(image.shape[0], y + half_size + 1)
        x1, x2 = max(0, x - half_size), min(image.shape[1], x + half_size + 1)

        target_patch = image[y1:y2, x1:x2]
        target_mask = mask[y1:y2, x1:x2]

        # Find valid pixels in the patch (not masked)
        valid_pixels = target_mask == 0

        if not np.any(valid_pixels):
            return None

        # Work in a wide signed type: uint8 subtraction and squaring wrap
        # modulo 256 and make very different patches look identical.
        target_values = target_patch[valid_pixels].astype(np.int64)

        best_match = None
        best_score = float('inf')

        # Search for similar patches in the image
        for sy in range(half_size, image.shape[0] - half_size):
            for sx in range(half_size, image.shape[1] - half_size):
                # Skip if in masked area
                if mask[sy, sx] > 0:
                    continue

                # Get candidate patch
                candidate_patch = image[sy - half_size:sy + half_size + 1,
                                        sx - half_size:sx + half_size + 1]

                # Calculate similarity only for valid pixels
                if candidate_patch.shape == target_patch.shape:
                    delta = candidate_patch[valid_pixels].astype(np.int64) - target_values
                    diff = np.sum(delta ** 2)

                    if diff < best_score:
                        best_score = diff
                        best_match = candidate_patch[half_size, half_size]

        return best_match

# ======================= MAIN PROCESSING CLASS =======================

class MangaTextRemover:
"""Main class for comprehensive manga/comic text removal"""

def __init__(self):
self.detector = AdvancedTextDetector()
self.inpainter = AdvancedInpainter()
self.processing_stats = {}

def process_image(self, image_path: str, output_path: str = None,

detection_confidence: float = 0.3,
inpaint_method: str = 'auto',
expand_mask: int = 5,
show_process: bool = True) -> Dict:
"""
Complete text removal process

Args:
image_path: Path to input image
output_path: Path for output image (optional)
detection_confidence: Minimum confidence for text detection
inpaint_method: Inpainting method to use
expand_mask: Pixels to expand mask around detected text
show_process: Whether to show processing steps

Returns:
Dictionary with processing results and statistics
"""
print(f"🎯 Processing image: {image_path}")
start_time = time.time()

# Load image
image = cv2.imread(image_path)
if image is None:
raise ValueError(f"Could not load image: {image_path}")
original_image = image.copy()

# Step 1: Text Detection

print("📍 Step 1: Detecting text regions...")
detections = self.detector.detect_text_comprehensive(image,
detection_confidence)

print(f"✅ Found {len(detections)} text regions")

# Step 2: Create comprehensive mask

print("🎨 Step 2: Creating inpainting mask...")
mask = self._create_comprehensive_mask(image, detections, expand_mask)

# Step 3: Inpainting
print("🔄 Step 3: Removing text and inpainting...")
result = self.inpainter.inpaint_comprehensive(image, mask, inpaint_method)

# Step 4: Post-processing
print("✨ Step 4: Post-processing...")
result = self._post_process_result(original_image, result, mask)

# Step 5: Save result

if output_path:
cv2.imwrite(output_path, result)
print(f"💾 Saved result to: {output_path}")

# Calculate statistics
processing_time = time.time() - start_time
stats = {
'detections_count': len(detections),
'processing_time': processing_time,
'mask_area_ratio': np.sum(mask > 0) / (mask.shape[0] * mask.shape[1]),
'detection_methods': list(set([d['method'] for d in detections])),
'inpaint_method': inpaint_method,
'image_size': image.shape[:2]
}

if show_process:
self._show_processing_results(original_image, detections, mask, result,
stats)

return {
'result': result,
'original': original_image,
'mask': mask,
'detections': detections,
'stats': stats
}

def _create_comprehensive_mask(self, image: np.ndarray, detections: List[Dict],

expand: int = 5) -> np.ndarray:
"""Create comprehensive mask from all detections"""
mask = np.zeros(image.shape[:2], dtype=np.uint8)

for detection in detections:

bbox = detection['bbox']
x1, y1, x2, y2 = bbox
# Expand bounding box
x1 = max(0, x1 - expand)
y1 = max(0, y1 - expand)
x2 = min(image.shape[1], x2 + expand)
y2 = min(image.shape[0], y2 + expand)

# Add to mask
mask[y1:y2, x1:x2] = 255

# Morphological operations to clean up mask

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

return mask

def _post_process_result(self, original: np.ndarray, result: np.ndarray,

mask: np.ndarray) -> np.ndarray:
"""Post-process the inpainting result"""
# Blend edges for seamless integration
blurred_mask = cv2.GaussianBlur(mask.astype(np.float32), (5, 5), 0) / 255.0
blurred_mask = np.stack([blurred_mask] * 3, axis=2)

# Smooth transition
final_result = (result * blurred_mask + original * (1 -
blurred_mask)).astype(np.uint8)

# Color correction
final_result = self._match_color_distribution(original, final_result, mask)

# Sharpening
final_result = self._apply_sharpening(final_result)

return final_result

def _match_color_distribution(self, original: np.ndarray, result: np.ndarray,

mask: np.ndarray) -> np.ndarray:
"""Match color distribution between original and result"""
try:
# Get non-masked areas for reference
non_masked = mask == 0

for channel in range(3):

orig_channel = original[:, :, channel][non_masked]
result_channel = result[:, :, channel]

# Calculate statistics
orig_mean = np.mean(orig_channel)
orig_std = np.std(orig_channel)
result_mean = np.mean(result_channel)
result_std = np.std(result_channel)

# Adjust result to match original statistics

if result_std > 0:
result[:, :, channel] = (
(result_channel - result_mean) * (orig_std / result_std) +
orig_mean
).clip(0, 255)
return result.astype(np.uint8)

except Exception as e:
print(f"Color matching failed: {e}")
return result

def _apply_sharpening(self, image: np.ndarray, strength: float = 0.5) ->

np.ndarray:
"""Apply subtle sharpening to the result"""
try:
# Unsharp mask
blurred = cv2.GaussianBlur(image, (0, 0), 1.0)
sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength,
0)

return sharpened.clip(0, 255).astype(np.uint8)

except Exception as e:
print(f"Sharpening failed: {e}")
return image

def _show_processing_results(self, original: np.ndarray, detections: List[Dict],
                             mask: np.ndarray, result: np.ndarray, stats: Dict):
    """Show a 2x2 overview figure and print the run statistics.

    Panels: original with detection boxes, inpainting mask, cleaned
    result, and a side-by-side before/after comparison.

    Args:
        original: Input image (converted BGR -> RGB for display).
        detections: Detection dicts; each must provide ``'bbox'`` as
            ``(x1, y1, x2, y2)``.
        mask: Inpainting mask, shown in grayscale.
        result: Image after text removal (converted BGR -> RGB for display).
        stats: Dict with the keys printed below.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Draw every detection box on a copy so the caller's image is untouched.
    annotated = original.copy()
    for item in detections:
        left, top, right, bottom = item['bbox']
        cv2.rectangle(annotated, (left, top), (right, bottom), (0, 255, 0), 2)

    # Top-left: original with detections
    top_left = axes[0, 0]
    top_left.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    top_left.set_title(f'Original + Detections ({len(detections)} regions)')
    top_left.axis('off')

    # Top-right: mask
    top_right = axes[0, 1]
    top_right.imshow(mask, cmap='gray')
    top_right.set_title('Inpainting Mask')
    top_right.axis('off')

    # Bottom-left: result
    bottom_left = axes[1, 0]
    bottom_left.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    bottom_left.set_title('Text Removed Result')
    bottom_left.axis('off')

    # Bottom-right: before/after strip
    before_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)
    after_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
    bottom_right = axes[1, 1]
    bottom_right.imshow(np.hstack([before_rgb, after_rgb]))
    bottom_right.set_title('Before vs After')
    bottom_right.axis('off')

    plt.tight_layout()
    plt.show()

    # Textual summary of the run
    print(f"\n📊 Processing Statistics:")
    print(f" • Text regions detected: {stats['detections_count']}")
    print(f" • Processing time: {stats['processing_time']:.2f} seconds")
    print(f" • Mask area ratio: {stats['mask_area_ratio']:.1%}")
    print(f" • Detection methods: {', '.join(stats['detection_methods'])}")
    print(f" • Inpainting method: {stats['inpaint_method']}")
    print(f" • Image size: {stats['image_size'][1]}x{stats['image_size'][0]}")

# ======================= BATCH PROCESSING =======================

class BatchProcessor:
    """Batch processing for multiple images.

    One ``MangaTextRemover`` is created up front and reused for every image.
    """

    def __init__(self):
        self.remover = MangaTextRemover()

    def process_folder(self, input_folder: str, output_folder: str,
                       image_extensions: Optional[List[str]] = None,
                       **kwargs) -> Dict:
        """Process all images found directly inside ``input_folder``.

        Args:
            input_folder: Folder to scan (not recursive).
            output_folder: Folder for the results; created if missing. Each
                result is written as ``cleaned_<original name>``.
            image_extensions: File-name endings to accept, matched
                case-insensitively. Defaults to common image formats.
            **kwargs: Forwarded unchanged to ``process_image``.

        Returns:
            Dict with ``'processed'`` and ``'failed'`` counts,
            ``'failed_files'`` (list of paths) and ``'results'`` (per-file
            stats keyed by input path).
        """
        if image_extensions is None:
            image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']

        # Create output folder
        os.makedirs(output_folder, exist_ok=True)

        # Find all images with a single case-insensitive scan. Globbing once
        # for the lower-case and once for the upper-case ending lists every
        # file twice on case-insensitive filesystems and misses mixed-case
        # endings such as ".Jpg".
        endings = tuple(ext.lower() for ext in image_extensions)
        source_dir = Path(input_folder)
        image_files = []
        if source_dir.is_dir():
            image_files = sorted(
                entry for entry in source_dir.iterdir()
                if entry.is_file() and entry.name.lower().endswith(endings)
            )

        print(f"Found {len(image_files)} images to process")

        results = {}
        failed = []

        for image_file in tqdm(image_files, desc="Processing images"):
            try:
                # Generate output path
                output_path = os.path.join(output_folder,
                                           f"cleaned_{image_file.name}")

                # Process image
                result = self.remover.process_image(
                    str(image_file),
                    output_path,
                    show_process=False,
                    **kwargs
                )

                results[str(image_file)] = result['stats']

            except Exception as e:
                # One bad image must not abort the batch; record it and go on.
                print(f"❌ Failed to process {image_file}: {e}")
                failed.append(str(image_file))

        return {
            'processed': len(results),
            'failed': len(failed),
            'failed_files': failed,
            'results': results
        }

# ======================= EASY-TO-USE FUNCTIONS =======================

def setup_environment():
    """Install every dependency in one call (intended for Google Colab).

    Prints a banner, delegates to ``install_all_dependencies()`` and prints
    a completion message.
    """
    start_banner = "🚀 Setting up Manga Text Removal environment..."
    done_banner = "✅ Environment setup complete!"

    print(start_banner)
    install_all_dependencies()
    print(done_banner)

def remove_text_from_image(image_path: str, output_path: Optional[str] = None,
                           confidence: float = 0.3) -> str:
    """
    Simple function to remove text from a single image

    Args:
        image_path: Path to input image
        output_path: Path for output (optional). Defaults to the input path
            with ``_no_text`` inserted before the extension.
        confidence: Detection confidence threshold

    Returns:
        Path to output image
    """
    if output_path is None:
        name, ext = os.path.splitext(image_path)
        output_path = f"{name}_no_text{ext}"

    remover = MangaTextRemover()
    # Pass the threshold by keyword, like every other call site in this
    # file, so it cannot bind to the wrong positional parameter.
    remover.process_image(image_path, output_path,
                          detection_confidence=confidence)

    return output_path

def remove_text_from_folder(input_folder: str, output_folder: Optional[str] = None,
                            confidence: float = 0.3) -> Dict:
    """
    Simple function to remove text from all images in a folder

    Args:
        input_folder: Path to input folder
        output_folder: Path to output folder (optional). Defaults to a
            sibling folder named ``<input_folder>_cleaned``.
        confidence: Detection confidence threshold

    Returns:
        Processing statistics
    """
    if output_folder is None:
        # Drop trailing path separators first: 'pages/' would otherwise
        # become 'pages/_cleaned', i.e. a folder inside the input folder.
        separators = os.sep + (os.altsep or "")
        base = input_folder.rstrip(separators) or input_folder
        output_folder = f"{base}_cleaned"

    processor = BatchProcessor()
    return processor.process_folder(input_folder, output_folder,
                                    detection_confidence=confidence)
# ======================= USAGE EXAMPLES =======================

def demo_usage():
    """Print a short usage guide for the public helper functions.

    The "Advanced usage" example uses the ``detection_confidence`` keyword,
    which is the name every ``process_image`` call in this file passes.
    """
    print("""
🎯 Manga/Comic Text Removal System - Usage Examples

1. Setup (run once):

setup_environment()

2. Process single image:

remove_text_from_image('manga_page.jpg', 'clean_manga.jpg')

3. Process folder:
stats = remove_text_from_folder('manga_folder/', 'clean_manga_folder/')

4. Advanced usage:
remover = MangaTextRemover()
result = remover.process_image('image.jpg', detection_confidence=0.4)

5. Batch processing:
processor = BatchProcessor()
stats = processor.process_folder('input/', 'output/')

📝 Tips:
- Lower confidence (0.1-0.3) detects more text but may have false positives
- Higher confidence (0.4-0.8) is more selective but may miss some text
- Use 'sd' inpainting method for best quality (requires GPU)
- Use 'telea' or 'ns' for faster processing
""")

# ======================= GOOGLE COLAB HELPERS =======================

def upload_and_process():
    """Upload images in Google Colab, clean them and offer the downloads.

    Returns:
        A list of dicts with ``'input'``, ``'output'`` and ``'stats'`` for
        each uploaded file, or ``None`` when not running in Google Colab.
    """
    # Guard only the Colab import. An ImportError raised later, during
    # processing, must not be reported as "not running in Colab".
    try:
        from google.colab import files
    except ImportError:
        print("This function is only available in Google Colab")
        return None

    # Upload files
    print("📤 Please select image files to upload:")
    uploaded = files.upload()

    results = []
    # Build the remover once and reuse it for every file, as BatchProcessor
    # does; skip construction when nothing was uploaded.
    remover = MangaTextRemover() if uploaded else None

    for filename in uploaded:
        print(f"\n🔄 Processing {filename}...")

        # Process the image
        output_path = f"cleaned_{filename}"
        result = remover.process_image(filename, output_path)

        results.append({
            'input': filename,
            'output': output_path,
            'stats': result['stats']
        })

    # Download results
    print("\n📥 Download processed images:")
    for entry in results:
        files.download(entry['output'])

    return results

def create_gradio_interface():
    """Create Gradio web interface for easy use.

    Returns:
        A ``gr.Interface`` with image, confidence and inpainting-method
        inputs, or ``None`` when Gradio is not installed.
    """
    # Guard only the optional dependency itself.
    try:
        import gradio as gr
    except ImportError:
        print("Gradio not available. Install with: pip install gradio")
        return None

    import tempfile

    def process_image_gradio(image, confidence, inpaint_method):
        """Run text removal on one uploaded PIL image.

        Returns ``(result_image, stats_text)``; on failure the image is
        ``None`` and the text carries the error message.
        """
        if image is None:
            return None, "Please upload an image"

        try:
            # A private directory per request: fixed file names in the
            # working directory would be shared by concurrent requests and
            # were never cleaned up.
            with tempfile.TemporaryDirectory(prefix="manga_text_") as work_dir:
                temp_input = os.path.join(work_dir, "temp_input.jpg")
                temp_output = os.path.join(work_dir, "temp_output.jpg")

                # Force three channels first; RGBA or grayscale uploads
                # would make the RGB -> BGR conversion fail.
                rgb_pixels = np.array(image.convert("RGB"))
                cv2.imwrite(temp_input,
                            cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))

                # Process
                remover = MangaTextRemover()
                result = remover.process_image(
                    temp_input,
                    temp_output,
                    detection_confidence=confidence,
                    inpaint_method=inpaint_method,
                    show_process=False
                )

                # Image.open is lazy: copy the pixels into memory before
                # the temporary directory is removed.
                with Image.open(temp_output) as saved:
                    result_image = saved.copy()

            # Create stats text
            stats = result['stats']
            stats_text = f"""
Detected {stats['detections_count']} text regions
Processing time: {stats['processing_time']:.2f} seconds
Methods used: {', '.join(stats['detection_methods'])}
Inpainting: {stats['inpaint_method']}
"""

            return result_image, stats_text

        except Exception as e:
            return None, f"Error: {str(e)}"

    # Create interface
    interface = gr.Interface(
        fn=process_image_gradio,
        inputs=[
            gr.Image(type="pil", label="Upload Manga/Comic Image"),
            gr.Slider(0.1, 0.9, value=0.3, label="Detection Confidence"),
            gr.Dropdown(
                ["auto", "sd", "telea", "ns", "edge_connect"],
                value="auto",
                label="Inpainting Method"
            )
        ],
        outputs=[
            gr.Image(type="pil", label="Text Removed"),
            gr.Textbox(label="Processing Stats")
        ],
        title="Manga/Comic Text Removal",
        description="Upload a manga or comic image to automatically detect and remove text while preserving the artwork."
    )

    return interface

# ======================= MAIN EXECUTION =======================

if __name__ == "__main__":
    # NOTE(review): a second, more complete ``__main__`` block exists at the
    # end of the file; when run as a script both execute in order.

    # Show usage information
    demo_usage()

    print("\n" + "="*50)
    print("🎯 MANGA/COMIC TEXT REMOVAL SYSTEM READY!")
    print("="*50)

    # Check if running in Colab. Only the probe import is guarded, so an
    # ImportError raised by the setup or interface code below is not
    # mistaken for "running locally".
    try:
        import google.colab  # noqa: F401
    except ImportError:
        print("💻 Running in local environment")
        print("Make sure all dependencies are installed:")
        print("pip install opencv-python pillow numpy matplotlib tqdm easyocr pytesseract transformers torch diffusers")
        # Offer local setup
        local_choice = input("\nInstall dependencies now? (y/n): ").lower()
        if local_choice == 'y':
            install_all_dependencies()
    else:
        print("📱 Google Colab detected!")
        print("Run setup_environment() to install dependencies")
        print("Run upload_and_process() for easy file processing")

        # Auto-setup if requested
        setup_choice = input("\nSetup environment now? (y/n): ").lower()
        if setup_choice == 'y':
            setup_environment()

        # Offer to create Gradio interface
        gradio_choice = input("\nCreate web interface? (y/n): ").lower()
        if gradio_choice == 'y':
            interface = create_gradio_interface()
            if interface:
                interface.launch(share=True)

# ======================= ADDITIONAL UTILITY FUNCTIONS =======================

def quick_test():
    """Quick test function to verify everything works.

    Draws a synthetic page with three pieces of text, runs the full
    pipeline on it and reports the number of detections.

    Returns:
        ``True`` if processing completed, ``False`` if it raised.
    """
    print("🧪 Running quick system test...")

    # Create a test image with text
    test_image = np.ones((400, 600, 3), dtype=np.uint8) * 255

    # Add some test text using OpenCV
    cv2.putText(test_image, "TEST TEXT", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 2,
                (0, 0, 0), 3)
    cv2.putText(test_image, "Sample Manga Text", (50, 200),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    cv2.rectangle(test_image, (40, 250), (300, 320), (0, 0, 0), 2)
    cv2.putText(test_image, "Speech Bubble", (60, 290), cv2.FONT_HERSHEY_SIMPLEX,
                1, (0, 0, 0), 2)

    # Save test image
    test_path = "test_manga.jpg"
    cv2.imwrite(test_path, test_image)

    try:
        # Test the system
        remover = MangaTextRemover()
        result = remover.process_image(test_path, "test_result.jpg",
                                       show_process=True)

        print("✅ System test passed!")
        print(f"Detected {result['stats']['detections_count']} text regions")
        return True

    except Exception as e:
        print(f"❌ System test failed: {e}")
        return False

    finally:
        # Clean up the generated input on success AND on failure; the
        # result image is deliberately left behind for inspection.
        try:
            if os.path.exists(test_path):
                os.remove(test_path)
        except OSError:
            pass

def optimize_for_colab():
    """Apply process-wide tweaks aimed at Google Colab sessions.

    Sets two environment variables, raises matplotlib's open-figure warning
    threshold and triggers a garbage collection.
    """
    # Local import, as in the original, so the function is self-contained.
    import gc

    print("⚙️ Optimizing for Google Colab...")

    # Environment variables
    # NOTE(review): these are set after cv2 and the interpreter are already
    # running; confirm they still take effect at this point.
    os.environ.update(
        OPENCV_IO_MAX_IMAGE_PIXELS=str(2**63-1),
        PYTHONWARNINGS='ignore',
    )

    # Allow more simultaneously open figures before matplotlib warns.
    plt.rcParams['figure.max_open_warning'] = 50

    # Free whatever memory can be reclaimed right now.
    gc.collect()
    print("✅ Optimization complete!")

def create_sample_images():
    """Generate three synthetic manga-like test pages.

    The images are written to the ``sample_images`` folder, which is
    created when missing.

    Returns:
        The name of the folder containing the samples.
    """
    print("🎨 Creating sample images for testing...")

    samples_dir = "sample_images"
    os.makedirs(samples_dir, exist_ok=True)

    font = cv2.FONT_HERSHEY_SIMPLEX
    black = (0, 0, 0)
    white = (255, 255, 255)

    # Sample 1: one line of text on a pure white page
    plain_page = np.full((300, 400, 3), 255, dtype=np.uint8)
    cv2.putText(plain_page, "Hello World!", (50, 150), font, 1.5, black, 2)
    cv2.imwrite(f"{samples_dir}/sample1_simple.jpg", plain_page)

    # Sample 2: filled white ellipse with a black outline as a speech bubble
    bubble_page = np.full((400, 500, 3), 240, dtype=np.uint8)
    bubble_center, bubble_axes = (250, 150), (180, 80)
    cv2.ellipse(bubble_page, bubble_center, bubble_axes, 0, 0, 360, white, -1)
    cv2.ellipse(bubble_page, bubble_center, bubble_axes, 0, 0, 360, black, 2)
    cv2.putText(bubble_page, "This is a", (180, 135), font, 0.8, black, 2)
    cv2.putText(bubble_page, "speech bubble!", (160, 165), font, 0.8, black, 2)
    cv2.imwrite(f"{samples_dir}/sample2_bubble.jpg", bubble_page)

    # Sample 3: title, boxed dialogue and a sound effect on one page
    busy_page = np.full((500, 600, 3), 245, dtype=np.uint8)
    cv2.putText(busy_page, "MANGA TITLE", (150, 60), font, 1.2, black, 3)
    box_top_left, box_bottom_right = (50, 100), (300, 180)
    cv2.rectangle(busy_page, box_top_left, box_bottom_right, white, -1)
    cv2.rectangle(busy_page, box_top_left, box_bottom_right, black, 2)
    cv2.putText(busy_page, "Character says:", (70, 130), font, 0.6, black, 1)
    cv2.putText(busy_page, "Something important!", (70, 155), font, 0.7, black, 2)
    # Sound effect
    cv2.putText(busy_page, "BOOM!", (400, 250), font, 1.5, black, 3)
    cv2.imwrite(f"{samples_dir}/sample3_complex.jpg", busy_page)

    print(f"✅ Created 3 sample images in '{samples_dir}/' folder")

    return samples_dir

def benchmark_methods():
    """Benchmark detection at several confidence thresholds.

    Creates the sample images, processes every ``.jpg`` sample at
    confidences 0.1, 0.3, 0.5 and 0.7, and prints a summary.

    Returns:
        ``{sample_file: {confidence: entry}}`` where ``entry`` holds
        ``'detections'``, ``'time'`` and ``'methods'``, or ``'error'`` when
        that run failed.
    """
    print(" Benchmarking detection and inpainting methods...")

    # Create test images
    samples_dir = create_sample_images()

    results = {}

    # Created lazily inside the first guarded run and then reused, so model
    # setup is paid once and stays outside the timed section.
    remover = None

    # Test each sample (sorted so the report order is reproducible)
    for sample_file in sorted(os.listdir(samples_dir)):
        if not sample_file.endswith('.jpg'):
            continue

        sample_path = os.path.join(samples_dir, sample_file)
        print(f"\n🔍 Testing {sample_file}...")

        file_results = {}

        # Test different confidence levels
        for confidence in [0.1, 0.3, 0.5, 0.7]:
            print(f" Confidence {confidence}...")

            try:
                if remover is None:
                    remover = MangaTextRemover()

                # Time only the processing call itself.
                start_time = time.perf_counter()
                result = remover.process_image(
                    sample_path,
                    f"benchmark_{sample_file}_{confidence}.jpg",
                    detection_confidence=confidence,
                    show_process=False
                )
                processing_time = time.perf_counter() - start_time

                file_results[confidence] = {
                    'detections': result['stats']['detections_count'],
                    'time': processing_time,
                    'methods': result['stats']['detection_methods']
                }

            except Exception as e:
                # Record the failure and continue with the next setting.
                file_results[confidence] = {'error': str(e)}

        results[sample_file] = file_results

    # Print benchmark results
    print("\n📊 BENCHMARK RESULTS:")
    print("="*60)

    for file_name, file_results in results.items():
        print(f"\n{file_name}:")
        for confidence, result in file_results.items():
            if 'error' in result:
                print(f" Confidence {confidence}: ERROR - {result['error']}")
            else:
                print(f" Confidence {confidence}: {result['detections']} detections, {result['time']:.2f}s")

    return results

def create_advanced_gradio_interface():
    """Create advanced Gradio interface with more options.

    Besides the cleaned image it shows the detections, the inpainting mask,
    a before/after comparison and the processing statistics.

    Returns:
        A ``gr.Interface``, or ``None`` when Gradio is not installed.
    """
    # Guard only the optional dependency itself.
    try:
        import gradio as gr
    except ImportError:
        print("Gradio not available. Install with: pip install gradio")
        return None

    import tempfile

    def process_with_options(image, confidence, inpaint_method, expand_mask,
                             show_steps):
        """Advanced processing function with more options.

        ``show_steps`` is accepted for the UI checkbox but is not used.
        Returns ``(result, detections, mask, comparison, stats_text)``; on
        failure the four images are ``None`` and the text holds the error.
        """
        if image is None:
            return None, None, None, None, "Please upload an image"

        try:
            # A private directory per request: fixed file names in the
            # working directory would be shared by concurrent requests and
            # were never cleaned up.
            with tempfile.TemporaryDirectory(prefix="manga_text_") as work_dir:
                temp_input = os.path.join(work_dir, "temp_input.jpg")
                temp_output = os.path.join(work_dir, "temp_output.jpg")

                # Force three channels first; RGBA or grayscale uploads
                # would make the RGB -> BGR conversion fail.
                rgb_pixels = np.array(image.convert("RGB"))
                cv2.imwrite(temp_input,
                            cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))

                # Process
                remover = MangaTextRemover()
                result = remover.process_image(
                    temp_input,
                    temp_output,
                    detection_confidence=confidence,
                    inpaint_method=inpaint_method,
                    # Sliders may deliver floats; presumably the value is
                    # used as a pixel offset, so pass an int.
                    expand_mask=int(expand_mask),
                    show_process=False
                )

                # Image.open is lazy: copy the pixels into memory before
                # the temporary directory is removed.
                with Image.open(temp_output) as saved:
                    result_image = saved.copy()

            # Create visualization images
            original = result['original']
            mask = result['mask']
            detections = result['detections']

            # Create detection visualization
            detection_vis = original.copy()
            for detection in detections:
                bbox = detection['bbox']
                x1, y1, x2, y2 = bbox
                cv2.rectangle(detection_vis, (x1, y1), (x2, y2), (0, 255, 0),
                              2)
                # Add confidence text
                conf_text = f"{detection['method']}: {detection['confidence']:.2f}"
                cv2.putText(detection_vis, conf_text, (x1, y1-5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)

            detection_image = Image.fromarray(cv2.cvtColor(detection_vis,
                                                           cv2.COLOR_BGR2RGB))
            mask_image = Image.fromarray(mask)

            # Create side-by-side comparison
            comparison = Image.new('RGB', (original.shape[1]*2,
                                           original.shape[0]))
            comparison.paste(Image.fromarray(cv2.cvtColor(original,
                                                          cv2.COLOR_BGR2RGB)), (0, 0))
            comparison.paste(result_image, (original.shape[1], 0))

            # Create stats text
            stats = result['stats']
            stats_text = f"""
📊 Processing Results:
• Detected regions: {stats['detections_count']}
• Processing time: {stats['processing_time']:.2f} seconds
• Detection methods: {', '.join(stats['detection_methods'])}
• Inpainting method: {stats['inpaint_method']}
• Mask area: {stats['mask_area_ratio']:.1%} of image
• Image size: {stats['image_size'][1]}×{stats['image_size'][0]}
"""

            return (result_image, detection_image, mask_image, comparison,
                    stats_text)

        except Exception as e:
            return None, None, None, None, f"Error: {str(e)}"

    # Create interface
    interface = gr.Interface(
        fn=process_with_options,
        inputs=[
            gr.Image(type="pil", label="📸 Upload Manga/Comic Image"),
            gr.Slider(0.05, 0.95, value=0.3, step=0.05, label="🎯 Detection Confidence"),
            gr.Dropdown(
                ["auto", "sd", "telea", "ns", "edge_connect", "patch_match"],
                value="auto",
                label="🎨 Inpainting Method"
            ),
            gr.Slider(1, 20, value=5, step=1, label="📏 Mask Expansion (pixels)"),
            gr.Checkbox(value=False, label=" Show Processing Steps")
        ],
        outputs=[
            gr.Image(type="pil", label="✨ Text Removed Result"),
            gr.Image(type="pil", label="🔍 Detected Text Regions"),
            gr.Image(type="pil", label="🎭 Inpainting Mask"),
            gr.Image(type="pil", label="📊 Before/After Comparison"),
            gr.Textbox(label="📈 Processing Statistics", lines=8)
        ],
        title="🎯 Advanced Manga/Comic Text Removal",
        description="""
Upload a manga or comic image to automatically detect and remove text
while preserving the artwork.

**Tips:**
- Lower confidence detects more text but may include false positives
- 'auto' inpainting method chooses the best approach automatically
- Increase mask expansion for better coverage around text
- Use 'sd' method for highest quality (requires GPU)
""",
        # Offer the bundled examples only if the sample folder exists.
        examples=[
            ["sample_images/sample1_simple.jpg", 0.3, "auto", 5, False],
            ["sample_images/sample2_bubble.jpg", 0.4, "telea", 7, False],
            ["sample_images/sample3_complex.jpg", 0.2, "auto", 8, False]
        ] if os.path.exists("sample_images") else None,
        theme=gr.themes.Soft(),
        allow_flagging="never"
    )

    return interface
def create_batch_interface():
    """Create Gradio interface for batch processing.

    The interface takes a ZIP of images and returns a ZIP of cleaned images
    plus a text summary.

    Returns:
        A ``gr.Interface``, or ``None`` when Gradio is not installed.
    """
    # Guard only the optional dependency itself.
    try:
        import gradio as gr
    except ImportError:
        return None

    import shutil
    import tempfile
    import zipfile

    def process_batch(zip_file, confidence, inpaint_method):
        """Process multiple images from a ZIP file.

        Returns ``(output_zip_path, summary)``; on failure the path is
        ``None`` and the summary holds the reason.
        """
        if zip_file is None:
            return None, "Please upload a ZIP file containing images"

        # Fresh directories per request: reusing fixed 'temp_batch' and
        # 'batch_output' folders would mix images left over from earlier
        # uploads into this run's results.
        work_dir = tempfile.mkdtemp(prefix="manga_batch_")
        try:
            temp_dir = os.path.join(work_dir, "temp_batch")
            output_dir = os.path.join(work_dir, "batch_output")
            os.makedirs(temp_dir, exist_ok=True)
            os.makedirs(output_dir, exist_ok=True)

            # Accept either an object with a .name attribute or a plain
            # path string for the uploaded file.
            zip_path = getattr(zip_file, "name", zip_file)

            # Extract ZIP file
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Process all images
            # NOTE(review): only the top level of the extracted archive
            # appears to be scanned; images in sub-folders look skipped.
            processor = BatchProcessor()
            results = processor.process_folder(
                temp_dir,
                output_dir,
                detection_confidence=confidence,
                inpaint_method=inpaint_method
            )

            total = results['processed'] + results['failed']
            if total == 0:
                # Avoid the division by zero below and give a useful hint.
                shutil.rmtree(work_dir, ignore_errors=True)
                return None, "No supported images were found in the ZIP file"

            # Create output ZIP
            output_zip = os.path.join(work_dir, "processed_images.zip")
            with zipfile.ZipFile(output_zip, 'w') as zip_ref:
                for root, _dirs, files in os.walk(output_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        zip_ref.write(file_path, file)

            # The intermediate folders are no longer needed; only the
            # result archive has to outlive this call.
            shutil.rmtree(temp_dir, ignore_errors=True)
            shutil.rmtree(output_dir, ignore_errors=True)

            # Create summary
            success_rate = results['processed'] / total * 100
            summary = f"""
📊 Batch Processing Complete!
• Total images processed: {results['processed']}
• Failed images: {results['failed']}
• Success rate: {success_rate:.1f}%

Download the ZIP file to get all processed images.

"""

            return output_zip, summary

        except Exception as e:
            shutil.rmtree(work_dir, ignore_errors=True)
            return None, f"Error processing batch: {str(e)}"

    interface = gr.Interface(
        fn=process_batch,
        inputs=[
            gr.File(label="📦 Upload ZIP file with images",
                    file_types=[".zip"]),
            gr.Slider(0.1, 0.9, value=0.3, label="🎯 Detection Confidence"),
            gr.Dropdown(["auto", "telea", "ns"], value="auto", label="🎨 Inpainting Method")
        ],
        outputs=[
            gr.File(label="📥 Download Processed Images"),
            gr.Textbox(label="📊 Processing Summary", lines=6)
        ],
        title="📦 Batch Manga Text Removal",
        description="Upload a ZIP file containing multiple manga/comic images for batch processing."
    )

    return interface

# ======================= MAIN LAUNCHER =======================

def launch_application():
    """Run the environment checks and start the tabbed web application.

    Steps: report GPU availability, apply the Colab tweaks, create the
    sample images, run the quick self-test and, if it passes, launch the
    Gradio tabs (basic, advanced and, when available, batch).
    """
    print("🚀 Launching Manga Text Removal Application...")

    # Report which mode will be used
    if check_gpu_availability():
        print("🎮 GPU acceleration available - using advanced models")
    else:
        print("💻 Using CPU mode - basic models only")

    # Prepare the environment and test data
    optimize_for_colab()
    create_sample_images()

    # Nothing is launched unless the self-test passes
    if not quick_test():
        print("❌ System test failed. Please check dependencies.")
        return

    print("✅ All systems ready!")

    # Build the individual interfaces
    basic_interface = create_gradio_interface()
    advanced_interface = create_advanced_gradio_interface()
    batch_interface = create_batch_interface()

    # Both core interfaces are required for the tabbed application
    if not (basic_interface and advanced_interface):
        return

    try:
        import gradio as gr

        # Keep each interface next to its tab label; the batch tab is optional.
        tabs = [
            (basic_interface, "⚙️ Basic Mode"),
            (advanced_interface, " Advanced Mode"),
        ]
        if batch_interface:
            tabs.append((batch_interface, "📦 Batch Mode"))

        demo = gr.TabbedInterface(
            [tab_interface for tab_interface, _ in tabs],
            [tab_label for _, tab_label in tabs],
            title="🎨 Complete Manga/Comic Text Removal System"
        )

        print("🌐 Launching web interface...")

        demo.launch(
            share=True,
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True
        )

    except Exception as e:
        print(f"Web interface failed: {e}")
        print("You can still use the command-line functions")

# ======================= FINAL EXECUTION =======================

if __name__ == "__main__":
    # Show usage information
    demo_usage()

    print("\n" + "="*50)
    print("🎯 MANGA/COMIC TEXT REMOVAL SYSTEM READY!")
    print("="*50)

    # Check if running in Colab. Only the probe import is guarded, so an
    # ImportError raised by the setup or launch code below is not mistaken
    # for "running locally".
    try:
        import google.colab  # noqa: F401
    except ImportError:
        print("💻 Running in local environment")
        print("Make sure all dependencies are installed:")
        print("pip install opencv-python pillow numpy matplotlib tqdm easyocr pytesseract transformers torch diffusers gradio")
        # Offer local setup
        local_choice = input("\nInstall dependencies now? (y/n): ").lower()
        if local_choice == 'y':
            install_all_dependencies()

        # Launch application
        launch_choice = input("\nLaunch application? (y/n): ").lower()
        if launch_choice == 'y':
            launch_application()
    else:
        print("📱 Google Colab detected!")
        print("Run setup_environment() to install dependencies")
        print("Run upload_and_process() for easy file processing")
        print("Run launch_application() for full web interface")

        # Auto-setup if requested
        setup_choice = input("\nSetup environment now? (y/n): ").lower()
        if setup_choice == 'y':
            setup_environment()

        # Offer to launch full application
        launch_choice = input("\nLaunch full application? (y/n): ").lower()
        if launch_choice == 'y':
            launch_application()
        else:
            # Offer to create basic interface
            gradio_choice = input("\nCreate basic web interface? (y/n): ").lower()
            if gradio_choice == 'y':
                interface = create_gradio_interface()
                if interface:
                    interface.launch(share=True)

    print("\n🎉 Setup complete! You can now use these functions:")

    print("• remove_text_from_image('image.jpg') - Process single image")
    print("• remove_text_from_folder('folder/') - Process folder")
    print("• launch_application() - Launch full web interface")
    print("• quick_test() - Test the system")
    print("• benchmark_methods() - Compare different methods")

# ======================= END OF SCRIPT

C) Le Script But Not Complet Partie 1
No ratings yet
C) Le Script But Not Complet Partie 1
13 pages
4-Channel YOLO Training Guide For RGB+IR Drone Detection
No ratings yet
4-Channel YOLO Training Guide For RGB+IR Drone Detection
22 pages
Image Caption2
No ratings yet
Image Caption2
9 pages
CVDL Tae 63
No ratings yet
CVDL Tae 63
9 pages
EasyOCR Reader Initialization Guide
No ratings yet
EasyOCR Reader Initialization Guide
8 pages
Detect
No ratings yet
Detect
6 pages
Wa0029.
No ratings yet
Wa0029.
11 pages
Detection ORIGINAL
No ratings yet
Detection ORIGINAL
3 pages
Img Proc
No ratings yet
Img Proc
2 pages
DETECTCAMERA
No ratings yet
DETECTCAMERA
3 pages
导入所需库
No ratings yet
导入所需库
20 pages
Yolo Detect
No ratings yet
Yolo Detect
5 pages
DR Basit Assignments
No ratings yet
DR Basit Assignments
13 pages
Demo Inference Note
No ratings yet
Demo Inference Note
15 pages
Python Code 3
No ratings yet
Python Code 3
17 pages
Stable Diffusion Report Updated
No ratings yet
Stable Diffusion Report Updated
19 pages
Word Extraction-1
No ratings yet
Word Extraction-1
2 pages
Aishwarya MiniProjectReport - SC
No ratings yet
Aishwarya MiniProjectReport - SC
6 pages
Hand Written Letter Recognition
No ratings yet
Hand Written Letter Recognition
14 pages
Python Project
No ratings yet
Python Project
2 pages
CD 601 Lab Manual
No ratings yet
CD 601 Lab Manual
61 pages
21bai1724 - Ex 05 06
No ratings yet
21bai1724 - Ex 05 06
18 pages
IndicTrans2 PDF to Punjabi Docx Conversion
No ratings yet
IndicTrans2 PDF to Punjabi Docx Conversion
5 pages
Import Cv2
No ratings yet
Import Cv2
5 pages
Nndlrepo
No ratings yet
Nndlrepo
2 pages
Csc413 Project Semantic Segmentation
No ratings yet
Csc413 Project Semantic Segmentation
84 pages
Codeyolov 5
No ratings yet
Codeyolov 5
16 pages
Font Image Augmentation & Model Training
No ratings yet
Font Image Augmentation & Model Training
78 pages
Huggin Face Code
No ratings yet
Huggin Face Code
3 pages
PR Writing
No ratings yet
PR Writing
21 pages
Implementation of Handwritten Digit Recognizer Using CNN: Vinjit, Bhojak, Kumar and Nikam
No ratings yet
Implementation of Handwritten Digit Recognizer Using CNN: Vinjit, Bhojak, Kumar and Nikam
9 pages
Import cv2
No ratings yet
Import cv2
6 pages
Capstone Project Report (Digit-Recognition Using CNN)
No ratings yet
Capstone Project Report (Digit-Recognition Using CNN)
11 pages
Python Handwriting Recognition Guide
No ratings yet
Python Handwriting Recognition Guide
31 pages
Def Set Random Seed (Seed)
No ratings yet
Def Set Random Seed (Seed)
29 pages
Project Guidelines - AIML
No ratings yet
Project Guidelines - AIML
30 pages
Optical Character Recognition (OCR) in Python
No ratings yet
Optical Character Recognition (OCR) in Python
110 pages
Val
No ratings yet
Val
9 pages
Base Nao Mudar
No ratings yet
Base Nao Mudar
10 pages
Pasted Text 1756308794495
No ratings yet
Pasted Text 1756308794495
32 pages
PyTorch Cheat Sheet & Quick Reference
No ratings yet
PyTorch Cheat Sheet & Quick Reference
6 pages
Final Question1 With Results
No ratings yet
Final Question1 With Results
21 pages
GPU Setup for ML Libraries
No ratings yet
GPU Setup for ML Libraries
1 page
Genai 1,2,3
No ratings yet
Genai 1,2,3
15 pages
Lab 4-Image Segmentation Using U-Net
No ratings yet
Lab 4-Image Segmentation Using U-Net
9 pages
Deep Learning Manual
No ratings yet
Deep Learning Manual
53 pages
CV Ex 7
No ratings yet
CV Ex 7
7 pages
Deep Learning OCR Python Resources
No ratings yet
Deep Learning OCR Python Resources
3 pages
Experimental Pix2pix
No ratings yet
Experimental Pix2pix
5 pages
Extraction of Information From Handwriting Using Optical Character Recognition and Neural Networks
No ratings yet
Extraction of Information From Handwriting Using Optical Character Recognition and Neural Networks
6 pages
Lab Record
No ratings yet
Lab Record
30 pages
Step by Step Process
No ratings yet
Step by Step Process
8 pages
Run 1
No ratings yet
Run 1
57 pages
JJJ
No ratings yet
JJJ
6 pages
Finalised Question 1
No ratings yet
Finalised Question 1
40 pages
LSTM Autoencoder
No ratings yet
LSTM Autoencoder
8 pages
CV Lab Manual
No ratings yet
CV Lab Manual
45 pages
Vit32 GPTMD
No ratings yet
Vit32 GPTMD
6 pages
Grade 9 Social Studies Schemes of Work
No ratings yet
Grade 9 Social Studies Schemes of Work
15 pages
Ucd3138 PFC
No ratings yet
Ucd3138 PFC
27 pages
Marketing Mix
No ratings yet
Marketing Mix
18 pages
Robotics, Monitoring and Control Systems Answers
No ratings yet
Robotics, Monitoring and Control Systems Answers
4 pages
Idcdcsolar Im
No ratings yet
Idcdcsolar Im
2 pages
OTN N50 N70 Flexible Ethernet Transport
No ratings yet
OTN N50 N70 Flexible Ethernet Transport
8 pages
Performance of Novice and Experienced Teachers Using Blended Learning Modality in The Division of Quezon: Basis For Intervention Program
No ratings yet
Performance of Novice and Experienced Teachers Using Blended Learning Modality in The Division of Quezon: Basis For Intervention Program
17 pages
510k Cell Dyn
No ratings yet
510k Cell Dyn
10 pages
Drum and Lyre Action Plan
No ratings yet
Drum and Lyre Action Plan
3 pages
Sustainable Finance Strategy
No ratings yet
Sustainable Finance Strategy
42 pages
0-28V 6-8A Power Supply (LM317, 2N3055)
No ratings yet
0-28V 6-8A Power Supply (LM317, 2N3055)
4 pages
Astro
No ratings yet
Astro
256 pages
Model 63 357 302B Foot Valves
No ratings yet
Model 63 357 302B Foot Valves
4 pages
Yamaha RX-659 AV Receiver Manual
No ratings yet
Yamaha RX-659 AV Receiver Manual
132 pages
Sheikh Zayed & UAE History Quiz
No ratings yet
Sheikh Zayed & UAE History Quiz
8 pages
CLINIMED Atraumix Scissor For Atraumatic Tissue Dissection
No ratings yet
CLINIMED Atraumix Scissor For Atraumatic Tissue Dissection
2 pages
Terminal Report
No ratings yet
Terminal Report
13 pages
Department of Education: A. Access
No ratings yet
Department of Education: A. Access
68 pages
Canon Microfilm Scanner 350ii Owners Manual 587842
No ratings yet
Canon Microfilm Scanner 350ii Owners Manual 587842
62 pages
7 PM Compilation May 2025
No ratings yet
7 PM Compilation May 2025
93 pages
Rse-P Ii Short Manual: W X +12.0 Bar +12.0 Bar
100% (2)
Rse-P Ii Short Manual: W X +12.0 Bar +12.0 Bar
81 pages
Spuds System PDF
No ratings yet
Spuds System PDF
10 pages
Dini Argeo CPWEBELT-MODBUS - 01.00 - 10.10 - EN
No ratings yet
Dini Argeo CPWEBELT-MODBUS - 01.00 - 10.10 - EN
19 pages
DIY Metal Monkey Bars Guide
No ratings yet
DIY Metal Monkey Bars Guide
8 pages
Quick Reference Card Invoice Example Essent
No ratings yet
Quick Reference Card Invoice Example Essent
1 page
Food Manufacturers
100% (2)
Food Manufacturers
15 pages
Add a Node to RAC 11g Guide
No ratings yet
Add a Node to RAC 11g Guide
17 pages
Manual de Voo AS350B3 Arriel2B1 Treinamento
No ratings yet
Manual de Voo AS350B3 Arriel2B1 Treinamento
612 pages
Paper FENkeyactivitiesinthebankingindustry
No ratings yet
Paper FENkeyactivitiesinthebankingindustry
26 pages
MSDS-Agnique SLES 270
No ratings yet
MSDS-Agnique SLES 270
20 pages

F) Maybe Is Full Script Complet

Uploaded by

F) Maybe Is Full Script Complet

Uploaded by

# Complete Manga/Comic Text Removal Project

# Advanced solution with multiple detection methods and inpainting techniques

# ======================= INSTALLATION SETUP =======================

all_packages = packages + ocr_packages + ai_packages + util_packages

for package in all_packages:

# Additional setup for specific packages

print("✅ All dependencies installed successfully!")

# ======================= ADVANCED TEXT DETECTION =======================

# CRAFT Text Detection (if available)

print(f"✅ Text detection setup complete! Available methods:

# Note: Full CRAFT implementation would go here

def detect_text_comprehensive(self, image: np.ndarray,

# Method 4: OpenCV methods

# Method 5: Manga-specific detection

# Merge and filter results

def _detect_with_easyocr(self, image: np.ndarray, min_confidence: float) ->

def _detect_with_paddle(self, image: np.ndarray, min_confidence: float) ->

for lang in ['paddle_en', 'paddle_ch']:

def _detect_with_east(self, image: np.ndarray, min_confidence: float) ->

# Prepare image for EAST

blob = cv2.dnn.blobFromImage(image, 1.0, (new_width, new_height),

def _decode_east_predictions(self, scores, geometry, min_confidence):

height, width = scores.shape[2:4]

offset_x, offset_y = x * 4.0, y * 4.0

end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))

return boxes, confidences

def _detect_with_opencv(self, image: np.ndarray, min_confidence: float) ->

# Method 1: MSER (Maximally Stable Extremal Regions)

# Method 2: Contour-based detection

for thresh in preprocessed:

contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL,

for contour in contours:

if 0.2 < aspect_ratio < 15 and w > 10 and h > 8:

def _detect_manga_specific(self, image: np.ndarray, min_confidence: float) ->

# Speech bubble detection

# Sound effect detection (often has different characteristics)

# Handwritten text detection

def _detect_speech_bubbles(self, gray: np.ndarray) -> List[Dict]:

# Detect rectangular speech bubbles

contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL,

for contour in contours:

if len(approx) >= 4: # Roughly rectangular

if 0.3 < aspect_ratio < 5: # Reasonable aspect ratio

def _detect_sound_effects(self, gray: np.ndarray) -> List[Dict]:

# Apply tophat transform to detect bright text on dark background

# Apply blackhat transform to detect dark text on bright background

for contour in contours:

# Sound effects can have more varied aspect ratios

def _detect_handwritten_text(self, gray: np.ndarray) -> List[Dict]:

magnitude = np.sqrt(grad_x**2 + grad_y**2)

for contour in contours:

if 0.3 < aspect_ratio < 8 and w > 25 and h > 15:

def _merge_detections(self, detections: List[Dict]) -> List[Dict]:

for i, detection in enumerate(detections):

# Find overlapping detections

iou = self._calculate_iou(current_bbox, other['bbox'])

# Merge overlapping detections

# Use highest confidence

def _calculate_iou(self, bbox1: Tuple, bbox2: Tuple) -> float:

if x2_int <= x1_int or y2_int <= y1_int:

intersection = (x2_int - x1_int) * (y2_int - y1_int)

return intersection / union if union > 0 else 0.0

def _merge_bboxes(self, bboxes: List[Tuple]) -> Tuple:

return (x1_min, y1_min, x2_max, y2_max)

# ======================= ADVANCED INPAINTING =======================

# Stable Diffusion Inpainting

print("✅ Stable Diffusion inpainting loaded")

# MAT (Mask-Aware Transformer) - if available

# LaMa (Large Mask Inpainting) - if available

print(f"✅ Inpainting setup complete! Available methods:

def inpaint_comprehensive(self, image: np.ndarray, mask: np.ndarray,

print(f"Using inpainting method: {method}")

if method == 'sd' and 'sd' in self.inpainters:

def _choose_best_method(self, image: np.ndarray, mask: np.ndarray) -> str:

# Analyze mask complexity

if mask_ratio > 0.3: # Large areas

magnitude = np.sqrt(grad_x**2 + grad_y**2)