# Complete Manga/Comic Text Removal Project - Continuation
# Advanced solution with multiple detection methods and inpainting techniques
#
# NOTE: this file continues Part 1, which defines AdvancedTextDetector and opens
# the AdvancedInpainter class that the methods below belong to. The imports are
# repeated here so this continuation can be read (and linted) on its own:
import os
import time
from pathlib import Path
from typing import Dict, List, Optional

import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

# --- tail of the AdvancedInpainter setup method from Part 1 ---
        print(f"✅ Inpainting setup complete! Available methods: {list(self.inpainters.keys())}")
def setup_mat_inpainter(self):
"""Setup MAT (Mask-Aware Transformer) inpainter"""
# Placeholder for MAT implementation
# Would require downloading MAT model weights
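        # (A public reference implementation and pretrained weights are
        # available at https://github.com/fenglinglwb/MAT; wiring it in
        # here is left as future work.)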
pass
def setup_lama_inpainter(self):
"""Setup LaMa (Large Mask Inpainting) inpainter"""
# Placeholder for LaMa implementation
# Would require downloading LaMa model weights
pass
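        # A minimal sketch of one possible wiring, assuming the third-party
        # `simple-lama-inpainting` package (pip install simple-lama-inpainting)
        # is installed; commented out since this project does not ship LaMa
        # weights or that dependency:
        #
        #     from simple_lama_inpainting import SimpleLama
        #     self.inpainters['lama'] = SimpleLama()
        #     # usage: result_pil = self.inpainters['lama'](pil_image, pil_mask)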
def inpaint_comprehensive(self, image: np.ndarray, mask: np.ndarray,
method: str = 'auto') -> np.ndarray:
"""
Comprehensive inpainting using multiple methods
Args:
image: Input image (H, W, 3)
mask: Binary mask (H, W) where 255 = inpaint area
        method: 'auto', 'sd', 'opencv', 'telea', 'ns', 'edge_connect', 'patch_match'
Returns:
Inpainted image
"""
if method == 'auto':
# Choose best method based on mask characteristics
method = self._choose_best_method(image, mask)
print(f"Using inpainting method: {method}")
if method == 'sd' and 'sd' in self.inpainters:
return self._inpaint_with_sd(image, mask)
elif method in ['opencv', 'telea', 'ns']:
return self._inpaint_with_opencv(image, mask, method)
elif method == 'edge_connect':
return self._inpaint_with_edge_connect(image, mask)
elif method == 'patch_match':
return self._inpaint_with_patch_match(image, mask)
else:
# Fallback to OpenCV
return self._inpaint_with_opencv(image, mask, 'telea')
def _choose_best_method(self, image: np.ndarray, mask: np.ndarray) -> str:
"""Choose best inpainting method based on image and mask characteristics"""
mask_area = np.sum(mask > 0)
total_area = mask.shape[0] * mask.shape[1]
mask_ratio = mask_area / total_area
# Analyze mask complexity
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
num_regions = len(contours)
if mask_ratio > 0.3: # Large areas
return 'sd' if 'sd' in self.inpainters else 'edge_connect'
elif num_regions > 10: # Many small regions
return 'telea'
elif mask_ratio > 0.1: # Medium areas
return 'sd' if 'sd' in self.inpainters else 'ns'
else: # Small areas
return 'telea'
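        # Worked example of the heuristic above (hypothetical 1000x1000 page):
        #   a full-bleed sound effect covering ~35% of the page -> 'sd'
        #     (or 'edge_connect' when no SD pipeline is loaded)
        #   a dozen small speech-bubble labels (~2% total area)  -> 'telea'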
def _inpaint_with_sd(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
"""Inpaint using Stable Diffusion"""
try:
# Convert to PIL
pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
pil_mask = Image.fromarray(mask)
# Resize if too large
max_size = 512
if max(pil_image.size) > max_size:
ratio = max_size / max(pil_image.size)
                new_size = (int(pil_image.width * ratio),
                            int(pil_image.height * ratio))
pil_image = pil_image.resize(new_size, Image.LANCZOS)
pil_mask = pil_mask.resize(new_size, Image.NEAREST)
# Generate prompt for manga/comic style
            prompt = ("high quality manga artwork, clean background, "
                      "detailed illustration, professional comic art")
            negative_prompt = ("text, letters, words, writing, low quality, "
                               "blurry, distorted")
# Inpaint
result = self.inpainters['sd'](
prompt=prompt,
negative_prompt=negative_prompt,
image=pil_image,
mask_image=pil_mask,
num_inference_steps=25,
guidance_scale=7.5,
strength=0.8
).images[0]
# Convert back to numpy
result_np = np.array(result)
# Resize back if needed
if result_np.shape[:2] != image.shape[:2]:
result_np = cv2.resize(result_np, (image.shape[1], image.shape[0]))
return cv2.cvtColor(result_np, cv2.COLOR_RGB2BGR)
except Exception as e:
print(f"SD inpainting failed: {e}")
return self._inpaint_with_opencv(image, mask, 'telea')
def _inpaint_with_opencv(self, image: np.ndarray, mask: np.ndarray,
method: str = 'telea') -> np.ndarray:
"""Inpaint using OpenCV methods"""
try:
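            # cv2.inpaint's third argument is the inpaint radius: the pixel
            # neighborhood considered around each point being filled.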
if method == 'telea':
result = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
elif method == 'ns':
result = cv2.inpaint(image, mask, 3, cv2.INPAINT_NS)
else:
# Default to Telea
result = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
return result
except Exception as e:
print(f"OpenCV inpainting failed: {e}")
return image
    def _inpaint_with_edge_connect(self, image: np.ndarray,
                                   mask: np.ndarray) -> np.ndarray:
"""Edge-aware inpainting using structure completion"""
try:
# Convert to grayscale for edge detection
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Detect edges
edges = cv2.Canny(gray, 50, 150)
# Dilate edges to connect nearby edges
kernel = np.ones((3, 3), np.uint8)
edges = cv2.dilate(edges, kernel, iterations=1)
# Create edge mask (invert the text mask)
edge_mask = cv2.bitwise_not(mask)
edges = cv2.bitwise_and(edges, edge_mask)
            # NOTE: the masked edge map above is computed but not yet fed back
            # into the fill; as a structure-aware approximation, blend two
            # OpenCV inpaints computed at different radii instead.
            inpaint1 = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
            inpaint2 = cv2.inpaint(image, mask, 7, cv2.INPAINT_NS)
            # Blend the results, weighted toward the Telea pass
            alpha = 0.6
            result = cv2.addWeighted(inpaint1, alpha, inpaint2, 1 - alpha, 0)
return result
except Exception as e:
print(f"Edge connect inpainting failed: {e}")
return self._inpaint_with_opencv(image, mask, 'telea')
    def _inpaint_with_patch_match(self, image: np.ndarray,
                                  mask: np.ndarray) -> np.ndarray:
        """Patch-based inpainting (naive exhaustive patch search, PatchMatch-style)"""
try:
# Simple patch-based inpainting implementation
result = image.copy()
# Find mask boundaries
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
# Get bounding box
x, y, w, h = cv2.boundingRect(contour)
# Extract region
region = image[y:y+h, x:x+w]
region_mask = mask[y:y+h, x:x+w]
# Simple patch-based filling
filled_region = self._fill_region_with_patches(region, region_mask)
# Blend back
result[y:y+h, x:x+w] = filled_region
return result
except Exception as e:
print(f"Patch match inpainting failed: {e}")
return self._inpaint_with_opencv(image, mask, 'telea')
def _fill_region_with_patches(self, region: np.ndarray, mask: np.ndarray,
patch_size: int = 9) -> np.ndarray:
"""Fill masked region using patch matching"""
result = region.copy()
        # Coordinates of pixels to fill
        ys, xs = np.where(mask > 0)
        for y, x in zip(ys, xs):
# Find best matching patch
best_patch = self._find_best_patch(region, mask, x, y, patch_size)
if best_patch is not None:
# Fill the pixel
result[y, x] = best_patch
return result
def _find_best_patch(self, image: np.ndarray, mask: np.ndarray,
x: int, y: int, patch_size: int) -> Optional[np.ndarray]:
"""Find best matching patch for a pixel"""
half_size = patch_size // 2
# Get patch around target pixel
y1, y2 = max(0, y - half_size), min(image.shape[0], y + half_size + 1)
x1, x2 = max(0, x - half_size), min(image.shape[1], x + half_size + 1)
target_patch = image[y1:y2, x1:x2]
target_mask = mask[y1:y2, x1:x2]
# Find valid pixels in the patch (not masked)
valid_pixels = target_mask == 0
if not np.any(valid_pixels):
return None
best_match = None
best_score = float('inf')
# Search for similar patches in the image
for sy in range(half_size, image.shape[0] - half_size):
for sx in range(half_size, image.shape[1] - half_size):
# Skip if in masked area
if mask[sy, sx] > 0:
continue
# Get candidate patch
cy1, cy2 = sy - half_size, sy + half_size + 1
cx1, cx2 = sx - half_size, sx + half_size + 1
candidate_patch = image[cy1:cy2, cx1:cx2]
                # Calculate similarity over the unmasked pixels only,
                # casting to float to avoid uint8 wrap-around on subtraction
                if candidate_patch.shape == target_patch.shape:
                    diff = np.sum((candidate_patch[valid_pixels].astype(np.float32) -
                                   target_patch[valid_pixels].astype(np.float32)) ** 2)
                    if diff < best_score:
                        best_score = diff
                        best_match = candidate_patch[half_size, half_size]
return best_match
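    # NOTE: _find_best_patch scans the whole region for every masked pixel,
    # i.e. O(H*W) work per pixel. A hedged sketch of a cheap speed-up is to
    # search only a local window around the target (r is a hypothetical
    # radius, not a tuned value):
    #
    #     r = 30
    #     for sy in range(max(half_size, y - r),
    #                     min(image.shape[0] - half_size, y + r)):
    #         for sx in range(max(half_size, x - r),
    #                         min(image.shape[1] - half_size, x + r)):
    #             ...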
# ======================= MAIN PROCESSING CLASS =======================
class MangaTextRemover:
"""Main class for comprehensive manga/comic text removal"""
def __init__(self):
self.detector = AdvancedTextDetector()
self.inpainter = AdvancedInpainter()
self.processing_stats = {}
def process_image(self, image_path: str, output_path: str = None,
detection_confidence: float = 0.3,
inpaint_method: str = 'auto',
expand_mask: int = 5,
show_process: bool = True) -> Dict:
"""
Complete text removal process
Args:
image_path: Path to input image
output_path: Path for output image (optional)
detection_confidence: Minimum confidence for text detection
inpaint_method: Inpainting method to use
expand_mask: Pixels to expand mask around detected text
show_process: Whether to show processing steps
Returns:
Dictionary with processing results and statistics
"""
print(f"🎯 Processing image: {image_path}")
start_time = time.time()
# Load image
image = cv2.imread(image_path)
if image is None:
raise ValueError(f"Could not load image: {image_path}")
original_image = image.copy()
# Step 1: Text Detection
print("📍 Step 1: Detecting text regions...")
detections = self.detector.detect_text_comprehensive(image,
detection_confidence)
print(f"✅ Found {len(detections)} text regions")
# Step 2: Create comprehensive mask
print("🎨 Step 2: Creating inpainting mask...")
mask = self._create_comprehensive_mask(image, detections, expand_mask)
# Step 3: Inpainting
print("🔄 Step 3: Removing text and inpainting...")
result = self.inpainter.inpaint_comprehensive(image, mask, inpaint_method)
# Step 4: Post-processing
print("✨ Step 4: Post-processing...")
result = self._post_process_result(original_image, result, mask)
# Step 5: Save result
if output_path:
cv2.imwrite(output_path, result)
print(f"💾 Saved result to: {output_path}")
# Calculate statistics
processing_time = time.time() - start_time
stats = {
'detections_count': len(detections),
'processing_time': processing_time,
'mask_area_ratio': np.sum(mask > 0) / (mask.shape[0] * mask.shape[1]),
'detection_methods': list(set([d['method'] for d in detections])),
'inpaint_method': inpaint_method,
'image_size': image.shape[:2]
}
if show_process:
self._show_processing_results(original_image, detections, mask, result,
stats)
return {
'result': result,
'original': original_image,
'mask': mask,
'detections': detections,
'stats': stats
}
def _create_comprehensive_mask(self, image: np.ndarray, detections: List[Dict],
expand: int = 5) -> np.ndarray:
"""Create comprehensive mask from all detections"""
mask = np.zeros(image.shape[:2], dtype=np.uint8)
for detection in detections:
bbox = detection['bbox']
x1, y1, x2, y2 = bbox
# Expand bounding box
x1 = max(0, x1 - expand)
y1 = max(0, y1 - expand)
x2 = min(image.shape[1], x2 + expand)
y2 = min(image.shape[0], y2 + expand)
# Add to mask
mask[y1:y2, x1:x2] = 255
# Morphological operations to clean up mask
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
return mask
def _post_process_result(self, original: np.ndarray, result: np.ndarray,
mask: np.ndarray) -> np.ndarray:
"""Post-process the inpainting result"""
# Blend edges for seamless integration
blurred_mask = cv2.GaussianBlur(mask.astype(np.float32), (5, 5), 0) / 255.0
blurred_mask = np.stack([blurred_mask] * 3, axis=2)
# Smooth transition
final_result = (result * blurred_mask + original * (1 -
blurred_mask)).astype(np.uint8)
# Color correction
final_result = self._match_color_distribution(original, final_result, mask)
# Sharpening
final_result = self._apply_sharpening(final_result)
return final_result
def _match_color_distribution(self, original: np.ndarray, result: np.ndarray,
mask: np.ndarray) -> np.ndarray:
"""Match color distribution between original and result"""
try:
# Get non-masked areas for reference
non_masked = mask == 0
for channel in range(3):
orig_channel = original[:, :, channel][non_masked]
result_channel = result[:, :, channel]
# Calculate statistics
orig_mean = np.mean(orig_channel)
orig_std = np.std(orig_channel)
result_mean = np.mean(result_channel)
result_std = np.std(result_channel)
# Adjust result to match original statistics
if result_std > 0:
result[:, :, channel] = (
(result_channel - result_mean) * (orig_std / result_std) +
orig_mean
).clip(0, 255)
return result.astype(np.uint8)
except Exception as e:
print(f"Color matching failed: {e}")
return result
    def _apply_sharpening(self, image: np.ndarray,
                          strength: float = 0.5) -> np.ndarray:
"""Apply subtle sharpening to the result"""
try:
# Unsharp mask
blurred = cv2.GaussianBlur(image, (0, 0), 1.0)
sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength,
0)
return sharpened.clip(0, 255).astype(np.uint8)
except Exception as e:
print(f"Sharpening failed: {e}")
return image
    def _show_processing_results(self, original: np.ndarray, detections: List[Dict],
                                 mask: np.ndarray, result: np.ndarray, stats: Dict):
"""Display processing results"""
# Create visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
# Original with detections
img_with_detections = original.copy()
for detection in detections:
bbox = detection['bbox']
x1, y1, x2, y2 = bbox
cv2.rectangle(img_with_detections, (x1, y1), (x2, y2), (0, 255, 0), 2)
axes[0, 0].imshow(cv2.cvtColor(img_with_detections, cv2.COLOR_BGR2RGB))
axes[0, 0].set_title(f'Original + Detections ({len(detections)} regions)')
axes[0, 0].axis('off')
# Mask
axes[0, 1].imshow(mask, cmap='gray')
axes[0, 1].set_title('Inpainting Mask')
axes[0, 1].axis('off')
# Result
axes[1, 0].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
axes[1, 0].set_title('Text Removed Result')
axes[1, 0].axis('off')
# Comparison
comparison = np.hstack([
cv2.cvtColor(original, cv2.COLOR_BGR2RGB),
cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
])
axes[1, 1].imshow(comparison)
axes[1, 1].set_title('Before vs After')
axes[1, 1].axis('off')
plt.tight_layout()
plt.show()
# Print statistics
print(f"\n📊 Processing Statistics:")
print(f" • Text regions detected: {stats['detections_count']}")
print(f" • Processing time: {stats['processing_time']:.2f} seconds")
print(f" • Mask area ratio: {stats['mask_area_ratio']:.1%}")
print(f" • Detection methods: {', '.join(stats['detection_methods'])}")
print(f" • Inpainting method: {stats['inpaint_method']}")
print(f" • Image size: {stats['image_size'][1]}x{stats['image_size']
[0]}")
# ======================= BATCH PROCESSING =======================
class BatchProcessor:
"""Batch processing for multiple images"""
def __init__(self):
self.remover = MangaTextRemover()
def process_folder(self, input_folder: str, output_folder: str,
image_extensions: List[str] = None,
**kwargs) -> Dict:
"""Process all images in a folder"""
if image_extensions is None:
image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']
# Create output folder
os.makedirs(output_folder, exist_ok=True)
        # Find all images (dedupe: on case-insensitive filesystems the
        # lower/upper-case globs match the same files twice)
        image_files = []
        for ext in image_extensions:
            image_files.extend(Path(input_folder).glob(f'*{ext}'))
            image_files.extend(Path(input_folder).glob(f'*{ext.upper()}'))
        image_files = sorted(set(image_files))
print(f"Found {len(image_files)} images to process")
results = {}
failed = []
for image_file in tqdm(image_files, desc="Processing images"):
try:
# Generate output path
output_path = os.path.join(output_folder,
f"cleaned_{image_file.name}")
# Process image
result = self.remover.process_image(
str(image_file),
output_path,
show_process=False,
**kwargs
)
results[str(image_file)] = result['stats']
except Exception as e:
print(f"❌ Failed to process {image_file}: {e}")
failed.append(str(image_file))
return {
'processed': len(results),
'failed': len(failed),
'failed_files': failed,
'results': results
}
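# Example usage (hypothetical paths):
#     processor = BatchProcessor()
#     summary = processor.process_folder('raw_pages/', 'clean_pages/',
#                                        detection_confidence=0.35)
#     print(f"{summary['processed']} ok, {summary['failed']} failed")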
# ======================= EASY-TO-USE FUNCTIONS =======================
def setup_environment():
"""One-click setup for Google Colab"""
print("🚀 Setting up Manga Text Removal environment...")
install_all_dependencies()
print("✅ Environment setup complete!")
def remove_text_from_image(image_path: str, output_path: str = None,
confidence: float = 0.3) -> str:
"""
Simple function to remove text from a single image
Args:
image_path: Path to input image
output_path: Path for output (optional)
confidence: Detection confidence threshold
Returns:
Path to output image
"""
if output_path is None:
name, ext = os.path.splitext(image_path)
output_path = f"{name}_no_text{ext}"
    remover = MangaTextRemover()
    remover.process_image(image_path, output_path, detection_confidence=confidence)
return output_path
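# Example (hypothetical filename): remove_text_from_image('page_001.png')
# writes 'page_001_no_text.png' next to the input and returns that path.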
def remove_text_from_folder(input_folder: str, output_folder: str = None,
confidence: float = 0.3) -> Dict:
"""
Simple function to remove text from all images in a folder
Args:
input_folder: Path to input folder
output_folder: Path to output folder (optional)
confidence: Detection confidence threshold
Returns:
Processing statistics
"""
if output_folder is None:
output_folder = f"{input_folder}_cleaned"
processor = BatchProcessor()
return processor.process_folder(input_folder, output_folder,
detection_confidence=confidence)
# ======================= USAGE EXAMPLES =======================
def demo_usage():
"""Demonstrate how to use the system"""
print("""
🎯 Manga/Comic Text Removal System - Usage Examples
1. Setup (run once):
setup_environment()
2. Process single image:
remove_text_from_image('manga_page.jpg', 'clean_manga.jpg')
3. Process folder:
stats = remove_text_from_folder('manga_folder/', 'clean_manga_folder/')
4. Advanced usage:
remover = MangaTextRemover()
result = remover.process_image('image.jpg', confidence=0.4)
5. Batch processing:
processor = BatchProcessor()
stats = processor.process_folder('input/', 'output/')
📝 Tips:
- Lower confidence (0.1-0.3) detects more text but may have false positives
- Higher confidence (0.4-0.8) is more selective but may miss some text
- Use 'sd' inpainting method for best quality (requires GPU)
- Use 'telea' or 'ns' for faster processing
""")
# ======================= GOOGLE COLAB HELPERS =======================
def upload_and_process():
"""Helper function for Google Colab file upload"""
try:
from google.colab import files
# Upload files
print("📤 Please select image files to upload:")
uploaded = files.upload()
results = []
for filename in uploaded.keys():
print(f"\n🔄 Processing {filename}...")
# Process the image
output_path = f"cleaned_{filename}"
remover = MangaTextRemover()
result = remover.process_image(filename, output_path)
results.append({
'input': filename,
'output': output_path,
'stats': result['stats']
})
# Download results
print("\n📥 Download processed images:")
for result in results:
files.download(result['output'])
return results
except ImportError:
print("This function is only available in Google Colab")
return None
def create_gradio_interface():
"""Create Gradio web interface for easy use"""
try:
import gradio as gr
def process_image_gradio(image, confidence, inpaint_method):
"""Gradio processing function"""
if image is None:
return None, "Please upload an image"
# Save uploaded image
temp_input = "temp_input.jpg"
temp_output = "temp_output.jpg"
# Convert PIL to CV2 and save
cv2.imwrite(temp_input, cv2.cvtColor(np.array(image),
cv2.COLOR_RGB2BGR))
try:
# Process
remover = MangaTextRemover()
result = remover.process_image(
temp_input,
temp_output,
detection_confidence=confidence,
inpaint_method=inpaint_method,
show_process=False
)
# Load result
result_image = Image.open(temp_output)
# Create stats text
stats = result['stats']
stats_text = f"""
Detected {stats['detections_count']} text regions
Processing time: {stats['processing_time']:.2f} seconds
Methods used: {', '.join(stats['detection_methods'])}
Inpainting: {stats['inpaint_method']}
"""
return result_image, stats_text
except Exception as e:
return None, f"Error: {str(e)}"
# Create interface
interface = gr.Interface(
fn=process_image_gradio,
inputs=[
gr.Image(type="pil", label="Upload Manga/Comic Image"),
gr.Slider(0.1, 0.9, value=0.3, label="Detection Confidence"),
gr.Dropdown(
["auto", "sd", "telea", "ns", "edge_connect"],
value="auto",
label="Inpainting Method"
)
],
outputs=[
gr.Image(type="pil", label="Text Removed"),
gr.Textbox(label="Processing Stats")
],
title="Manga/Comic Text Removal",
description="Upload a manga or comic image to automatically detect and
remove text while preserving the artwork."
)
return interface
except ImportError:
print("Gradio not available. Install with: pip install gradio")
return None
# ======================= MAIN EXECUTION =======================
if __name__ == "__main__":
# Show usage information
demo_usage()
print("\n" + "="*50)
print("🎯 MANGA/COMIC TEXT REMOVAL SYSTEM READY!")
print("="*50)
# Check if running in Colab
try:
import google.colab
print("📱 Google Colab detected!")
print("Run setup_environment() to install dependencies")
print("Run upload_and_process() for easy file processing")
# Auto-setup if requested
setup_choice = input("\nSetup environment now? (y/n): ").lower()
if setup_choice == 'y':
setup_environment()
# Offer to create Gradio interface
gradio_choice = input("\nCreate web interface? (y/n): ").lower()
if gradio_choice == 'y':
interface = create_gradio_interface()
if interface:
interface.launch(share=True)
    except ImportError:
        print("💻 Running outside Google Colab - interactive setup skipped.")
        print("Use remove_text_from_image() or remove_text_from_folder() directly.")