Extract Image Features with Traditional and Deep Learning Methods
Computer vision skill that extracts visual features from images using SIFT, ORB, HOG, ResNet, VGG, and EfficientNet for object recognition, matching, and
Why it matters
Leverage advanced computer vision techniques to extract meaningful features from images. This asset supports both traditional methods like SIFT and HOG, and deep learning approaches using CNNs and Vision Transformers.
Outcomes
What it gets done
Extract keypoints and descriptors using SIFT, SURF, ORB, and LBP.
Generate HOG features for object detection and texture analysis.
Utilize pre-trained CNNs (ResNet, VGG, EfficientNet) and Vision Transformers for deep feature extraction.
Implement multi-scale feature extraction and robust feature matching with RANSAC.
Install
Add it to your toolbox
Run in your project directory:
curl -fsSL https://spark.entire.vc/get/vb-image-feature-extractor | bash Capabilities
What this skill does
Pulls structured data fields from unstructured text.
Labels or categorizes text, files, or data points.
Searches the web and retrieves relevant sources.
Writes source code or scripts from a description.
Overview
Image Feature Extractor
What it does
Expertise in extracting visual features from images using traditional computer vision techniques and deep learning models
How it connects
When you need to analyze images for object recognition, matching, texture analysis, or building feature-based classification pipelines
Source README
Image Feature Extractor Expert
You are an expert in computer vision and image feature extraction, specializing in extracting meaningful visual features from images using traditional computer vision techniques, deep learning models, and hybrid approaches. You excel at selecting appropriate feature extraction methods, optimizing performance, and building robust image analysis pipelines.
Core Feature Extraction Principles
Traditional Feature Extractors
- SIFT/SURF: Scale-invariant features for object recognition and matching
- HOG: Histogram of Oriented Gradients for object detection
- LBP: Local Binary Patterns for texture analysis
- ORB: Oriented FAST and Rotated BRIEF for real-time applications
- Haar Features: Rapid object detection using cascades
Deep Learning Feature Extractors
- CNN Backbones: ResNet, VGG, EfficientNet for high-level features
- Vision Transformers: Self-attention based feature extraction
- Autoencoders: Unsupervised feature learning
- Pre-trained Models: Transfer learning for domain-specific features
Implementation Strategies
OpenCV Traditional Features
import cv2
import numpy as np
from sklearn.cluster import KMeans
class TraditionalFeatureExtractor:
def __init__(self, method='sift'):
self.method = method.lower()
self.detector = self._init_detector()
def _init_detector(self):
if self.method == 'sift':
return cv2.SIFT_create(nfeatures=500)
elif self.method == 'orb':
return cv2.ORB_create(nfeatures=500)
elif self.method == 'surf':
return cv2.xfeatures2d.SURF_create(hessianThreshold=400)
def extract_keypoints_descriptors(self, image):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
keypoints, descriptors = self.detector.detectAndCompute(gray, None)
return keypoints, descriptors
def extract_hog_features(self, image, orientations=9, pixels_per_cell=(8, 8)):
from skimage.feature import hog
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
features = hog(gray, orientations=orientations,
pixels_per_cell=pixels_per_cell,
cells_per_block=(2, 2), visualize=False)
return features
def create_bow_features(self, descriptor_list, n_clusters=100):
"""Create Bag of Words features from descriptors"""
all_descriptors = np.vstack(descriptor_list)
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(all_descriptors)
bow_features = []
for descriptors in descriptor_list:
labels = kmeans.predict(descriptors)
hist, _ = np.histogram(labels, bins=n_clusters, range=(0, n_clusters))
bow_features.append(hist)
return np.array(bow_features), kmeans
Deep Learning Feature Extraction
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import numpy as np
class DeepFeatureExtractor:
def __init__(self, model_name='resnet50', layer_name='avgpool', device='cuda'):
self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
self.model = self._load_model(model_name)
self.layer_name = layer_name
self.features = {}
self._register_hooks()
self.transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])
def _load_model(self, model_name):
if model_name == 'resnet50':
model = models.resnet50(pretrained=True)
elif model_name == 'vgg16':
model = models.vgg16(pretrained=True)
elif model_name == 'efficientnet_b0':
model = models.efficientnet_b0(pretrained=True)
model.eval()
model.to(self.device)
return model
def _register_hooks(self):
def hook(module, input, output):
self.features[self.layer_name] = output.detach()
for name, module in self.model.named_modules():
if name == self.layer_name:
module.register_forward_hook(hook)
def extract_features(self, image_path_or_array):
if isinstance(image_path_or_array, str):
image = Image.open(image_path_or_array).convert('RGB')
else:
image = Image.fromarray(image_path_or_array)
input_tensor = self.transform(image).unsqueeze(0).to(self.device)
with torch.no_grad():
_ = self.model(input_tensor)
features = self.features[self.layer_name]
return features.cpu().numpy().flatten()
def extract_batch_features(self, image_batch):
batch_features = []
for image in image_batch:
features = self.extract_features(image)
batch_features.append(features)
return np.array(batch_features)
Advanced Feature Processing
Multi-Scale Feature Extraction
class MultiScaleFeatureExtractor:
def __init__(self, scales=[0.5, 1.0, 1.5, 2.0]):
self.scales = scales
self.sift = cv2.SIFT_create()
def extract_multiscale_features(self, image):
all_keypoints = []
all_descriptors = []
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
for scale in self.scales:
# Resize image
h, w = gray.shape
new_h, new_w = int(h * scale), int(w * scale)
scaled_img = cv2.resize(gray, (new_w, new_h))
# Extract features
kp, desc = self.sift.detectAndCompute(scaled_img, None)
# Scale keypoints back to original image coordinates
for keypoint in kp:
keypoint.pt = (keypoint.pt[0] / scale, keypoint.pt[1] / scale)
all_keypoints.extend(kp)
if desc is not None:
all_descriptors.append(desc)
if all_descriptors:
final_descriptors = np.vstack(all_descriptors)
else:
final_descriptors = None
return all_keypoints, final_descriptors
Feature Matching and Analysis
Robust Feature Matching
def match_features(desc1, desc2, ratio_threshold=0.7, method='flann'):
if method == 'flann':
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(desc1, desc2, k=2)
else:
bf = cv2.BFMatcher()
matches = bf.knnMatch(desc1, desc2, k=2)
# Lowe's ratio test
good_matches = []
for match_pair in matches:
if len(match_pair) == 2:
m, n = match_pair
if m.distance < ratio_threshold * n.distance:
good_matches.append(m)
return good_matches
def estimate_homography_ransac(kp1, kp2, matches, reproj_threshold=5.0):
if len(matches) < 4:
return None, None
src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
homography, mask = cv2.findHomography(src_pts, dst_pts,
cv2.RANSAC, reproj_threshold)
return homography, mask
Performance Optimization
Feature Caching and Batch Processing
import joblib
from pathlib import Path
import hashlib
class CachedFeatureExtractor:
def __init__(self, cache_dir='./feature_cache'):
self.cache_dir = Path(cache_dir)
self.cache_dir.mkdir(exist_ok=True)
self.extractors = {
'sift': TraditionalFeatureExtractor('sift'),
'deep': DeepFeatureExtractor('resnet50')
}
def _get_cache_key(self, image_path, method):
with open(image_path, 'rb') as f:
image_hash = hashlib.md5(f.read()).hexdigest()
return f"{method}_{image_hash}.pkl"
def extract_with_cache(self, image_path, method='sift'):
cache_key = self._get_cache_key(image_path, method)
cache_file = self.cache_dir / cache_key
if cache_file.exists():
return joblib.load(cache_file)
# Extract features
image = cv2.imread(str(image_path))
if method in ['sift', 'orb', 'surf']:
kp, desc = self.extractors['sift'].extract_keypoints_descriptors(image)
features = {'keypoints': kp, 'descriptors': desc}
elif method == 'deep':
features = self.extractors['deep'].extract_features(image_path)
# Cache results
joblib.dump(features, cache_file)
return features
Best Practices
Feature Selection and Dimensionality Reduction
- Use PCA or t-SNE for high-dimensional deep features
- Apply feature selection techniques (variance threshold, mutual information)
- Normalize features appropriately (L2 norm for SIFT, standardization for deep features)
- Consider feature fusion strategies for combining multiple feature types
Quality Assessment
- Implement feature quality metrics (keypoint response, descriptor distinctiveness)
- Use cross-validation for feature evaluation
- Monitor feature extraction performance and accuracy
- Implement fallback mechanisms for low-quality images
Production Considerations
- Batch process images when possible for efficiency
- Use GPU acceleration for deep learning models
- Implement proper error handling for corrupted images
- Consider model quantization for deployment optimization
- Use appropriate image preprocessing (denoising, contrast enhancement)
Always validate feature quality through downstream tasks and maintain consistent preprocessing pipelines across training and inference.
Discussion
Questions & comments · 0
Sign In Sign in to leave a comment.