實時監控圖像中的人臉識別
在當今的數字時代,人臉識別技術已經成為一項關鍵技術,它正在從安全到個性化體驗等多個領域進行革新。從門禁控制到考勤系統,再到走失兒童的檢測,人臉識別的應用非常廣泛,涵蓋了安全、教育和公共安全領域。然而,在不同的條件下準確識別人臉面臨著一系列獨特的挑戰。在本指南中,我們將深入探討人臉識別的複雜性,探索使用Python和先進的機器學習工具來確保強大的識別和驗證的技術。
理解人臉識別
人臉識別技術利用先進的算法分析和比較從圖像或視頻片段中提取的面部特征,從而實現個人的識別和驗證。提供的Python代碼展示了人臉識別的實現。讓我們探索代碼的主要組成部分,以深入了解該過程。
1. 導入庫:
在本節中,導入了執行各種任務所需的庫,如面部檢測、圖像處理、數據增強和人臉識別。
import face_recognition
import cv2
import numpy as np
from deepface import DeepFace
from utils import apply_blur, generate_unique_random_numbers, find_cosine_distance_helper
from utils import apply_resize
from utils import augment_data, face_distance
import os
from PIL import Image
from mtcnn.mtcnn import MTCNN
import random
2. 數據集準備:
數據集準備階段涉及遍歷包含已知個人圖像的目錄(known_people_dir)。對于每個人,代碼在train_dataset目錄中創建一個輸出目錄。然后加載每張圖像,使用MTCNN(多任務級聯卷積網絡)模型檢測面部,裁剪檢測到的面部區域,并將其保存在相應的輸出目錄(known_people_train_dir)中。此外,還應用了模糊、調整大小和隨機變換等數據增強技術,以增加數據集的多樣性并增強人臉識別系統的魯棒性。
# Build the training set: every sub-directory of known_people_dir is treated
# as one person and gets a matching output folder under "train_dataset".
for person_name in os.listdir(known_people_dir):
    person_dir = os.path.join(known_people_dir, person_name)
    if not os.path.isdir(person_dir):
        # Only directories represent people; skip stray files.
        continue
    output_person_dir = os.path.join("train_dataset", person_name)
    os.makedirs(output_person_dir, exist_ok=True)
    for filename in os.listdir(person_dir):
        image_path = os.path.join(person_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising when the file is
            # missing, unreadable or not an image; cvtColor would crash on it.
            print(f"Skipping unreadable image {image_path}")
            continue
        # The image is converted from BGR to RGB before being handed to MTCNN.
        faces = mtcnn.detect_faces(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if not faces:
            continue
        for face in faces:
            x, y, w, h = face['box']
            # Clamp the detected box to the image bounds.
            left = max(x, 0)
            top = max(y, 0)
            right = min(x + w, image.shape[1])
            bottom = min(y + h, image.shape[0])
            if right > left and bottom > top:
                output_face_path = os.path.join(output_person_dir, f"{filename}.jpg")
                # NOTE(review): the full image is written here, not the
                # [top:bottom, left:right] crop, and every face found in the
                # same file reuses the same output path — confirm intended.
                cv2.imwrite(output_face_path, image)
                # Apply data augmentation
                apply_blur(output_face_path, output_folder=output_person_dir)
                apply_resize(output_face_path, output_folder=output_person_dir)
                augment_data(output_face_path, output_folder=output_person_dir, face_coordinates=(left, top, right, bottom), prefix=filename)
訓練數據集
3. 檢測面部:
使用MTCNN(多任務級聯卷積網絡)模型進行面部檢測,該模型能夠檢測圖像中的面部。然后使用檢測到的面部進行進一步處理。
# Detect faces in the image using MTCNN.
# The result is a collection of detections; the surrounding code reads each
# detection's 'box' entry as (x, y, w, h).
# NOTE(review): rgb_image is presumably the BGR->RGB converted input, as in
# the dataset preparation loop — confirm.
faces = mtcnn.detect_faces(rgb_image)
4. 提取邊界框坐標:
# Unpack the detector's box for this face as (x, y, w, h).
x, y, w, h = face['box']
# Clamp the box so it never extends past the image: the horizontal extent is
# limited by image.shape[1] and the vertical extent by image.shape[0].
left, top = max(x, 0), max(y, 0)
right, bottom = min(x + w, image.shape[1]), min(y + h, image.shape[0])
5. 圖像增強:
對提取的面部圖像應用模糊、調整大小和隨機變換等數據增強技術,以增強數據集的多樣性。
(1) 隨機變換
def augment_data(original_image_path, output_folder, face_coordinates, num_augmented_images=3, should_add_jitter=True, prefix=""):
    """Save randomly transformed grayscale variants of an image.

    The image is loaded, converted to grayscale, and passed
    ``num_augmented_images`` times through a random horizontal flip, a random
    rotation of up to 15 degrees and (optionally) a colour jitter. Each result
    is written to ``output_folder`` as ``{prefix}augmented_{k}.jpg`` with
    ``k`` starting at 1.

    Args:
        original_image_path: Path of the image to augment.
        output_folder: Directory that receives the augmented images.
        face_coordinates: Accepted for interface compatibility but not used.
            NOTE(review): looks like a (left, top, right, bottom) crop box
            that was meant to be applied before augmenting — confirm.
        num_augmented_images: How many augmented variants to write.
        should_add_jitter: When False, the ColorJitter step is left out.
        prefix: Text prepended to every output file name.
    """
    # Open inside a context manager so the file handle is released;
    # convert() returns an independent image that outlives the handle.
    with Image.open(original_image_path) as original_image:
        face_image_gray = original_image.convert('L')

    # Define torchvision transforms for data augmentation
    steps = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=15),
    ]
    if should_add_jitter:
        # Previously applied unconditionally, which ignored the flag.
        steps.append(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1))
    steps.append(transforms.ToTensor())
    data_transforms = transforms.Compose(steps)
    to_pil_image = transforms.ToPILImage()

    # Each pass draws fresh random parameters, so every saved image differs.
    for i in range(num_augmented_images):
        transformed_image = data_transforms(face_image_gray)
        augmented_image_path = os.path.join(output_folder, f"{prefix}augmented_{i + 1}.jpg")
        to_pil_image(transformed_image).save(augmented_image_path)
        print(f"Augmented image {i + 1} saved to {augmented_image_path}")
(2) 調整大小和模糊
def apply_blur(image_path, output_folder, kernel_size=(7, 7)):
    """Write a Gaussian-blurred copy of an image into ``output_folder``.

    The copy keeps the source file's base name with a ``blurred_`` prefix.

    Args:
        image_path: Path of the image to blur.
        output_folder: Directory that receives the blurred copy.
        kernel_size: Gaussian kernel size passed to ``cv2.GaussianBlur``.
    """
    # Destination: same base name as the source, prefixed with "blurred_".
    filename = os.path.basename(image_path)
    output_path = os.path.join(output_folder, f"blurred_{filename}")

    # Read, blur (sigma 0 is passed to cv2.GaussianBlur) and write in one go.
    source_pixels = cv2.imread(image_path)
    cv2.imwrite(output_path, cv2.GaussianBlur(source_pixels, kernel_size, 0))
    print(f"Blurred image saved to {output_path}")
def apply_resize(image_path, output_folder, target_size=(256, 256)):
    """Write a resized copy of an image into ``output_folder``.

    The copy keeps the source file's base name with a ``resized_`` prefix.

    Args:
        image_path: Path of the image to resize.
        output_folder: Directory that receives the resized copy.
        target_size: Size passed to ``cv2.resize``.
    """
    # Destination: same base name as the source, prefixed with "resized_".
    filename = os.path.basename(image_path)
    output_path = os.path.join(output_folder, f"resized_{filename}")

    # Read, resize and write in one go.
    source_pixels = cv2.imread(image_path)
    cv2.imwrite(output_path, cv2.resize(source_pixels, target_size))
    print(f"Resized image saved to {output_path}")
6. 在訓練目錄中存儲增強圖像:
處理后的圖像,包括裁剪的面部、模糊的面部和增強的圖像,存儲在訓練目錄(train_dataset)中。這種目錄結構便于訪問訓練數據,以構建人臉識別模型。
# Save the face image
# NOTE(review): in the dataset preparation loop `image` is the full image
# returned by cv2.imread, not a crop of the detected box — confirm intended.
cv2.imwrite(output_face_path, image)
# Apply data augmentation on the face image: each helper re-reads the file
# just written and stores its result in the same per-person folder.
apply_blur(output_face_path, output_folder=output_person_dir)
apply_resize(output_face_path, output_folder=output_person_dir)
# The clamped box is passed along as (left, top, right, bottom); the source
# file name is used as the prefix of the augmented files' names.
augment_data(output_face_path, output_folder=output_person_dir,
face_coordinates=(left, top, right, bottom),prefix=filename)
7. 從訓練數據集中編碼已知面部:
代碼遍歷我們指定的訓練數據集目錄(稱為known_people_train_dir)中的目錄。在每個代表特定個人的目錄中,它處理每個圖像文件。代碼驗證每個圖像文件的有效性,加載它,并使用高級技術提取面部特征。這些特征通過DeepFace.represent函數被編碼成數值向量,稱為面部編碼。這些編碼以及相應的人名隨后被添加到列表中以進行進一步處理。
通過將增強數據與原始圖像結合,我們的模型訓練數據集變得更加豐富和多樣化,從而在不同條件和環境中提高了人臉識別的準確性和魯棒性。
# Collect one embedding per training image, labelled with the name of the
# per-person folder it was found in.
for person_name in os.listdir(known_people_train_dir):
    person_dir = os.path.join(known_people_train_dir, person_name)
    # Anything that is not a directory cannot be a person folder.
    if not os.path.isdir(person_dir):
        continue
    for filename in os.listdir(person_dir):
        image_path = os.path.join(person_dir, filename)
        print(image_path)
        try:
            # Let PIL verify the file first so non-images are rejected early.
            with Image.open(image_path) as img:
                img.verify()
            person_image = face_recognition.load_image_file(image_path)
            # Compute the embedding(s) with the Dlib model and MTCNN detector.
            face_encoding = DeepFace.represent(
                person_image,
                model_name="Dlib",
                detector_backend="mtcnn",
                enforce_detection=False,
            )
            # Only the first returned entry is kept for each image.
            known_face_encodings.append(np.array(face_encoding[0]['embedding']))
            known_face_names.append(person_name)
        except (IOError, SyntaxError, IndexError):
            # Files that fail verification/loading, or yield no entry, are skipped.
            continue
8. 人臉識別循環:
在人臉識別循環中,程序不斷從網絡攝像頭捕獲幀,確保實時人臉識別。為了優化處理速度,每個幀都被調整大小,減少了計算負載而不影響準確性。使用MTCNN面部檢測模型,程序在幀內識別面部,對其特征進行編碼以進行比較。
# Continuous capture of frames from the webcam
while True:
    ret, frame = video_capture.read()
    if not ret:
        # read() signals failure through `ret` (camera unplugged, stream
        # ended); the frame is unusable then, so stop instead of crashing
        # inside cv2.resize.
        break
    # Resize each frame to a quarter of its size for faster processing
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    # Reverse the channel axis (BGR -> RGB) before handing the frame to DeepFace
    rgb_small_frame = small_frame[:, :, ::-1]
    # Detect faces with MTCNN and compute their Dlib embeddings in one call
    result1 = DeepFace.represent(rgb_small_frame, model_name="Dlib", detector_backend="mtcnn", enforce_detection=False)
    # Split every result into a box and an embedding. Boxes are stored as
    # (top, right, bottom, left) and are expressed in the coordinates of the
    # downscaled frame, because detection ran on small_frame.
    face_locations = []
    face_encodings = []
    for res in result1:
        area = res['facial_area']
        face_locations.append((area['y'], area['x'] + area['w'], area['y'] + area['h'], area['x']))
        face_encodings.append(res['embedding'])
通過計算檢測到的面部和訓練數據集中已知面部之間的余弦距離,程序確定潛在的匹配項。
# Calculating cosine distances between detected faces and known faces.
# A detected face is accepted as a known person only when its smallest
# distance does not exceed this threshold.
COSINE_MATCH_THRESHOLD = 0.07
for f_encoding in face_encodings:
    name = "Unknown"
    # With no enrolled faces there is nothing to compare against, and
    # np.argmin raises ValueError on an empty sequence.
    if len(known_face_encodings) > 0:
        face_distances = find_cosine_distance_helper(known_face_encodings, f_encoding)
        # The closest known face is the only candidate match.
        best_match_index = np.argmin(face_distances)
        if face_distances[best_match_index] <= COSINE_MATCH_THRESHOLD:
            name = known_face_names[best_match_index]
    face_names.append(name)
9. 顯示結果:
檢測到的面部顯示在視頻流中,包括相應的名稱(如果識別出來,否則為“未知”)。在面部周圍繪制矩形框,并在每個面部下方添加標簽,以便于識別。
# Draw a bounding box around the face (colour (0, 0, 255), thickness 2).
# NOTE(review): the face boxes in the recognition loop are computed on the
# frame downscaled by 0.25; presumably left/top/right/bottom are scaled back
# up before being drawn on the full-size `frame` — confirm.
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
# Draw a label with a name; the text origin is 6 px right of the box's left
# edge and 6 px above its bottom edge, in white with scale 1.0, thickness 1.
cv2.putText(frame, text, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
# Display the resulting image in the window named 'Video'
cv2.imshow('Video', frame)
輸出
人臉識別系統使用網絡攝像頭在實時面部檢測和識別任務中取得了顯著的性能。它能夠準確識別已知個人,并以良好的精度進行標記,并將未知面部適當地標記為“未知”。系統以高置信度運行,提高了其可靠性和可用性。在Salman的圖像上訓練的模型準確地識別了他在監控錄像中的面部。
當遇到不在數據集中的Amitabh的圖像時,它被適當地標記為“未知”。