# app.py — Flask service for face detection, recognition, and tracking.
from flask import Flask, render_template, Response, jsonify,request
import os
import threading
import time
import cv2
import numpy as np
import torch
import yaml
from torchvision import transforms
from face_alignment.alignment import norm_crop
from face_detection.scrfd.detector import SCRFD
from face_detection.yolov5_face.detector import Yolov5Face
from face_recognition.arcface.model import iresnet_inference
from face_recognition.arcface.utils import compare_encodings, read_features
from face_tracking.tracker.byte_tracker import BYTETracker
from face_tracking.tracker.visualize import plot_tracking
app = Flask(__name__)
# Device configuration: use CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Face detector (choose one; the SCRFD ONNX model is the active choice).
detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
# detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")
# Face recognizer: ArcFace iResNet-100 embedding network.
recognizer = iresnet_inference(model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device)
# Load precomputed gallery embeddings and their corresponding names.
images_names, images_embs = read_features(feature_path="./datasets/face_features/feature")
# Maps a tracker ID -> "name:score" display label.
id_face_mapping = {}
# Latest detection/tracking state, written by process_tracking().
# NOTE(review): mutated from the streaming generator without locking —
# confirm single-worker deployment before scaling out.
data_mapping = {
"raw_image": [],
"tracking_ids": [],
"detection_bboxes": [],
"detection_landmarks": [],
"tracking_bboxes": [],
}
# Global flag flipped by /toggle_pause to pause/resume the video feed.
paused = False
# Load config
def load_config(file_name):
    """Load and parse a YAML configuration file.

    Parameters
    ----------
    file_name : str
        Path to the YAML file.

    Returns
    -------
    The parsed configuration (typically a dict).

    Raises
    ------
    yaml.YAMLError
        If the file is not valid YAML. The original code printed the error
        and implicitly returned None, which made callers (e.g. BYTETracker
        construction in generate_frames) fail later with a confusing
        error; re-raising surfaces the problem at its source.
    """
    with open(file_name, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            raise
# Recognition and face name mapping
@torch.no_grad()
def get_feature1(face_image):
    """Embed a single BGR face crop as a unit-norm ArcFace vector.

    The crop is converted to RGB, resized to 112x112, normalized to
    [-1, 1], and passed through the recognizer; the first (only) row of
    the output batch is L2-normalized and returned as a 1-D numpy array.
    """
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((112, 112)),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    rgb_crop = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    batch = preprocess(rgb_crop).unsqueeze(0).to(device)
    embedding = recognizer(batch)[0].cpu().numpy()
    return embedding / np.linalg.norm(embedding)
def add_persons(name, images):
    """Register a new person: detect faces in the uploaded images, save
    the crops, compute embeddings, persist them, and refresh the
    in-memory gallery.

    Parameters
    ----------
    name : str
        Person's label; also used as the dataset sub-directory name.
    images : list
        Uploaded file-like objects (e.g. werkzeug FileStorage) holding
        raw image bytes.

    Returns
    -------
    str
        Human-readable status message.
    """
    global images_names, images_embs
    new_names = []
    new_embs = []
    person_face_path = f"./datasets/data/{name}"
    os.makedirs(person_face_path, exist_ok=True)
    for image in images:
        input_image = cv2.imdecode(np.frombuffer(image.read(), np.uint8), cv2.IMREAD_COLOR)
        if input_image is None:
            # Skip files that are not decodable images instead of crashing.
            continue
        bboxes, _ = detector.detect(image=input_image)
        for bbox in bboxes:
            # Cast to int: the detector may return float coordinates,
            # which cannot be used as slice indices.
            x1, y1, x2, y2 = (int(v) for v in bbox[:4])
            face_image = input_image[y1:y2, x1:x2]
            if face_image.size == 0:
                # Degenerate/out-of-frame box — nothing to embed.
                continue
            cv2.imwrite(os.path.join(person_face_path, f"{len(new_names)}.jpg"), face_image)
            new_embs.append(get_feature1(face_image))
            new_names.append(name)
    if not new_embs:
        return "No valid images found."
    new_embs = np.array(new_embs)
    new_names = np.array(new_names)
    # NOTE(review): startup loads "./datasets/face_features/feature" (no
    # extension) while this path carries ".npz" — verify read_features
    # treats both forms identically.
    features_path = "./datasets/face_features/feature.npz"
    features = read_features(features_path)
    if features is not None:
        old_names, old_embs = features
        new_names = np.hstack((old_names, new_names))
        new_embs = np.vstack((old_embs, new_embs))
    np.savez_compressed(features_path, images_name=new_names, images_emb=new_embs)
    # BUG FIX: refresh the in-memory gallery so the new person is
    # recognized immediately; the original only updated the file, so
    # recognition ignored new enrollments until the server restarted.
    images_names, images_embs = new_names, new_embs
    return "Successfully added new person!"
@app.route('/add_person', methods=['POST'])
def add_person():
    """POST endpoint to enroll a new person.

    Expects multipart form data with a 'name' field and one or more
    'images' files; responds 400 when either is missing.
    """
    # BUG FIX: request.form['name'] raised KeyError (HTTP 500) when the
    # field was absent, bypassing the intended 400 response below.
    name = request.form.get('name', '')
    images = request.files.getlist('images')
    if not name or not images:
        return jsonify({"error": "Name and images are required."}), 400
    message = add_persons(name, images)
    return jsonify({"message": message})
@torch.no_grad()
def get_feature(face_image):
    """Compute a unit-norm ArcFace embedding for a BGR face crop.

    Unlike get_feature1, the batch dimension is retained, so the result
    has shape (1, D) rather than (D,).
    """
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((112, 112)),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    # BGR -> RGB, then preprocess and move to the inference device.
    rgb_face = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    tensor = pipeline(rgb_face).unsqueeze(0).to(device)
    # Forward pass, then L2-normalize the raw embedding.
    raw_emb = recognizer(tensor).cpu().numpy()
    return raw_emb / np.linalg.norm(raw_emb)
def recognition(face_image):
    """Match a face crop against the known gallery.

    Returns a (name, score) pair; the name is "UNKNOWN" when the best
    similarity score falls below the 0.25 threshold.
    """
    query_emb = get_feature(face_image)
    scores, best_idx = compare_encodings(query_emb, images_embs)
    best_score = scores[0]
    if best_score < 0.25:
        return "UNKNOWN", best_score
    return images_names[best_idx], best_score
# Tracking logic with landmarks and face name mapping.
# (An earlier commented-out draft of process_tracking was removed here;
# the maintained implementation follows below.)
def process_tracking(frame, detector, tracker, args, frame_id, fps):
    """Run detection + ByteTrack on one frame and draw annotated results.

    Parameters
    ----------
    frame : np.ndarray
        BGR frame from the camera.
    detector
        Face detector exposing detect_tracking(image=...).
    tracker : BYTETracker
        Tracker whose internal state is updated in place.
    args : dict
        Tracking config with 'aspect_ratio_thresh' and 'min_box_area'.
    frame_id : int
        Current frame index (overlay only).
    fps : float
        Most recent FPS estimate (overlay only).

    Returns
    -------
    np.ndarray
        Frame with tracking boxes and recognized-name labels drawn.

    Side effects: updates module-level id_face_mapping and data_mapping.
    """
    outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame)
    tracking_tlwhs = []
    tracking_ids = []
    tracking_scores = []
    tracking_bboxes = []
    if outputs is not None and len(bboxes) > 0:
        # Perform face tracking on this frame's detections.
        online_targets = tracker.update(outputs, [img_info["height"], img_info["width"]], (128, 128))
        for t in online_targets:
            tlwh = t.tlwh
            vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"]
            # Keep only boxes that are large enough and not too tall/thin.
            if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical:
                x1, y1, w, h = tlwh
                tracking_bboxes.append([x1, y1, x1 + w, y1 + h])
                tracking_tlwhs.append(tlwh)
                tracking_ids.append(t.track_id)
                tracking_scores.append(t.score)
        # Draw tracking results with the names mapped to each track ID.
        tracking_image = plot_tracking(
            img_info["raw_img"],
            tracking_tlwhs,
            tracking_ids,
            names=id_face_mapping,  # maps track IDs to "name:score" labels
            frame_id=frame_id + 1,
            fps=fps,
        )
    else:
        # No detections: return the raw image unchanged.
        tracking_image = img_info["raw_img"]
    # For every detected face, run recognition and label the matching track.
    # NOTE(review): detection index i is assumed to line up with tracking
    # index i; this only holds when no track was filtered out above.
    for i in range(min(len(bboxes), len(landmarks))):
        try:
            face_alignment = norm_crop(img=img_info["raw_img"], landmark=landmarks[i])
            name, score = recognition(face_image=face_alignment)
            if i < len(tracking_ids):
                label = f"{name}:{score:.2f}"
                id_face_mapping[tracking_ids[i]] = label
                # BUG FIX: the original cv2.putText call omitted the
                # mandatory text argument, so it raised on every call and
                # was silently swallowed by the except below — no label
                # was ever drawn on the frame.
                x1, y1, x2, y2 = tracking_bboxes[i]
                cv2.putText(tracking_image, label, (int(x1), int(y1) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
            else:
                print(f"Tracking ID {i} not found. Skipping name assignment.")
        except IndexError as e:
            print(f"Index error during face recognition: {e}. Skipping this face.")
        except Exception as e:
            print(f"Unexpected error during recognition: {e}. Skipping this face.")
    # Publish the latest state for other consumers (e.g. the /data route).
    data_mapping["raw_image"] = img_info["raw_img"]
    data_mapping["detection_bboxes"] = bboxes
    data_mapping["detection_landmarks"] = landmarks
    data_mapping["tracking_ids"] = tracking_ids
    data_mapping["tracking_bboxes"] = tracking_bboxes
    return tracking_image
# Flask route to display video stream
@app.route('/video_feed')
def video_feed():
    """Stream the annotated webcam feed as a multipart MJPEG response."""
    mjpeg_mimetype = 'multipart/x-mixed-replace; boundary=frame'
    return Response(generate_frames(), mimetype=mjpeg_mimetype)
# Frame generator
def generate_frames():
    """Yield MJPEG frame chunks from the default webcam with tracking overlays.

    Honors the global `paused` flag; recomputes the FPS estimate every
    30 processed frames.
    """
    config_tracking = load_config("./face_tracking/config/config_tracking.yaml")
    tracker = BYTETracker(args=config_tracking, frame_rate=30)
    frame_id = 0
    cap = cv2.VideoCapture(0)
    start_time = time.time_ns()
    frame_count = 0
    fps = -1
    global paused
    try:
        while True:
            if paused:
                time.sleep(0.1)  # small sleep to avoid busy-waiting
                continue
            # BUG FIX: the original ignored the read-success flag, so a
            # missing/failed camera crashed inside process_tracking.
            ok, img = cap.read()
            if not ok or img is None:
                break
            # NOTE(review): frame_id is never incremented (as in the
            # original), so the overlay frame counter stays constant.
            tracking_image = process_tracking(img, detector, tracker, config_tracking, frame_id, fps)
            frame_count += 1
            if frame_count >= 30:
                fps = 1e9 * frame_count / (time.time_ns() - start_time)
                frame_count = 0
                start_time = time.time_ns()
            # Encode frame as JPEG; skip frames that fail to encode.
            ret, buffer = cv2.imencode('.jpg', tracking_image)
            if not ret:
                continue
            frame = buffer.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
    finally:
        cap.release()  # BUG FIX: the original leaked the capture device
# Route to toggle pause state
@app.route('/toggle_pause', methods=['POST'])
def toggle_pause():
    """Flip the global pause flag and report the new state as JSON."""
    global paused
    paused = not paused
    return jsonify(paused=paused)
# Home route
@app.route('/')
def index():
    # Home page with the video-feed UI.
    # NOTE(review): 'indx.html' looks like a typo for 'index.html' —
    # verify against the templates/ directory before changing.
    return render_template('indx.html')
@app.route('/data')
def data():
    # Page for viewing tracking/recognition data (rendered template only;
    # no JSON payload is served from this route).
    return render_template('data.html')
# Main entry point
if __name__ == '__main__':
app.run(debug=True)