MediaPipe Holistic์ ํ์ฉํ๋ฉด ์ด๋ฏธ์ง์ ๋น๋์ค์์ ์ฌ์ฉ์์ ํฌ์ฆ๋ฅผ ๋ถ์ํ๊ณ , ํน์ ํ๋์ ๊ฐ์งํ ์ ์์ต๋๋ค. ์ด๋ฒ ํฌ์คํ ์์๋ ์ด๋ฏธ์ง๋ฅผ ๋ถ์ํ์ฌ ๋๋๋งํฌ๋ฅผ ์ถ์ถํ๊ณ , ๋น๋์ค์์ ๊ฐ์ฅ ์ผ์นํ๋ ํ๋ ์์ ์ฐพ๋ ๋ฐฉ๋ฒ์ ์์๋ณด๊ฒ ์ต๋๋ค.
< 목차 >
1. 환경 설정 및 이미지 파일 준비
2. 이미지에서 랜드마크 추출 및 표시
3. 비디오 파일 설정 및 처리
4. 랜드마크 비교 함수 정의
5. 비디오 프레임 분석 및 결과 저장
6. 결과
1. 환경 설정 및 이미지 파일 준비
먼저 필요한 라이브러리를 임포트하고, 이미지 파일 경로와 출력 경로를 설정합니다.
import os
import cv2
import mediapipe as mp
import json
# MediaPipe setup: short aliases for the drawing utilities and the Holistic solution module
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic
# Input image and the path where its annotated copy is written
image_path = 'test.png'
output_image_path = 'accuracy_test_image/accuracy_answer_landmark.png'

# Make sure the output directory exists. exist_ok=True avoids the
# check-then-create race, and the guard skips makedirs('') when the output
# path has no directory component.
output_image_dir = os.path.dirname(output_image_path)
if output_image_dir:
    os.makedirs(output_image_dir, exist_ok=True)
위 코드는 필요한 라이브러리를 임포트하고, 이미지 파일 경로와 출력 경로를 설정하며, 출력 디렉토리가 없으면 생성합니다.
2. 이미지에서 랜드마크 추출 및 표시
이미지에서 랜드마크를 추출하고, 이를 이미지에 표시하는 함수를 정의합니다.
def extract_and_draw_landmarks_from_image(image_path, output_image_path):
    """Detect the pose in an image, save an annotated copy, and return normalized landmarks.

    When a pose is found, its bounding box and skeleton are drawn onto the
    image and the result is written to ``output_image_path``.

    Args:
        image_path: Path of the image to analyze.
        output_image_path: Path where the annotated image is written.

    Returns:
        A list of ``{'x', 'y', 'z'}`` dicts, one per pose landmark. ``x`` and
        ``y`` are rescaled relative to the pose bounding box (0 at the minimum
        edge, 1 at the maximum edge); ``z`` is passed through unchanged.
        Returns ``None`` when no pose landmarks are detected.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    with mp_holistic.Holistic(
            static_image_mode=True,
            model_complexity=2,
            enable_segmentation=False,
            refine_face_landmarks=True) as holistic:
        # OpenCV loads images as BGR; the image is converted to RGB before processing.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(image_rgb)

        if not results.pose_landmarks:
            return None

        landmarks = results.pose_landmarks.landmark

        # Bounding box of the pose in the landmarks' own coordinate space
        xs = [lm.x for lm in landmarks]
        ys = [lm.y for lm in landmarks]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Draw the bounding box, scaled to pixel coordinates
        cv2.rectangle(
            image,
            (int(x_min * width), int(y_min * height)),
            (int(x_max * width), int(y_max * height)),
            (0, 255, 0),
            2,
        )

        # Draw the pose skeleton
        mp_drawing.draw_landmarks(
            image,
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(0, 255, 0), thickness=2, circle_radius=2),
        )
        cv2.imwrite(output_image_path, image)

        # A degenerate box (all landmarks sharing one x or y) would divide by
        # zero; falling back to a span of 1.0 maps those coordinates to 0.0.
        x_span = (x_max - x_min) or 1.0
        y_span = (y_max - y_min) or 1.0

        # Return landmark coordinates normalized to the bounding box
        return [
            {'x': (lm.x - x_min) / x_span, 'y': (lm.y - y_min) / y_span, 'z': lm.z}
            for lm in landmarks
        ]
위 코드는 이미지를 읽고, MediaPipe Holistic 모델을 이용해 랜드마크를 추출한 후, 이를 이미지에 그립니다. 추출된 랜드마크는 정규화하여 반환합니다.
3. 비디오 파일 설정 및 처리
비디오 파일을 읽어 랜드마크를 추출하고, 이미지의 랜드마크와 비교하여 가장 일치하는 프레임을 찾습니다.
# Input video path
video_path = 'ppt_data/ppt_test.mp4'

# Output locations for the JSON result and the annotated video
output_json_path = 'accuracy_test_video/accuracy_answer.json'
output_video_path = 'accuracy_test_video/accuracy_test_landmark.mp4'
# exist_ok=True replaces the race-prone "check, then create" sequence.
os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

# Open the video; fail loudly here instead of silently processing zero frames
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_path}")
frame_count = 0

# Writer for the annotated video, using the source frame rate and frame size
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
위 코드는 비디오 파일을 열고, 출력 비디오 파일을 설정합니다.
4. 랜드마크 비교 함수 정의
이미지와 비디오의 랜드마크를 비교하는 함수를 정의합니다.
def compare_landmarks(landmarks1, landmarks2, threshold=0.05):
    """Count landmark pairs that agree on x, y and z within ``threshold``.

    Landmarks are paired by position; if the sequences differ in length, the
    extra entries of the longer one are ignored.

    Args:
        landmarks1: Sequence of dicts with ``'x'``, ``'y'`` and ``'z'`` keys.
        landmarks2: Sequence of dicts with the same keys.
        threshold: Largest absolute per-axis difference that still counts as
            a match.

    Returns:
        The number of pairs whose x, y and z each differ by at most
        ``threshold``. Returns 0 when either input is ``None`` or empty.
    """
    # A missing landmark set (e.g. no pose detected) simply matches nothing.
    if not landmarks1 or not landmarks2:
        return 0

    return sum(
        1
        for lm1, lm2 in zip(landmarks1, landmarks2)
        if all(abs(lm1[axis] - lm2[axis]) <= threshold for axis in ('x', 'y', 'z'))
    )
위 코드는 두 랜드마크 세트를 비교하여 일정 임계값 이내에 있는 점들의 수를 반환합니다.
5. 비디오 프레임 분석 및 결과 저장
비디오를 프레임 단위로 분석하여 가장 일치하는 프레임을 찾고, 결과를 저장합니다.
# Best match found so far across the video
best_frame = None
best_landmarks = None
best_frame_index = -1
max_matching_points = 0

try:
    # Reference pose from the still image; every video frame is scored against it.
    image_landmarks = extract_and_draw_landmarks_from_image(image_path, output_image_path)
    if image_landmarks is None:
        raise RuntimeError(f"No pose landmarks detected in reference image: {image_path}")

    with mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=2,
            enable_segmentation=True,
            refine_face_landmarks=True) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV frames are BGR; the frame is converted to RGB before processing.
            results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            if results.pose_landmarks:
                pose_points = results.pose_landmarks.landmark

                # Bounding box of the pose in the landmarks' own coordinate space
                xs = [lm.x for lm in pose_points]
                ys = [lm.y for lm in pose_points]
                x_min, x_max = min(xs), max(xs)
                y_min, y_max = min(ys), max(ys)

                # Draw the bounding box, scaled to pixel coordinates
                cv2.rectangle(
                    frame,
                    (int(x_min * width), int(y_min * height)),
                    (int(x_max * width), int(y_max * height)),
                    (0, 255, 0),
                    2,
                )

                # A degenerate box would divide by zero; a span of 1.0 maps
                # those coordinates to 0.0 instead.
                x_span = (x_max - x_min) or 1.0
                y_span = (y_max - y_min) or 1.0

                # Landmark coordinates normalized to the bounding box
                video_landmarks = [
                    {'x': (lm.x - x_min) / x_span, 'y': (lm.y - y_min) / y_span, 'z': lm.z}
                    for lm in pose_points
                ]

                matching_points = compare_landmarks(image_landmarks, video_landmarks)
                # NOTE(review): the message text looks mis-encoded (mojibake); kept
                # verbatim here - confirm the file's original encoding.
                print(f"ํ๋ ์ {frame_count}์์ ์ผ์นํ๋ ๋๋๋งํฌ ์: {matching_points}")

                if matching_points > max_matching_points:
                    max_matching_points = matching_points
                    # Copied before the skeleton is drawn, so the saved best
                    # frame carries the bounding box only.
                    best_frame = frame.copy()
                    best_landmarks = video_landmarks.copy()
                    best_frame_index = frame_count

                # Draw the pose skeleton on the frame
                mp_drawing.draw_landmarks(
                    frame,
                    results.pose_landmarks,
                    mp_holistic.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing.DrawingSpec(
                        color=(0, 255, 0), thickness=2, circle_radius=2),
                )

            # Write every frame, annotated or not, to the output video
            out.write(frame)
            frame_count += 1
finally:
    # Release the capture and the writer even if processing fails midway.
    cap.release()
    out.release()

print(f"์ด ์ฒ๋ฆฌ๋ ํ๋ ์ ์: {frame_count}")
# Save the frame with the most matching landmarks
if best_frame is not None:
    # The 'answer' directory is not created anywhere else; create it so the
    # image write below has a destination.
    os.makedirs('answer', exist_ok=True)
    cv2.imwrite('answer/answer.png', best_frame)
    # NOTE(review): the message text looks mis-encoded (mojibake); kept
    # verbatim here - confirm the file's original encoding.
    print(f"๊ฐ์ฅ ์ผ์นํ๋ ํ๋ ์์ answer.png๋ก ์ ์ฅํ์ต๋๋ค.")

    # Save the landmark values as JSON
    with open(output_json_path, 'w', encoding='utf-8') as json_file:
        json.dump(
            {'frame_index': best_frame_index, 'landmarks': best_landmarks},
            json_file,
            indent=2,
        )
    print(f"๊ฐ์ฅ ์ผ์นํ๋ ํ๋ ์์ ๋๋๋งํฌ๋ฅผ {output_json_path}์ ์ ์ฅํ์ต๋๋ค.")
else:
    print("์ผ์นํ๋ ํ๋ ์์ ์ฐพ์ ์ ์์ต๋๋ค.")
위 코드는 비디오 프레임을 하나씩 처리하여 랜드마크를 추출하고, 이미지의 랜드마크와 비교하여 가장 일치하는 프레임을 찾습니다. 최종 결과를 이미지와 JSON 파일로 저장합니다.
6. 결과
정답 이미지
비교 비디오