인공지능(AI) 📚

MediaPipe Holistic์„ ํ™œ์šฉํ•œ ์‚ฌ์šฉ์ž ํŠน์ •ํ–‰๋™ ์ˆ˜ํ–‰ ์—ฌ๋ถ€ ๊ฐ์ง€

leejaejae 2024. 6. 19. 15:20

MediaPipe Holistic์„ ํ™œ์šฉํ•˜๋ฉด ์ด๋ฏธ์ง€์™€ ๋น„๋””์˜ค์—์„œ ์‚ฌ์šฉ์ž์˜ ํฌ์ฆˆ๋ฅผ ๋ถ„์„ํ•˜๊ณ , ํŠน์ • ํ–‰๋™์„ ๊ฐ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ด๋ฒˆ ํฌ์ŠคํŒ…์—์„œ๋Š” ์ด๋ฏธ์ง€๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋žœ๋“œ๋งˆํฌ๋ฅผ ์ถ”์ถœํ•˜๊ณ , ๋น„๋””์˜ค์—์„œ ๊ฐ€์žฅ ์ผ์น˜ํ•˜๋Š” ํ”„๋ ˆ์ž„์„ ์ฐพ๋Š” ๋ฐฉ๋ฒ•์„ ์•Œ์•„๋ณด๊ฒ ์Šต๋‹ˆ๋‹ค.

< 목차 >

1. 환경 설정 및 이미지 파일 준비
2. 이미지에서 랜드마크 추출 및 표시
3. 비디오 파일 설정 및 처리
4. 랜드마크 비교 함수 정의
5. 비디오 프레임 분석 및 결과 저장
6. 결과

 


1. 환경 설정 및 이미지 파일 준비

먼저 필요한 라이브러리를 임포트하고, 이미지 파일 경로와 출력 경로를 설정합니다.

import os
import cv2
import mediapipe as mp
import json

# MediaPipe modules used throughout: drawing helpers and the Holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Reference image to analyze, and where its annotated copy is written.
image_path = 'test.png'
output_image_path = 'accuracy_test_image/accuracy_answer_landmark.png'

# Make sure the output directory exists. exist_ok=True avoids the
# check-then-create race, and the guard skips the call when the output path
# has no directory component (os.makedirs('') would raise).
output_image_dir = os.path.dirname(output_image_path)
if output_image_dir:
    os.makedirs(output_image_dir, exist_ok=True)

위 코드는 필요한 라이브러리를 임포트하고, 이미지 파일 경로와 출력 경로를 설정하며, 출력 디렉토리가 없으면 생성합니다.

 

2. 이미지에서 랜드마크 추출 및 표시

이미지에서 랜드마크를 추출하고, 이를 이미지에 표시하는 함수를 정의합니다.

def extract_and_draw_landmarks_from_image(image_path, output_image_path):
    """Detect the pose in a still image, save an annotated copy, return landmarks.

    Runs MediaPipe Holistic in static-image mode on the image at
    ``image_path``. When a pose is found, the pose bounding box and skeleton
    are drawn on the image, the annotated image is written to
    ``output_image_path``, and the landmarks are returned rescaled relative
    to that bounding box.

    Args:
        image_path: Path of the image to analyze.
        output_image_path: Destination path for the annotated image.

    Returns:
        A list with one ``{'x', 'y', 'z'}`` dict per pose landmark, where
        ``x`` and ``y`` are rescaled to the pose bounding box (0.0 at the
        minimum, 1.0 at the maximum) and ``z`` is the landmark's ``z`` value
        unchanged. ``None`` when no pose landmarks are detected.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    with mp_holistic.Holistic(
            static_image_mode=True,
            model_complexity=2,
            enable_segmentation=False,
            refine_face_landmarks=True) as holistic:
        # MediaPipe expects RGB input, OpenCV loads images as BGR.
        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if not results.pose_landmarks:
            return None

        landmarks = results.pose_landmarks.landmark

        # Bounding box of the pose in MediaPipe's normalized coordinates.
        xs = [lm.x for lm in landmarks]
        ys = [lm.y for lm in landmarks]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Draw the bounding box, converting to pixel coordinates.
        cv2.rectangle(
            image,
            (int(x_min * width), int(y_min * height)),
            (int(x_max * width), int(y_max * height)),
            (0, 255, 0),
            2,
        )

        # Draw the pose skeleton.
        mp_drawing.draw_landmarks(
            image,
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(0, 255, 0), thickness=2, circle_radius=2),
        )
        cv2.imwrite(output_image_path, image)

        # Rescale x/y to the bounding box. A zero-width or zero-height box
        # would divide by zero, so fall back to a span of 1.0 (every point
        # then maps to 0.0 on that axis).
        x_span = (x_max - x_min) or 1.0
        y_span = (y_max - y_min) or 1.0
        return [
            {'x': (lm.x - x_min) / x_span, 'y': (lm.y - y_min) / y_span, 'z': lm.z}
            for lm in landmarks
        ]

위 코드는 이미지를 읽고, MediaPipe Holistic 모델을 이용해 랜드마크를 추출한 후, 이를 이미지에 그립니다. 추출된 랜드마크는 정규화하여 반환합니다.

 

3. 비디오 파일 설정 및 처리

비디오 파일을 읽어 랜드마크를 추출하고, 이미지의 랜드마크와 비교하여 가장 일치하는 프레임을 찾습니다.

# Input video to scan.
video_path = 'ppt_data/ppt_test.mp4'

# Output locations for the best-match JSON and the annotated video.
output_json_path = 'accuracy_test_video/accuracy_answer.json'
output_video_path = 'accuracy_test_video/accuracy_test_landmark.mp4'

# Create the directory of every output file. exist_ok=True avoids the
# check-then-create race; the guard skips paths with no directory component.
for _output_path in (output_json_path, output_video_path):
    _output_dir = os.path.dirname(_output_path)
    if _output_dir:
        os.makedirs(_output_dir, exist_ok=True)

# Open the input video. Fail loudly here: an unopened capture reports zero
# for FPS and frame size, which would otherwise yield an empty output video.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_path}")
frame_count = 0

# Configure the writer to mirror the input's frame rate and frame size.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

위 코드는 비디오 파일을 열고, 출력 비디오 파일을 설정합니다.

 

4. 랜드마크 비교 함수 정의

이미지와 비디오의 랜드마크를 비교하는 함수를 정의합니다.

def compare_landmarks(landmarks1, landmarks2, threshold=0.05):
    """Count the landmark pairs that agree on every axis within ``threshold``.

    The two sequences are walked in lockstep; iteration stops at the end of
    the shorter one. A pair counts as matching when the absolute differences
    of its ``'x'``, ``'y'`` and ``'z'`` values are all at most ``threshold``.

    Args:
        landmarks1: Sequence of dicts with ``'x'``, ``'y'``, ``'z'`` values.
        landmarks2: Sequence of dicts with ``'x'``, ``'y'``, ``'z'`` values.
        threshold: Largest per-axis absolute difference still counted as a match.

    Returns:
        The number of matching pairs.
    """
    axes = ('x', 'y', 'z')
    return sum(
        1
        for first, second in zip(landmarks1, landmarks2)
        if all(abs(first[axis] - second[axis]) <= threshold for axis in axes)
    )

위 코드는 두 랜드마크 세트를 비교하여 일정 임계값 이내에 있는 점들의 수를 반환합니다.

 

5. 비디오 프레임 분석 및 결과 저장

비디오를 프레임 단위로 분석하여 가장 일치하는 프레임을 찾고, 결과를 저장합니다.

best_frame = None
best_landmarks = None
best_frame_index = -1
max_matching_points = 0

with mp_holistic.Holistic(
    static_image_mode=False,
    model_complexity=2,
    enable_segmentation=True,
    refine_face_landmarks=True) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)

        if results.pose_landmarks:
            # ๋ฐ”์šด๋”ฉ ๋ฐ•์Šค ์ขŒํ‘œ ์ถ”์ถœ
            x_min = min([lm.x for lm in results.pose_landmarks.landmark])
            x_max = max([lm.x for lm in results.pose_landmarks.landmark])
            y_min = min([lm.y for lm in results.pose_landmarks.landmark])
            y_max = max([lm.y for lm in results.pose_landmarks.landmark])
            
            # ๋ฐ”์šด๋”ฉ ๋ฐ•์Šค ๊ทธ๋ฆฌ๊ธฐ
            cv2.rectangle(frame, (int(x_min * width), int(y_min * height)), (int(x_max * width), int(y_max * height)), (0, 255, 0), 2)
            
            # ์ •๊ทœํ™”๋œ ๋žœ๋“œ๋งˆํฌ ์ขŒํ‘œ ์ถ”์ถœ
            video_landmarks = [{'x': (lm.x - x_min) / (x_max - x_min), 'y': (lm.y - y_min) / (y_max - y_min), 'z': lm.z} for lm in results.pose_landmarks.landmark]
            
            matching_points = compare_landmarks(image_landmarks, video_landmarks)
            print(f"ํ”„๋ ˆ์ž„ {frame_count}์—์„œ ์ผ์น˜ํ•˜๋Š” ๋žœ๋“œ๋งˆํฌ ์ˆ˜: {matching_points}")
            
            if matching_points > max_matching_points:
                max_matching_points = matching_points
                best_frame = frame.copy()
                best_landmarks = video_landmarks.copy()
                best_frame_index = frame_count
        
        # ๋žœ๋“œ๋งˆํฌ๋ฅผ ํ”„๋ ˆ์ž„์— ํ‘œ์‹œ
        if results.pose_landmarks:
            mp_drawing.draw_landmarks(
                frame,
                results.pose_landmarks,
                mp_holistic.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)
            )
        
        # ํ”„๋ ˆ์ž„ ์ €์žฅ
        out.write(frame)
        frame_count += 1

cap.release()
out.release()
print(f"์ด ์ฒ˜๋ฆฌ๋œ ํ”„๋ ˆ์ž„ ์ˆ˜: {frame_count}")

# ๊ฐ€์žฅ ์ผ์น˜ํ•˜๋Š” ๋žœ๋“œ๋งˆํฌ๊ฐ€ ๋งŽ์€ ํ”„๋ ˆ์ž„์„ ์ €์žฅ
if best_frame is not None:
    cv2.imwrite('answer/answer.png', best_frame)
    print(f"๊ฐ€์žฅ ์ผ์น˜ํ•˜๋Š” ํ”„๋ ˆ์ž„์„ answer.png๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    # ๋žœ๋“œ๋งˆํฌ ๊ฐ’์„ JSON์œผ๋กœ ์ €์žฅ
    with open(output_json_path, 'w') as json_file:
        json.dump({'frame_index': best_frame_index, 'landmarks': best_landmarks}, json_file, indent=2)
    print(f"๊ฐ€์žฅ ์ผ์น˜ํ•˜๋Š” ํ”„๋ ˆ์ž„์˜ ๋žœ๋“œ๋งˆํฌ๋ฅผ {output_json_path}์— ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
else:
    print("์ผ์น˜ํ•˜๋Š” ํ”„๋ ˆ์ž„์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

위 코드는 비디오 프레임을 하나씩 처리하여 랜드마크를 추출하고, 이미지의 랜드마크와 비교하여 가장 일치하는 프레임을 찾습니다. 최종 결과를 이미지와 JSON 파일로 저장합니다.

 

6. 결과

정답 이미지

비교 비디오