python -m tf2onnx.convert --opset 11 --tflite pose_landmark_lite.tflite --output pose_landmark_lite.onnx
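Before writing inference code, it can help to sanity-check the converted model's input and output signatures; a minimal sketch using the standard onnxruntime API (the filename matches the command above):

import onnxruntime

sess = onnxruntime.InferenceSession('pose_landmark_lite.onnx')
print([(i.name, i.shape, i.type) for i in sess.get_inputs()])
print([(o.name, o.shape, o.type) for o in sess.get_outputs()])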
ONNX inference code:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import cv2
import numpy as np
import onnxruntime
import time

if __name__ == '__main__':
    for aa in [1, 2]:
        # img_path = f'./166_{aa}.jpg'
        img_path = f'./03.jpg'
        img = cv2.imread(img_path)
        if img is None:
            exit(1)
        t_h, t_w = img.shape[:2]
        a_w = max(t_w, t_h)
        # Letterbox: paste the image onto a square black canvas so the
        # aspect ratio survives the model's square input resize.
        img_b = np.zeros((a_w, a_w, 3), dtype=np.uint8)
        if t_h > t_w:
            # Tall image: center it horizontally (this branch was truncated
            # in the original; completed symmetrically to the case below).
            pad = (t_h - t_w) // 2
            img_b[:, pad:pad + t_w] = img
        else:
            # Wide image: center it vertically.
            pad = (t_w - t_h) // 2
            img_b[pad:pad + t_h] = img
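        # A minimal continuation sketch: run the converted model with
        # onnxruntime. pose_landmark_lite commonly takes a 256x256 RGB float
        # image in [0, 1] (NHWC); verify the actual input spec with
        # sess.get_inputs() before relying on these assumptions.
        sess = onnxruntime.InferenceSession('pose_landmark_lite.onnx')  # build once outside the loop in real code
        inp = sess.get_inputs()[0]
        blob = cv2.resize(img_b, (256, 256))
        blob = cv2.cvtColor(blob, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        blob = blob[None, ...]  # add batch dim -> (1, 256, 256, 3)
        t0 = time.time()
        outputs = sess.run(None, {inp.name: blob})
        print(f'inference: {time.time() - t0:.3f}s')
        for meta, out in zip(sess.get_outputs(), outputs):
            print(meta.name, out.shape)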
AI recognition of people can be divided into five levels:
1. Is there a person? (object detection)
2. Where is the person? (object localization & semantic segmentation)
3. Who is this person? (face identification)
4. What state is this person in right now? (pose estimation)
5. What has this person been doing over a period of time? (sequence action recognition)
Pose estimation example code (trt_pose):
# Set up the model
import json

import cv2
import PIL.Image
import torch
import torch2trt
from torch2trt import TRTModule
import torchvision.transforms as transforms
import trt_pose.coco
import trt_pose.models
from trt_pose.parse_objects import ParseObjects

MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
WIDTH = 224
HEIGHT = 224

# Load the COCO-style keypoint definition first: the model's channel
# counts are derived from the number of keypoints and skeleton links
with open('human_pose.json', 'r') as f:
    human_pose = json.load(f)

# Load the keypoints
num_parts = len(human_pose['keypoints'])
num_links = len(human_pose['skeleton'])
part_names = list(human_pose['keypoints'])
# COCO skeleton indices are 1-based, so shift them to 0-based part indices
part_edges = [(a - 1, b - 1) for a, b in human_pose['skeleton']]

# Load the model (cmap channels = num_parts, paf channels = 2 * num_links)
model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links)
model.load_state_dict(torch.load(MODEL_WEIGHTS))
model = model.cuda().eval()

# Set up the optimized model
data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1 << 25)
torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

# Load the optimized model
model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

# Build the peak/object parser from the skeleton topology
topology = trt_pose.coco.coco_category_to_topology(human_pose)
parse_objects = ParseObjects(topology)

# Set up the transforms
transform = transforms.Compose([
    transforms.Resize((HEIGHT, WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Set up the camera
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open camera")
    exit()

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break
    frame_h, frame_w = frame.shape[:2]

    # Process the frame (OpenCV frames are BGR, so swap to RGB for the model)
    image = PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    image = transform(image).cuda()
    cmap, paf = model_trt(image[None, ...])
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)

    # Draw the keypoints: objects maps each part to a peak index and
    # peaks stores normalized (y, x) coordinates
    for i in range(int(counts[0])):
        obj = objects[0][i]
        xy = {}
        for j in range(num_parts):
            k = int(obj[j])
            if k >= 0:
                peak = peaks[0][j][k]
                x = int(float(peak[1]) * frame_w)
                y = int(float(peak[0]) * frame_h)
                xy[j] = (x, y)
                cv2.circle(frame, (x, y), 3, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
        for e in part_edges:
            if e[0] in xy and e[1] in xy:
                cv2.line(frame, xy[e[0]], xy[e[1]], (0, 255, 255), 2)

    # Display the resulting frame
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
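The manual circle-and-line loop above replicates what trt_pose already ships as a helper; if only the overlay is needed, the library's DrawObjects class (built from the same topology) is a drop-in alternative, sketched here:

from trt_pose.draw_objects import DrawObjects

draw_objects = DrawObjects(topology)
# Inside the capture loop, after parse_objects:
#     draw_objects(frame, counts, objects, peaks)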