For cases where the target is occluded, a Kalman filter is used to predict its trajectory; experiments show that it delivers good trajectory-prediction results.
Observation: the sensor reading, which may carry a large error for various practical reasons.
Estimate (also called the prior estimate): produced by the Kalman filter's state-transition equation.
Optimal estimate (also called the posterior estimate): obtained by fusing the observation with the prior estimate.
The posterior estimate fuses the prior estimate with the observation; for example, the prediction for the next frame (derived from the current frame) is fused with the detection in the next frame. Predict first (prior estimate), then update (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix.
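In equation form, this fusion is the standard Kalman correction, where $\hat{x}^-_k$ is the prior estimate, $z_k$ the observation, and $K_k$ the Kalman gain:

$$
\hat{x}_k = \hat{x}^-_k + K_k\left(z_k - H\hat{x}^-_k\right)
$$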
Using a Kalman filter to predict the target's motion trajectory in single-object tracking.
Contents:
- Two versions of the code
  - Version 1: read target-box txt files generated by the detection algorithm
    - 1.1 main.py
    - 1.2 utils.py
  - Version 2: generate target-box coordinates in real time with the detection algorithm
    - 2.1 ai-kalman.py
    - 2.2 AIDetector_pytorch.py
- Kalman theory
  - The expressions p(w) ~ N(0,Q), p(v) ~ N(0,R)
- Problems encountered during implementation
  - 1. Getting the value out of a tensor
- Reference links
Two versions of the code
Version 1: read target-box txt files generated by the detection algorithm
Single-object tracking: the detector produces detection boxes, and a single ID is kept for the whole sequence, so the unique tracked target is not lost even when two similar objects appear. The detector's box serves as the observation. The target state is X = [x, y, w, h, dx, dy]: center coordinates, width and height, and the velocity of the center. How is the target's observation found among the N detected boxes? The box at time t with the largest IOU against the posterior estimate from time t-1 is taken as the observation (this is error-prone when targets cross, which is why the observation must be fused with the prior estimate; a minimal sketch of the association step follows).
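Before the full script, here is a minimal sketch of this max-IOU association, with hypothetical toy boxes, using the cal_iou and xywh_to_xyxy helpers from utils.py (section 1.2 below):

```python
import numpy as np
from utils import cal_iou, xywh_to_xyxy  # helpers defined in 1.2 below

# Posterior state from the previous frame: [cx, cy, w, h, dx, dy]
X_posterior = np.array([150., 150., 100., 100., 0., 0.])

# Hypothetical detections in the current frame (xyxy format)
detections = [np.array([105., 110., 200., 205.]),   # overlaps the previous box
              np.array([400., 400., 480., 470.])]   # far away

max_iou, target_box = 0.3, None  # 0.3 = IOU matching threshold
for det in detections:
    iou = cal_iou(det, xywh_to_xyxy(X_posterior[0:4]))
    if iou > max_iou:
        max_iou, target_box = iou, det  # this detection becomes the observation

print(target_box)  # the first box wins; if it stays None, the target is treated as lost
```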
1.1 main.py
import os
import cv2
import numpy as np
from utils import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace
# Single-object tracking
# The detector produces detection boxes; a single ID is kept for the whole sequence,
# so the unique tracked target is not lost when two similar objects appear
# The detector's box is the measurement (observation)
# Target state X = [x, y, w, h, dx, dy]: center coordinates, width/height, center velocity
# Finding the observation among the N detected boxes:
# the box at time t with the largest IOU against the posterior estimate from t-1
# is taken as the observation (error-prone when targets cross,
# hence the fusion with the prior estimate)
#
# 状态初始化
initial_target_box = [729, 238, 764, 339] # 目标初始bouding box
# initial_target_box = [193 ,342 ,250 ,474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
0, 0]]).T # [中心x,中心y,宽w,高h,dx,dy]
IOU_Threshold = 0.3 # 匹配时的阈值
# 状态转移矩阵,上一时刻的状态转移到当前时刻
A = np.array([[1, 0, 0, 0, 1, 0],
[0, 1, 0, 0, 0, 1],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 1]])
# 状态观测矩阵,即教程中的大C
H = np.eye(6)
# 过程噪声w协方差矩阵Q,p(w)~N(0,Q),噪声w来自真实世界中的不确定性,w服从均值为0、协方差为Q的正态分布(高斯分布)
# 在跟踪任务当中,过程噪声来自于目标移动的不确定性(突然加速、减速、转弯等)
Q = np.eye(6) * 0.1
# 观测噪声v协方差矩阵R,p(v)~N(0,R)
# 观测噪声来自于检测框丢失、重叠等,代码中没有用到观测噪声
R = np.eye(6) * 1
# 控制输入矩阵B
B = None
# 状态估计协方差矩阵P初始化,表示状态之间的变化关系,比如xywh之间有某种联系,比如y变化大那么h大概率也要发生较大变化
# 该矩阵用于估计各个权重值,包括卡尔曼增益K等
P = np.eye(6)
if __name__ == "__main__":
    video_path = "./data/testvideo1.mp4"
    label_path = "./data/labels"
    file_name = "testvideo1"
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (768, 576))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimate covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(255, 255, 255), target=False)
        # print(frame_counter)
        label_file_path = os.path.join(label_path, file_name + "_" + str(frame_counter) + ".txt")
        with open(label_file_path, "r") as f:
            content = f.readlines()
        max_iou = IOU_Threshold
        max_iou_matched = False
        # --------- Find the observation via maximum IOU ------------
        for j, data_ in enumerate(content):
            data = data_.replace('\n', "").split(" ")
            xyxy = np.array(data[1:5], dtype="float")
            plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            if iou > max_iou:
                target_box = xyxy
                max_iou = iou
                max_iou_matched = True
        if max_iou_matched:
            # The max-IOU box is taken as the observation
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])
        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state estimate covariance matrix P --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ------ Compute the Kalman gain: it decides how much of
            # (observation - prior estimate) is added on top of the prior estimate ---------------------
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # -------------- Update the posterior estimate ------------
            # Z is the observation (detection result); X_prior is the prior, X_posterior the posterior
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # --------- Update the state estimate covariance matrix P -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # IOU matching failed, so there is no observation:
            # propagate the last posterior estimate through the motion model only
            # (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        draw_trace(frame, trace_list)
        cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
1.2 utils.py
import cv2

def xyxy_to_xywh(xyxy):
    # Convert [x1, y1, x2, y2] to (center_x, center_y, w, h)
    center_x = (xyxy[0] + xyxy[2]) / 2
    center_y = (xyxy[1] + xyxy[3]) / 2
    w = xyxy[2] - xyxy[0]
    h = xyxy[3] - xyxy[1]
    return (center_x, center_y, w, h)

def plot_one_box(xyxy, img, color=(0, 200, 0), target=False):
    xy1 = (int(xyxy[0]), int(xyxy[1]))
    xy2 = (int(xyxy[2]), int(xyxy[3]))
    if target:
        color = (0, 0, 255)
    cv2.rectangle(img, xy1, xy2, color, 1, cv2.LINE_AA)  # 1-px anti-aliased outline

def updata_trace_list(box_center, trace_list, max_list_len=50):
    # Append the new center, dropping the oldest point once the list is full
    if len(trace_list) <= max_list_len:
        trace_list.append(box_center)
    else:
        trace_list.pop(0)
        trace_list.append(box_center)
    return trace_list

def draw_trace(img, trace_list):
    """
    Draw the trace as line segments between consecutive centers
    :param img:
    :param trace_list:
    :return:
    """
    for i, item in enumerate(trace_list):
        if i < 1:
            continue
        cv2.line(img,
                 (trace_list[i][0], trace_list[i][1]), (trace_list[i - 1][0], trace_list[i - 1][1]),
                 (255, 255, 0), 3)

def cal_iou(box1, box2):
    """
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return: IOU of the two boxes
    """
    x1min, y1min, x1max, y1max = box1[0], box1[1], box1[2], box1[3]
    x2min, y2min, x2max, y2max = box2[0], box2[1], box2[2], box2[3]
    # Areas of the two boxes
    s1 = (y1max - y1min + 1.) * (x1max - x1min + 1.)
    s2 = (y2max - y2min + 1.) * (x2max - x2min + 1.)
    # Coordinates of the intersection
    xmin = max(x1min, x2min)
    ymin = max(y1min, y2min)
    xmax = min(x1max, x2max)
    ymax = min(y1max, y2max)
    inter_h = max(ymax - ymin + 1, 0)
    inter_w = max(xmax - xmin + 1, 0)
    intersection = inter_h * inter_w
    union = s1 + s2 - intersection
    # Compute the IOU
    iou = intersection / union
    return iou

def cal_distance(box1, box2):
    """
    Distance between the center points of two boxes
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return:
    """
    center1 = ((box1[0] + box1[2]) // 2, (box1[1] + box1[3]) // 2)
    center2 = ((box2[0] + box2[2]) // 2, (box2[1] + box2[3]) // 2)
    dis = ((center1[0] - center2[0]) ** 2 + (center1[1] - center2[1]) ** 2) ** 0.5
    return dis

def xywh_to_xyxy(xywh):
    # Convert (center_x, center_y, w, h) back to [x1, y1, x2, y2]
    # (// is floor division, so sub-pixel precision is deliberately dropped)
    x1 = xywh[0] - xywh[2] // 2
    y1 = xywh[1] - xywh[3] // 2
    x2 = xywh[0] + xywh[2] // 2
    y2 = xywh[1] + xywh[3] // 2
    return [x1, y1, x2, y2]

if __name__ == "__main__":
    box1 = [100, 100, 200, 200]
    box2 = [100, 100, 200, 300]
    iou = cal_iou(box1, box2)
    print(iou)
    box1.pop(0)
    box1.append(555)
    print(box1)
Version 2: generate target-box coordinates in real time with the detection algorithm
AIDetector_pytorch wraps the YOLO detection algorithm; once imported, it detects every frame of the video in real time and outputs the results. Since my video contains a single target, there is always exactly one detection, so single-object tracking can be realized. The Kalman filter predicts the motion trajectory; strung together frame by frame, this yields the tracking algorithm. The code in this article is detection-based: the detector runs on every frame, and when the target disappears, the Kalman filter takes over the trajectory prediction (see the condensed sketch below).
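A condensed sketch of the per-frame decision the script makes; kalman_step is an illustrative helper, not part of the code below, and the matrices match the ones defined in the scripts:

```python
import numpy as np

# Same 6-state constant-velocity model as in the scripts
A = np.eye(6); A[0, 4] = A[1, 5] = 1
H = np.eye(6)
Q = np.eye(6) * 0.1
R = np.eye(6) * 1

def kalman_step(X_posterior, P_posterior, Z=None):
    """One tracking step: predict + update when a detection Z is available,
    prediction only when the target is lost (Z is None)."""
    X_prior = A @ X_posterior                  # propagate through the motion model
    if Z is None:                              # no detection this frame
        return X_prior, P_posterior
    P_prior = A @ P_posterior @ A.T + Q
    K = P_prior @ H.T @ np.linalg.inv(H @ P_prior @ H.T + R)
    X_post = X_prior + K @ (Z - H @ X_prior)   # correct with the detection
    P_post = (np.eye(6) - K @ H) @ P_prior
    return X_post, P_post

# Example: one lost frame followed by a matched detection
X, P = np.array([[370., 650., 123., 40., 0., 0.]]).T, np.eye(6)
X, P = kalman_step(X, P)                                                  # lost
X, P = kalman_step(X, P, np.array([[375., 648., 123., 40., 5., -2.]]).T)  # matched
```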
2.1 ai-kalman.py
from AIDetector_pytorch import Detector
import cv2
import os
import numpy as np
from utilss import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

det = Detector()
# State initialization
initial_target_box = [308, 630, 431, 670]
# initial_target_box = [137, 280, 192, 297]  # initial bounding box of the target
# initial_target_box = [193, 342, 250, 474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
                           0, 0]]).T  # [center x, center y, width w, height h, dx, dy]
IOU_Threshold = 0.1  # IOU threshold used for matching
# State transition matrix: carries the previous state over to the current time step
A = np.array([[1, 0, 0, 0, 1, 0],
              [0, 1, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0],
              [0, 0, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 1]])
# Observation matrix, the "C" matrix in some tutorials
H = np.eye(6)
# Process noise covariance matrix Q, p(w) ~ N(0,Q): in tracking, process noise comes from
# the unpredictability of the target's motion (sudden acceleration, braking, turns, etc.)
Q = np.eye(6) * 0.1
# Observation noise covariance matrix R, p(v) ~ N(0,R)
# Observation noise comes from lost or overlapping detection boxes
R = np.eye(6) * 1
# Control input matrix B
B = None
# Initialization of the state estimate covariance matrix P (see version 1 for details)
P = np.eye(6)
if __name__ == "__main__":
    count = 0  # counts the frames in which the target is lost
    video_path = r'E:\dplearning\KCFTest\testVideo\fire\4.mp4'
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (1440, 1080))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimate covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        # print(frame.shape)
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(0, 255, 0), target=False)
        # print(frame_counter)
        im, result = det.detect(frame)
        print(result)
        # --------- Find the observation ------------
        # The video contains a single target, so the first detection is taken directly;
        # the max-IOU check from version 1 is left commented out here
        max_iou = IOU_Threshold
        max_iou_matched = False
        if len(result) > 0:
            xyxy = np.array(result[0][0:4], dtype="float")
            # plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            # if iou > max_iou:
            target_box = xyxy
            # max_iou = iou
            max_iou_matched = True
        else:
            count = count + 1
            max_iou_matched = False
        # --------------------------------
        if max_iou_matched:
            # The detection box is taken as the observation
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])
        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state estimate covariance matrix P --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ------ Compute the Kalman gain: it decides how much of
            # (observation - prior estimate) is added on top of the prior estimate ---------------------
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # -------------- Update the posterior estimate ------------
            # Z is the observation (detection result); X_prior is the prior, X_posterior the posterior
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # --------- Update the state estimate covariance matrix P -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # IOU matching failed, so there is no observation:
            # propagate the last posterior estimate through the motion model only
            # (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        # draw_trace(frame, trace_list)
        # cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    print(count)
    cap.release()
    cv2.destroyAllWindows()
2.2 AIDetector_pytorch.py
import torch
import numpy as np
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.torch_utils import select_device
from utils.BaseDetector import baseDet

class Detector(baseDet):

    def __init__(self):
        super(Detector, self).__init__()
        self.init_model()
        self.build_config()

    def init_model(self):
        self.weights = 'weights/best-4-28.pt'  # change this line to swap in different weights
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        self.device = select_device(self.device)
        model = attempt_load(self.weights, map_location=self.device)
        model.to(self.device).eval()
        model.half()
        # torch.save(model, 'test.pt')
        self.m = model
        self.names = model.module.names if hasattr(
            model, 'module') else model.names

    def preprocess(self, img):
        img0 = img.copy()
        img = letterbox(img, new_shape=self.img_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half()  # half precision
        img /= 255.0  # normalize the image to [0, 1]
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        return img0, img

    def detect(self, im):
        im0, img = self.preprocess(im)
        pred = self.m(img, augment=False)[0]
        pred = pred.float()
        pred = non_max_suppression(pred, self.threshold, 0.40)
        pred_boxes = []
        for det in pred:
            if det is not None and len(det):
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                for *x, conf, cls_id in det:
                    lbl = self.names[int(cls_id)]
                    x1, y1 = int(x[0]), int(x[1])
                    x2, y2 = int(x[2]), int(x[3])
                    pred_boxes.append(
                        (x1, y1, x2, y2, lbl, conf))
        return im, pred_boxes
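For reference, a minimal usage sketch of this class (it assumes the YOLOv5 repository modules imported above and the weights file are in place; 'some_frame.jpg' is a placeholder):

```python
from AIDetector_pytorch import Detector
import cv2

det = Detector()
frame = cv2.imread('some_frame.jpg')  # placeholder: any BGR frame
im, boxes = det.detect(frame)         # boxes: list of (x1, y1, x2, y2, label, conf)
for x1, y1, x2, y2, lbl, conf in boxes:
    print(lbl, float(conf), (x1, y1, x2, y2))
```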
Kalman theory
Observation: the sensor reading, which may carry a large error for various practical reasons. Estimate: produced by the Kalman filter's state-transition equation (the prior estimate). Optimal estimate: fuses the two (the posterior estimate), using the observation as a correction. The fusion combines an estimate with an observation; for example, the prediction for the next frame (derived from the current frame) is fused with the detection in the next frame. Predict first (prior estimate), then update (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix, as the equations below show.
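In standard notation (matching A, B, H, Q, R in the code above; B is None here, so the control term drops out), the full predict/update cycle is:

$$
\begin{aligned}
\text{Predict:}\quad & \hat{x}^-_k = A\hat{x}_{k-1} + Bu_k \\
& P^-_k = AP_{k-1}A^T + Q \\
\text{Update:}\quad & K_k = P^-_k H^T \left(HP^-_k H^T + R\right)^{-1} \\
& \hat{x}_k = \hat{x}^-_k + K_k\left(z_k - H\hat{x}^-_k\right) \\
& P_k = (I - K_k H)\,P^-_k
\end{aligned}
$$

These five lines correspond one-to-one to X_prior, P_prior, K, X_posterior and P_posterior in the scripts.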
The role of the Kalman gain:
![Role of the Kalman gain](https://img-blog.csdnimg.cn/9f6bf022cbf1453b9c74ea78d54bdd04.png)
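The gain balances trust between the motion model and the measurement. With H = I, as in this code, the limiting cases make this concrete:

$$
R \to 0 \;\Rightarrow\; K_k \to I \;\;(\text{trust the observation fully}), \qquad
P^-_k \to 0 \;\Rightarrow\; K_k \to 0 \;\;(\text{trust the prior estimate fully}).
$$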
The expressions p(w) ~ N(0,Q) and p(v) ~ N(0,R) in the Kalman filter
The standard Kalman filter targets linear systems (a process equation plus an observation equation); w and v are the noise terms of the process equation and the observation equation, respectively. w follows a normal distribution (also called a Gaussian distribution) with mean 0 and covariance Q, and v follows a normal distribution with mean 0 and covariance R. In other words, w and v are both zero-mean Gaussian noise.
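Written out, the linear system these noise terms belong to is:

$$
x_k = A x_{k-1} + B u_k + w_k, \qquad z_k = H x_k + v_k,
\qquad w_k \sim N(0, Q),\quad v_k \sim N(0, R).
$$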
Problems encountered during implementation
1. Getting the value out of a tensor
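In this detection pipeline, confidences and coordinates come back as PyTorch tensors; a short sketch of the usual ways to extract plain Python values (variable names are illustrative):

```python
import torch

conf = torch.tensor([0.87])   # e.g. a confidence score returned by the detector

v1 = conf.item()              # single-element tensor -> Python float
v2 = float(conf)              # equivalent for a single element
arr = conf.cpu().numpy()      # move to CPU first, then convert to a NumPy array
print(v1, v2, arr)
```

Note that .item() only works on one-element tensors, and a tensor that requires grad needs .detach() before .numpy().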
Experimental results: works well with a static camera, and the target needs to move at low speed.
![Tracking result](https://img-blog.csdnimg.cn/92a9f4bdd466420eb87a0667701a2923.png)
Reference links
https://www.bilibili.com/video/BV1Qf4y1J7D4?p=1&vd_source=9f55bd955a3e8c3a2f2960ba719c5890
https://github.com/liuchangji/kalman-filter-in-single-object-tracking
https://github.com/ZhangPHEngr/Kalman-in-MOT
Adding this prediction step to our tracking algorithm solves the problem of the target being lost for short periods.