For cases where the target is occluded, a Kalman filter is used to predict its trajectory; experiments show that it gives good trajectory-prediction results.

- Measurement: the sensor reading; for various practical reasons it may carry considerable error.
- Estimate, also called the prior estimate: produced by the Kalman filter's state-transition equation.
- Optimal estimate, also called the posterior estimate: obtained by fusing the measurement with the prior estimate.

The posterior estimate always fuses an estimate with a measurement, e.g. the prediction for the next frame (computed from the current frame) with the detection in the next frame. The filter first estimates (prior estimate), then updates (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix.
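In symbols (matching the A, H, Q, R and K names used in the code below, with a superscript minus marking the prior), one filter step is the standard predict/update pair; the control term B u_k is omitted since this code leaves B = None:

$$
\begin{aligned}
\text{predict:}\quad & X_k^- = A\,X_{k-1}, \qquad P_k^- = A\,P_{k-1}A^T + Q \\
\text{update:}\quad & K_k = P_k^- H^T \left(H P_k^- H^T + R\right)^{-1} \\
& X_k = X_k^- + K_k\,(Z_k - H X_k^-) \\
& P_k = (I - K_k H)\,P_k^-
\end{aligned}
$$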

Using a Kalman filter to predict the target's motion trajectory in single object tracking.

Contents:

- Two versions of the code
  - Version 1: read the target-box txt files generated by the detection algorithm
    - 1.1 main.py
    - 1.2 utils.py
  - Version 2: generate target-box coordinates in real time with the detection algorithm
    - 2.1 ai-kalman.py
    - 2.2 AIDetector_pytorch.py
- Kalman theory
  - The expressions p(w) ~ N(0,Q) and p(v) ~ N(0,R) in the Kalman filter
- Issues encountered during implementation
  - 1. Getting the value out of a tensor
- Reference links

Two versions of the code

Version 1: read the target-box txt files generated by the detection algorithm

Single object tracking:

- The detector produces detection boxes; a single ID is assigned for the whole sequence, so the unique target is not lost even when two similar objects appear.
- The detector's detection box serves as the measurement.
- Target state X = [x, y, w, h, delta_x, delta_y]: box center, width and height, and center velocity.
- How to pick the target's measurement when N boxes are observed: the box at time t with the largest IOU against the posterior estimate from t-1 is taken as the measurement. This is error-prone when targets cross, which is why it is fused with the prior estimate (see the sketch below).
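Distilled from the matching loop in main.py, the association step reads as a small standalone function (a sketch only; `find_measurement` is a name introduced here, and `cal_iou` comes from utils.py in section 1.2):

```python
from utils import cal_iou  # IOU helper from section 1.2


def find_measurement(detections, box_posterior, iou_threshold=0.3):
    """Pick the detection with the largest IOU against the last posterior box.

    detections: list of candidate boxes in xyxy format (this frame's detections)
    box_posterior: last frame's posterior estimate, xyxy format
    Returns (box, matched); box is None when no detection clears the threshold.
    """
    best_box, best_iou = None, iou_threshold
    for box in detections:
        iou = cal_iou(box, box_posterior)
        if iou > best_iou:  # keep the strongest overlap above the threshold
            best_box, best_iou = box, iou
    return best_box, best_box is not None
```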

1.1 main.py

```python
import os
import cv2
import numpy as np
from utils import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace

# Single object tracking:
# - the detector produces boxes, and a single ID is assigned throughout, so the unique
#   target is not lost even when two similar objects appear
# - the detector's box serves as the measurement
# - target state X = [x, y, w, h, delta_x, delta_y]: box center, width/height, center velocity
# - measurement association: among the N detected boxes at time t, the one with the
#   largest IOU against the t-1 posterior estimate is taken as the measurement
#   (error-prone when targets cross, hence the fusion with the prior estimate)

# State initialization
initial_target_box = [729, 238, 764, 339]  # initial target bounding box
# initial_target_box = [193, 342, 250, 474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
                           0, 0]]).T  # [center x, center y, width w, height h, dx, dy]
IOU_Threshold = 0.3  # matching threshold

# State transition matrix A: propagates the previous state to the current time step
A = np.array([[1, 0, 0, 0, 1, 0],
              [0, 1, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0],
              [0, 0, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 1]])

# Observation matrix H (the "C" matrix in some tutorials)
H = np.eye(6)

# Process noise covariance Q, p(w) ~ N(0, Q): w models real-world uncertainty and is
# zero-mean Gaussian with covariance Q. In tracking, process noise comes from the
# target's motion uncertainty (sudden acceleration, braking, turning, ...)
Q = np.eye(6) * 0.1

# Measurement noise covariance R, p(v) ~ N(0, R): measurement noise comes from lost
# or overlapping detection boxes
R = np.eye(6) * 1

# Control input matrix B (unused here)
B = None

# Initial state estimation covariance P: captures how the state variables co-vary
# (e.g. a large change in y likely comes with a large change in h); it feeds the
# computation of the Kalman gain K
P = np.eye(6)

if __name__ == "__main__":
    video_path = "./data/testvideo1.mp4"
    label_path = "./data/labels"
    file_name = "testvideo1"
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (768, 576))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimation covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target's box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(255, 255, 255), target=False)
        # print(frame_counter)
        # One label file per frame; each line holds a detection with the box in
        # absolute xyxy pixels after one leading field (likely the class id)
        label_file_path = os.path.join(label_path, file_name + "_" + str(frame_counter) + ".txt")
        with open(label_file_path, "r") as f:
            content = f.readlines()
        max_iou = IOU_Threshold
        max_iou_matched = False
        # --------- Use maximum IOU to find the measurement ------------
        for j, data_ in enumerate(content):
            data = data_.replace('\n', "").split(" ")
            xyxy = np.array(data[1:5], dtype="float")
            plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            if iou > max_iou:
                target_box = xyxy
                max_iou = iou
                max_iou_matched = True
        if max_iou_matched:
            # The max-IOU box is taken as the measurement
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy from the previous posterior center
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])

        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state covariance --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ----- Kalman gain: decides how much of (measurement - prior estimate)
            # is added on top of the prior estimate
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # ----- Posterior update: Z is the measurement (the detection),
            # X_prior the prior estimate, X_posterior the posterior estimate
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # ----- Posterior state covariance -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # IOU matching failed: no measurement, so just propagate the last best
            # estimate through the motion model (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        draw_trace(frame, trace_list)
        cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
```

1.2 utils.py

```python
import cv2


def xyxy_to_xywh(xyxy):
    """Convert a box from (x1, y1, x2, y2) to (center_x, center_y, w, h)."""
    center_x = (xyxy[0] + xyxy[2]) / 2
    center_y = (xyxy[1] + xyxy[3]) / 2
    w = xyxy[2] - xyxy[0]
    h = xyxy[3] - xyxy[1]
    return (center_x, center_y, w, h)


def plot_one_box(xyxy, img, color=(0, 200, 0), target=False):
    """Draw one box; the tracked target is drawn in red."""
    xy1 = (int(xyxy[0]), int(xyxy[1]))
    xy2 = (int(xyxy[2]), int(xyxy[3]))
    if target:
        color = (0, 0, 255)
    cv2.rectangle(img, xy1, xy2, color, 1, cv2.LINE_AA)  # thickness 1, anti-aliased


def updata_trace_list(box_center, trace_list, max_list_len=50):
    """Append box_center to the trace, keeping at most max_list_len points."""
    if len(trace_list) <= max_list_len:
        trace_list.append(box_center)
    else:
        trace_list.pop(0)
        trace_list.append(box_center)
    return trace_list


def draw_trace(img, trace_list):
    """
    Draw the trajectory stored in trace_list as a polyline.
    :param img: frame to draw on
    :param trace_list: list of box centers
    :return:
    """
    for i, item in enumerate(trace_list):
        if i < 1:
            continue
        cv2.line(img,
                 (trace_list[i][0], trace_list[i][1]), (trace_list[i - 1][0], trace_list[i - 1][1]),
                 (255, 255, 0), 3)


def cal_iou(box1, box2):
    """
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return: IOU of the two boxes
    """
    x1min, y1min, x1max, y1max = box1[0], box1[1], box1[2], box1[3]
    x2min, y2min, x2max, y2max = box2[0], box2[1], box2[2], box2[3]
    # Areas of the two boxes
    s1 = (y1max - y1min + 1.) * (x1max - x1min + 1.)
    s2 = (y2max - y2min + 1.) * (x2max - x2min + 1.)
    # Coordinates of the intersection
    xmin = max(x1min, x2min)
    ymin = max(y1min, y2min)
    xmax = min(x1max, x2max)
    ymax = min(y1max, y2max)
    inter_h = max(ymax - ymin + 1, 0)
    inter_w = max(xmax - xmin + 1, 0)
    intersection = inter_h * inter_w
    union = s1 + s2 - intersection
    # Compute the IOU
    iou = intersection / union
    return iou


def cal_distance(box1, box2):
    """
    Distance between the center points of two boxes.
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return: Euclidean distance between the two centers
    """
    center1 = ((box1[0] + box1[2]) // 2, (box1[1] + box1[3]) // 2)
    center2 = ((box2[0] + box2[2]) // 2, (box2[1] + box2[3]) // 2)
    dis = ((center1[0] - center2[0]) ** 2 + (center1[1] - center2[1]) ** 2) ** 0.5
    return dis


def xywh_to_xyxy(xywh):
    """Convert a box from (center_x, center_y, w, h) back to (x1, y1, x2, y2)."""
    x1 = xywh[0] - xywh[2] // 2
    y1 = xywh[1] - xywh[3] // 2
    x2 = xywh[0] + xywh[2] // 2
    y2 = xywh[1] + xywh[3] // 2
    return [x1, y1, x2, y2]


if __name__ == "__main__":
    box1 = [100, 100, 200, 200]
    box2 = [100, 100, 200, 300]
    iou = cal_iou(box1, box2)
    print(iou)
    box1.pop(0)
    box1.append(555)
    print(box1)
```

Version 2: generate target-box coordinates in real time with the detection algorithm

AIDetector_pytorch wraps the YOLO detection algorithm; once imported, it runs detection on every frame of the video in real time and outputs the results. My video contains a single target, so there is always exactly one detection, which makes single object tracking straightforward. The Kalman filter predicts the motion trajectory frame by frame, and chaining those predictions together tracks the target. The code in this article is detection-driven: the detection algorithm runs on every frame, and when the target disappears, the Kalman filter takes over and predicts the trajectory.

2.1 ai-kalman.py

```python
from AIDetector_pytorch import Detector
import cv2
import os
import numpy as np
from utilss import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace
# tracking helpers from section 1.2 (YOLOv5 already occupies the module name `utils`)

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

det = Detector()

# State initialization
initial_target_box = [308, 630, 431, 670]
# initial_target_box = [137, 280, 192, 297]  # initial target bounding box
# initial_target_box = [193, 342, 250, 474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
                           0, 0]]).T  # [center x, center y, width w, height h, dx, dy]
IOU_Threshold = 0.1  # matching threshold

# State transition matrix A: propagates the previous state to the current time step
A = np.array([[1, 0, 0, 0, 1, 0],
              [0, 1, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0],
              [0, 0, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 1]])

# Observation matrix H (the "C" matrix in some tutorials)
H = np.eye(6)

# Process noise covariance Q, p(w) ~ N(0, Q): zero-mean Gaussian noise from real-world
# uncertainty; in tracking it models the target's motion uncertainty
# (sudden acceleration, braking, turning, ...)
Q = np.eye(6) * 0.1

# Measurement noise covariance R, p(v) ~ N(0, R): measurement noise comes from lost
# or overlapping detection boxes
R = np.eye(6) * 1

# Control input matrix B (unused here)
B = None

# Initial state estimation covariance P: captures how the state variables co-vary
# (e.g. a large change in y likely comes with a large change in h); it feeds the
# computation of the Kalman gain K
P = np.eye(6)

if __name__ == "__main__":
    count = 0  # number of frames in which the target was lost
    video_path = r'E:\dplearning\KCFTest\testVideo\fire\4.mp4'
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (1440, 1080))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimation covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target's box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        # print(frame.shape)
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(0, 255, 0), target=False)
        # print(frame_counter)
        im, result = det.detect(frame)
        print(result)
        # --------- Use the detection as the measurement ------------
        max_iou = IOU_Threshold
        max_iou_matched = False
        if len(result) > 0:
            # Single-target video: take the first detection directly
            # (the IOU gate is left commented out below)
            xyxy = np.array(result[0][0:4], dtype="float")
            # plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            # if iou > max_iou:
            target_box = xyxy
            # max_iou = iou
            max_iou_matched = True
        else:
            count = count + 1
            max_iou_matched = False
        # --------------------------------
        if max_iou_matched:
            # A detection was found; treat it as the measurement
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy from the previous posterior center
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])

        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state covariance --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ----- Kalman gain: decides how much of (measurement - prior estimate)
            # is added on top of the prior estimate
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # ----- Posterior update: Z is the measurement (the detection),
            # X_prior the prior estimate, X_posterior the posterior estimate
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # ----- Posterior state covariance -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # Detection lost: no measurement, so just propagate the last best
            # estimate through the motion model (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        # draw_trace(frame, trace_list)
        # cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    print(count)
    cap.release()
    cv2.destroyAllWindows()
```

2.2 AIDetector_pytorch.py

```python
import torch
import numpy as np

from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.torch_utils import select_device
from utils.BaseDetector import baseDet


class Detector(baseDet):

    def __init__(self):
        super(Detector, self).__init__()
        self.init_model()
        self.build_config()

    def init_model(self):
        self.weights = 'weights/best-4-28.pt'  # change this line to swap model weights
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        self.device = select_device(self.device)
        model = attempt_load(self.weights, map_location=self.device)
        model.to(self.device).eval()
        model.half()
        # torch.save(model, 'test.pt')
        self.m = model
        self.names = model.module.names if hasattr(
            model, 'module') else model.names

    def preprocess(self, img):
        img0 = img.copy()
        img = letterbox(img, new_shape=self.img_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half()  # half precision
        img /= 255.0  # normalize pixel values to [0, 1]
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        return img0, img

    def detect(self, im):
        im0, img = self.preprocess(im)
        pred = self.m(img, augment=False)[0]
        pred = pred.float()
        pred = non_max_suppression(pred, self.threshold, 0.40)
        pred_boxes = []
        for det in pred:
            if det is not None and len(det):
                # Rescale boxes from the letterboxed size back to the original image
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                for *x, conf, cls_id in det:
                    lbl = self.names[int(cls_id)]
                    x1, y1 = int(x[0]), int(x[1])
                    x2, y2 = int(x[2]), int(x[3])
                    pred_boxes.append(
                        (x1, y1, x2, y2, lbl, conf))
        return im, pred_boxes
```
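For reference, a minimal sketch of the calling pattern that ai-kalman.py relies on; `sample.jpg` is an illustrative path, and the weights path is whatever `init_model` points at:

```python
import cv2
from AIDetector_pytorch import Detector

det = Detector()                  # loads the weights configured in init_model
frame = cv2.imread('sample.jpg')  # any BGR image; the path here is made up
im, boxes = det.detect(frame)     # boxes: [(x1, y1, x2, y2, label, conf), ...]
for x1, y1, x2, y2, lbl, conf in boxes:
    print(lbl, float(conf), (x1, y1, x2, y2))  # conf is a torch scalar; float() unwraps it
```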

Kalman theory

- Measurement: the sensor reading, which may carry considerable error for practical reasons.
- Estimate: produced by the Kalman filter's state-transition equation; this is the prior estimate.
- Optimal estimate: combines the two above and corrects the prior with the measurement; this is the posterior estimate.

The fusion always combines an estimate with a measurement, e.g. the next frame's prediction (computed from the current frame) with the next frame's detection. Estimate first (prior estimate), then update (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix.

The role of the Kalman gain:

![The role of the Kalman gain](https://img-blog.csdnimg.cn/9f6bf022cbf1453b9c74ea78d54bdd04.png)
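Two limiting cases make the gain's role concrete (standard results; the first assumes H is invertible, which holds here since H = I):

$$
\lim_{R \to 0} K_k = H^{-1} \;\Rightarrow\; X_k \to H^{-1} Z_k \quad \text{(trust the measurement),}
$$

$$
\lim_{P_k^- \to 0} K_k = 0 \;\Rightarrow\; X_k \to X_k^- \quad \text{(trust the prediction).}
$$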

The expressions p(w) ~ N(0,Q) and p(v) ~ N(0,R) in the Kalman filter

The standard Kalman filter targets linear systems (a process equation plus an observation equation); w and v are the noise terms in the process equation and the observation equation respectively. w follows a normal (Gaussian) distribution with mean 0 and covariance Q, and v follows a normal distribution with mean 0 and covariance R. In other words, both w and v are zero-mean Gaussian noise.
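Written out, the linear system these distributions belong to has the standard form (u_k is the control input, unused in this code since B = None):

$$
X_k = A\,X_{k-1} + B\,u_k + w_k, \qquad Z_k = H\,X_k + v_k,
$$

$$
w_k \sim \mathcal{N}(0, Q), \qquad v_k \sim \mathcal{N}(0, R).
$$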

Issues encountered during implementation

1. Getting the value out of a tensor
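This refers to unwrapping torch tensors: in detect() above, the per-box conf is still a 0-dim tensor, and .item() (or float()) converts it to a plain Python number. A minimal sketch, with illustrative values:

```python
import torch

conf = torch.tensor(0.87)  # e.g. a detection confidence returned as a 0-dim tensor
score = conf.item()        # .item() extracts the Python float from a 1-element tensor
print(score)

# Tensors that live on the GPU (or require grad) must be detached and moved first:
boxes = torch.rand(3, 4)                 # stand-in for model output
boxes_np = boxes.detach().cpu().numpy()  # tensor -> numpy array
```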

Experimental results: the filter works well with a static camera, and the target needs to move slowly.

![Experimental result](https://img-blog.csdnimg.cn/92a9f4bdd466420eb87a0667701a2923.png)

Reference links

- https://www.bilibili.com/video/BV1Qf4y1J7D4?p=1&vd_source=9f55bd955a3e8c3a2f2960ba719c5890
- https://github.com/liuchangji/kalman-filter-in-single-object-tracking
- https://github.com/ZhangPHEngr/Kalman-in-MOT

Adding this prediction step to our tracking algorithm solves the problem of brief target loss.
