For cases where the target is occluded, a Kalman filter is used to predict its trajectory; experiments show that it delivers good trajectory-prediction results.
Observation: the sensor reading, which may carry a large error for various practical reasons.
Estimate (also called the prior estimate): produced by the Kalman filter's state-transition equation.
Optimal estimate (also called the posterior estimate): obtained by fusing the observation with the prior estimate.
The posterior estimate fuses the prior estimate with the observation; for example, the prediction for the next frame (derived from the current frame) is fused with the detection in the next frame. Predict first (prior estimate), then update (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix.
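In equation form, this fusion is the standard Kalman correction, where $\hat{x}^-_k$ is the prior estimate, $z_k$ the observation, and $K_k$ the Kalman gain:

$$
\hat{x}_k = \hat{x}^-_k + K_k\left(z_k - H\hat{x}^-_k\right)
$$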
Using a Kalman filter to predict the target's motion trajectory in single-object tracking.
Contents:
- Two versions of the code
  - Version 1: read target-box txt files generated by the detection algorithm
    - 1.1 main.py
    - 1.2 utils.py
  - Version 2: generate target-box coordinates in real time with the detection algorithm
    - 2.1 ai-kalman.py
    - 2.2 AIDetector_pytorch.py
- Kalman theory
  - The expressions p(w) ~ N(0,Q), p(v) ~ N(0,R)
- Problems encountered during implementation
  - 1. Getting the value out of a tensor
- Reference links
Two versions of the code
Version 1: read target-box txt files generated by the detection algorithm
Single-object tracking: the detector produces detection boxes, and a single ID is kept for the whole sequence, so the unique tracked target is not lost even when two similar objects appear. The detector's box serves as the observation. The target state is X = [x, y, w, h, dx, dy]: center coordinates, width and height, and the velocity of the center. How is the target's observation found among the N detected boxes? The box at time t with the largest IOU against the posterior estimate from time t-1 is taken as the observation (this is error-prone when targets cross, which is why the observation must be fused with the prior estimate; a minimal sketch of the association step follows).
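Before the full script, here is a minimal sketch of this max-IOU association, with hypothetical toy boxes, using the cal_iou and xywh_to_xyxy helpers from utils.py (section 1.2 below):

```python
import numpy as np
from utils import cal_iou, xywh_to_xyxy  # helpers defined in 1.2 below

# Posterior state from the previous frame: [cx, cy, w, h, dx, dy]
X_posterior = np.array([150., 150., 100., 100., 0., 0.])

# Hypothetical detections in the current frame (xyxy format)
detections = [np.array([105., 110., 200., 205.]),   # overlaps the previous box
              np.array([400., 400., 480., 470.])]   # far away

max_iou, target_box = 0.3, None  # 0.3 = IOU matching threshold
for det in detections:
    iou = cal_iou(det, xywh_to_xyxy(X_posterior[0:4]))
    if iou > max_iou:
        max_iou, target_box = iou, det  # this detection becomes the observation

print(target_box)  # the first box wins; if it stays None, the target is treated as lost
```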
1.1 main.py
import os
import cv2
import numpy as np
from utils import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace
# Single-object tracking
# The detector produces detection boxes; a single ID is kept for the whole sequence,
# so the unique tracked target is not lost when two similar objects appear
# The detector's box is the measurement (observation)
# Target state X = [x, y, w, h, dx, dy]: center coordinates, width/height, center velocity
# Finding the observation among the N detected boxes:
# the box at time t with the largest IOU against the posterior estimate from t-1
# is taken as the observation (error-prone when targets cross,
# hence the fusion with the prior estimate)
#
# 状态初始化
initial_target_box = [729, 238, 764, 339] # 目标初始bouding box
# initial_target_box = [193 ,342 ,250 ,474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
0, 0]]).T # [中心x,中心y,宽w,高h,dx,dy]
IOU_Threshold = 0.3 # 匹配时的阈值
# 状态转移矩阵,上一时刻的状态转移到当前时刻
A = np.array([[1, 0, 0, 0, 1, 0],
[0, 1, 0, 0, 0, 1],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 1]])
# 状态观测矩阵,即教程中的大C
H = np.eye(6)
# 过程噪声w协方差矩阵Q,p(w)~N(0,Q),噪声w来自真实世界中的不确定性,w服从均值为0、协方差为Q的正态分布(高斯分布)
# 在跟踪任务当中,过程噪声来自于目标移动的不确定性(突然加速、减速、转弯等)
Q = np.eye(6) * 0.1
# 观测噪声v协方差矩阵R,p(v)~N(0,R)
# 观测噪声来自于检测框丢失、重叠等,代码中没有用到观测噪声
R = np.eye(6) * 1
# 控制输入矩阵B
B = None
# 状态估计协方差矩阵P初始化,表示状态之间的变化关系,比如xywh之间有某种联系,比如y变化大那么h大概率也要发生较大变化
# 该矩阵用于估计各个权重值,包括卡尔曼增益K等
P = np.eye(6)
if __name__ == "__main__":
    video_path = "./data/testvideo1.mp4"
    label_path = "./data/labels"
    file_name = "testvideo1"
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (768, 576))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimate covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(255, 255, 255), target=False)
        # print(frame_counter)
        label_file_path = os.path.join(label_path, file_name + "_" + str(frame_counter) + ".txt")
        with open(label_file_path, "r") as f:
            content = f.readlines()
        max_iou = IOU_Threshold
        max_iou_matched = False
        # --------- Find the observation via maximum IOU ------------
        for j, data_ in enumerate(content):
            data = data_.replace('\n', "").split(" ")
            xyxy = np.array(data[1:5], dtype="float")
            plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            if iou > max_iou:
                target_box = xyxy
                max_iou = iou
                max_iou_matched = True
        if max_iou_matched:
            # The max-IOU box is taken as the observation
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])
        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state estimate covariance matrix P --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ------ Compute the Kalman gain: it decides how much of
            # (observation - prior estimate) is added on top of the prior estimate ---------------------
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # -------------- Update the posterior estimate ------------
            # Z is the observation (detection result); X_prior is the prior, X_posterior the posterior
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # --------- Update the state estimate covariance matrix P -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # IOU matching failed, so there is no observation:
            # propagate the last posterior estimate through the motion model only
            # (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        draw_trace(frame, trace_list)
        cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
1.2 utils.py
import cv2

def xyxy_to_xywh(xyxy):
    # Convert [x1, y1, x2, y2] to (center_x, center_y, w, h)
    center_x = (xyxy[0] + xyxy[2]) / 2
    center_y = (xyxy[1] + xyxy[3]) / 2
    w = xyxy[2] - xyxy[0]
    h = xyxy[3] - xyxy[1]
    return (center_x, center_y, w, h)

def plot_one_box(xyxy, img, color=(0, 200, 0), target=False):
    xy1 = (int(xyxy[0]), int(xyxy[1]))
    xy2 = (int(xyxy[2]), int(xyxy[3]))
    if target:
        color = (0, 0, 255)
    cv2.rectangle(img, xy1, xy2, color, 1, cv2.LINE_AA)  # 1-px anti-aliased outline

def updata_trace_list(box_center, trace_list, max_list_len=50):
    # Append the new center, dropping the oldest point once the list is full
    if len(trace_list) <= max_list_len:
        trace_list.append(box_center)
    else:
        trace_list.pop(0)
        trace_list.append(box_center)
    return trace_list

def draw_trace(img, trace_list):
    """
    Draw the trace as line segments between consecutive centers
    :param img:
    :param trace_list:
    :return:
    """
    for i, item in enumerate(trace_list):
        if i < 1:
            continue
        cv2.line(img,
                 (trace_list[i][0], trace_list[i][1]), (trace_list[i - 1][0], trace_list[i - 1][1]),
                 (255, 255, 0), 3)

def cal_iou(box1, box2):
    """
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return: IOU of the two boxes
    """
    x1min, y1min, x1max, y1max = box1[0], box1[1], box1[2], box1[3]
    x2min, y2min, x2max, y2max = box2[0], box2[1], box2[2], box2[3]
    # Areas of the two boxes
    s1 = (y1max - y1min + 1.) * (x1max - x1min + 1.)
    s2 = (y2max - y2min + 1.) * (x2max - x2min + 1.)
    # Coordinates of the intersection
    xmin = max(x1min, x2min)
    ymin = max(y1min, y2min)
    xmax = min(x1max, x2max)
    ymax = min(y1max, y2max)
    inter_h = max(ymax - ymin + 1, 0)
    inter_w = max(xmax - xmin + 1, 0)
    intersection = inter_h * inter_w
    union = s1 + s2 - intersection
    # Compute the IOU
    iou = intersection / union
    return iou

def cal_distance(box1, box2):
    """
    Distance between the center points of two boxes
    :param box1: xyxy, top-left and bottom-right corners
    :param box2: xyxy
    :return:
    """
    center1 = ((box1[0] + box1[2]) // 2, (box1[1] + box1[3]) // 2)
    center2 = ((box2[0] + box2[2]) // 2, (box2[1] + box2[3]) // 2)
    dis = ((center1[0] - center2[0]) ** 2 + (center1[1] - center2[1]) ** 2) ** 0.5
    return dis

def xywh_to_xyxy(xywh):
    # Convert (center_x, center_y, w, h) back to [x1, y1, x2, y2]
    # (// is floor division, so sub-pixel precision is deliberately dropped)
    x1 = xywh[0] - xywh[2] // 2
    y1 = xywh[1] - xywh[3] // 2
    x2 = xywh[0] + xywh[2] // 2
    y2 = xywh[1] + xywh[3] // 2
    return [x1, y1, x2, y2]

if __name__ == "__main__":
    box1 = [100, 100, 200, 200]
    box2 = [100, 100, 200, 300]
    iou = cal_iou(box1, box2)
    print(iou)
    box1.pop(0)
    box1.append(555)
    print(box1)
Version 2: generate target-box coordinates in real time with the detection algorithm
AIDetector_pytorch wraps the YOLO detection algorithm; once imported, it detects every frame of the video in real time and outputs the results. Since my video contains a single target, there is always exactly one detection, so single-object tracking can be realized. The Kalman filter predicts the motion trajectory; strung together frame by frame, this yields the tracking algorithm. The code in this article is detection-based: the detector runs on every frame, and when the target disappears, the Kalman filter takes over the trajectory prediction (see the condensed sketch below).
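A condensed sketch of the per-frame decision the script makes; kalman_step is an illustrative helper, not part of the code below, and the matrices match the ones defined in the scripts:

```python
import numpy as np

# Same 6-state constant-velocity model as in the scripts
A = np.eye(6); A[0, 4] = A[1, 5] = 1
H = np.eye(6)
Q = np.eye(6) * 0.1
R = np.eye(6) * 1

def kalman_step(X_posterior, P_posterior, Z=None):
    """One tracking step: predict + update when a detection Z is available,
    prediction only when the target is lost (Z is None)."""
    X_prior = A @ X_posterior                  # propagate through the motion model
    if Z is None:                              # no detection this frame
        return X_prior, P_posterior
    P_prior = A @ P_posterior @ A.T + Q
    K = P_prior @ H.T @ np.linalg.inv(H @ P_prior @ H.T + R)
    X_post = X_prior + K @ (Z - H @ X_prior)   # correct with the detection
    P_post = (np.eye(6) - K @ H) @ P_prior
    return X_post, P_post

# Example: one lost frame followed by a matched detection
X, P = np.array([[370., 650., 123., 40., 0., 0.]]).T, np.eye(6)
X, P = kalman_step(X, P)                                                  # lost
X, P = kalman_step(X, P, np.array([[375., 648., 123., 40., 5., -2.]]).T)  # matched
```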
2.1 ai-kalman.py
from AIDetector_pytorch import Detector
import cv2
import os
import numpy as np
from utilss import plot_one_box, cal_iou, xyxy_to_xywh, xywh_to_xyxy, updata_trace_list, draw_trace

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

det = Detector()
# State initialization
initial_target_box = [308, 630, 431, 670]
# initial_target_box = [137, 280, 192, 297]  # initial bounding box of the target
# initial_target_box = [193, 342, 250, 474]
initial_box_state = xyxy_to_xywh(initial_target_box)
initial_state = np.array([[initial_box_state[0], initial_box_state[1], initial_box_state[2], initial_box_state[3],
                           0, 0]]).T  # [center x, center y, width w, height h, dx, dy]
IOU_Threshold = 0.1  # IOU threshold used for matching
# State transition matrix: carries the previous state over to the current time step
A = np.array([[1, 0, 0, 0, 1, 0],
              [0, 1, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0],
              [0, 0, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 1]])
# Observation matrix, the "C" matrix in some tutorials
H = np.eye(6)
# Process noise covariance matrix Q, p(w) ~ N(0,Q): in tracking, process noise comes from
# the unpredictability of the target's motion (sudden acceleration, braking, turns, etc.)
Q = np.eye(6) * 0.1
# Observation noise covariance matrix R, p(v) ~ N(0,R)
# Observation noise comes from lost or overlapping detection boxes
R = np.eye(6) * 1
# Control input matrix B
B = None
# Initialization of the state estimate covariance matrix P (see version 1 for details)
P = np.eye(6)
if __name__ == "__main__":
    count = 0  # counts the frames in which the target is lost
    video_path = r'E:\dplearning\KCFTest\testVideo\fire\4.mp4'
    cap = cv2.VideoCapture(video_path)
    # cv2.namedWindow("track", cv2.WINDOW_NORMAL)
    SAVE_VIDEO = True
    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('kalman_output.avi', fourcc, 20, (1440, 1080))

    # --------- State initialization ----------------------------------------
    frame_counter = 1
    X_posterior = np.array(initial_state)
    P_posterior = np.array(P)  # state estimate covariance matrix P
    Z = np.array(initial_state)
    trace_list = []  # stores the trajectory of the target box

    while True:
        # Capture frame-by-frame (check ret before touching the frame)
        ret, frame = cap.read()
        if not ret:
            break
        # print(frame.shape)
        last_box_posterior = xywh_to_xyxy(X_posterior[0:4])
        plot_one_box(last_box_posterior, frame, color=(0, 255, 0), target=False)
        # print(frame_counter)
        im, result = det.detect(frame)
        print(result)
        # --------- Find the observation ------------
        # The video contains a single target, so the first detection is taken directly;
        # the max-IOU check from version 1 is left commented out here
        max_iou = IOU_Threshold
        max_iou_matched = False
        if len(result) > 0:
            xyxy = np.array(result[0][0:4], dtype="float")
            # plot_one_box(xyxy, frame)
            iou = cal_iou(xyxy, xywh_to_xyxy(X_posterior[0:4]))
            # if iou > max_iou:
            target_box = xyxy
            # max_iou = iou
            max_iou_matched = True
        else:
            count = count + 1
            max_iou_matched = False
        # --------------------------------
        if max_iou_matched:
            # The detection box is taken as the observation
            plot_one_box(target_box, frame, target=True)
            xywh = xyxy_to_xywh(target_box)
            box_center = (int((target_box[0] + target_box[2]) // 2), int((target_box[1] + target_box[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 100)
            cv2.putText(frame, "Tracking", (int(target_box[0]), int(target_box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
            # Compute dx, dy
            dx = xywh[0] - X_posterior[0]
            dy = xywh[1] - X_posterior[1]
            Z[0:4] = np.array([xywh]).T
            Z[4::] = np.array([dx, dy])
        if max_iou_matched:
            # ----- Prior estimate -----------------
            X_prior = np.dot(A, X_posterior)
            box_prior = xywh_to_xyxy(X_prior[0:4])
            # plot_one_box(box_prior, frame, color=(0, 0, 0), target=False)
            # ----- Prior state estimate covariance matrix P --------
            P_prior_1 = np.dot(A, P_posterior)
            P_prior = np.dot(P_prior_1, A.T) + Q
            # ------ Compute the Kalman gain: it decides how much of
            # (observation - prior estimate) is added on top of the prior estimate ---------------------
            k1 = np.dot(P_prior, H.T)
            k2 = np.dot(np.dot(H, P_prior), H.T) + R
            K = np.dot(k1, np.linalg.inv(k2))
            # -------------- Update the posterior estimate ------------
            # Z is the observation (detection result); X_prior is the prior, X_posterior the posterior
            X_posterior_1 = Z - np.dot(H, X_prior)
            X_posterior = X_prior + np.dot(K, X_posterior_1)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            # --------- Update the state estimate covariance matrix P -----
            P_posterior_1 = np.eye(6) - np.dot(K, H)
            P_posterior = np.dot(P_posterior_1, P_prior)
        else:
            # IOU matching failed, so there is no observation:
            # propagate the last posterior estimate through the motion model only
            # (pure prediction, no Kalman update)
            X_posterior = np.dot(A, X_posterior)
            # X_posterior = np.dot(A_, X_posterior)
            box_posterior = xywh_to_xyxy(X_posterior[0:4])
            # plot_one_box(box_posterior, frame, color=(255, 255, 255), target=False)
            box_center = (int((box_posterior[0] + box_posterior[2]) // 2),
                          int((box_posterior[1] + box_posterior[3]) // 2))
            trace_list = updata_trace_list(box_center, trace_list, 20)
            cv2.putText(frame, "Lost", (box_center[0], box_center[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        # draw_trace(frame, trace_list)
        # cv2.putText(frame, "ALL BOXES(Green)", (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 200, 0), 2)
        cv2.putText(frame, "TRACKED BOX(Red)", (25, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Last frame best estimation(White)", (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.imshow('track', frame)
        if SAVE_VIDEO:
            out.write(frame)
        frame_counter = frame_counter + 1
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    print(count)
    cap.release()
    cv2.destroyAllWindows()
2.2 AIDetector_pytorch.py
import torch
import numpy as np
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.torch_utils import select_device
from utils.BaseDetector import baseDet

class Detector(baseDet):

    def __init__(self):
        super(Detector, self).__init__()
        self.init_model()
        self.build_config()

    def init_model(self):
        self.weights = 'weights/best-4-28.pt'  # change this line to swap in different weights
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        self.device = select_device(self.device)
        model = attempt_load(self.weights, map_location=self.device)
        model.to(self.device).eval()
        model.half()
        # torch.save(model, 'test.pt')
        self.m = model
        self.names = model.module.names if hasattr(
            model, 'module') else model.names

    def preprocess(self, img):
        img0 = img.copy()
        img = letterbox(img, new_shape=self.img_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half()  # half precision
        img /= 255.0  # normalize the image to [0, 1]
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        return img0, img

    def detect(self, im):
        im0, img = self.preprocess(im)
        pred = self.m(img, augment=False)[0]
        pred = pred.float()
        pred = non_max_suppression(pred, self.threshold, 0.40)
        pred_boxes = []
        for det in pred:
            if det is not None and len(det):
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                for *x, conf, cls_id in det:
                    lbl = self.names[int(cls_id)]
                    x1, y1 = int(x[0]), int(x[1])
                    x2, y2 = int(x[2]), int(x[3])
                    pred_boxes.append(
                        (x1, y1, x2, y2, lbl, conf))
        return im, pred_boxes
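For reference, a minimal usage sketch of this class (it assumes the YOLOv5 repository modules imported above and the weights file are in place; 'some_frame.jpg' is a placeholder):

```python
from AIDetector_pytorch import Detector
import cv2

det = Detector()
frame = cv2.imread('some_frame.jpg')  # placeholder: any BGR frame
im, boxes = det.detect(frame)         # boxes: list of (x1, y1, x2, y2, label, conf)
for x1, y1, x2, y2, lbl, conf in boxes:
    print(lbl, float(conf), (x1, y1, x2, y2))
```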
Kalman theory
Observation: the sensor reading, which may carry a large error for various practical reasons. Estimate: produced by the Kalman filter's state-transition equation (the prior estimate). Optimal estimate: fuses the two (the posterior estimate), using the observation as a correction. The fusion combines an estimate with an observation; for example, the prediction for the next frame (derived from the current frame) is fused with the detection in the next frame. Predict first (prior estimate), then update (posterior estimate); the update refreshes not only the state but also the state covariance matrix and the gain matrix, as the equations below show.
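In standard notation (matching A, B, H, Q, R in the code above; B is None here, so the control term drops out), the full predict/update cycle is:

$$
\begin{aligned}
\text{Predict:}\quad & \hat{x}^-_k = A\hat{x}_{k-1} + Bu_k \\
& P^-_k = AP_{k-1}A^T + Q \\
\text{Update:}\quad & K_k = P^-_k H^T \left(HP^-_k H^T + R\right)^{-1} \\
& \hat{x}_k = \hat{x}^-_k + K_k\left(z_k - H\hat{x}^-_k\right) \\
& P_k = (I - K_k H)\,P^-_k
\end{aligned}
$$

These five lines correspond one-to-one to X_prior, P_prior, K, X_posterior and P_posterior in the scripts.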
The role of the Kalman gain:
![Role of the Kalman gain](https://img-blog.csdnimg.cn/9f6bf022cbf1453b9c74ea78d54bdd04.png)
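The gain balances trust between the motion model and the measurement. With H = I, as in this code, the limiting cases make this concrete:

$$
R \to 0 \;\Rightarrow\; K_k \to I \;\;(\text{trust the observation fully}), \qquad
P^-_k \to 0 \;\Rightarrow\; K_k \to 0 \;\;(\text{trust the prior estimate fully}).
$$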
The expressions p(w) ~ N(0,Q) and p(v) ~ N(0,R) in the Kalman filter
The standard Kalman filter targets linear systems (a process equation plus an observation equation); w and v are the noise terms of the process equation and the observation equation, respectively. w follows a normal distribution (also called a Gaussian distribution) with mean 0 and covariance Q, and v follows a normal distribution with mean 0 and covariance R. In other words, w and v are both zero-mean Gaussian noise.
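Written out, the linear system these noise terms belong to is:

$$
x_k = A x_{k-1} + B u_k + w_k, \qquad z_k = H x_k + v_k,
\qquad w_k \sim N(0, Q),\quad v_k \sim N(0, R).
$$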
Problems encountered during implementation
1. Getting the value out of a tensor
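In this detection pipeline, confidences and coordinates come back as PyTorch tensors; a short sketch of the usual ways to extract plain Python values (variable names are illustrative):

```python
import torch

conf = torch.tensor([0.87])   # e.g. a confidence score returned by the detector

v1 = conf.item()              # single-element tensor -> Python float
v2 = float(conf)              # equivalent for a single element
arr = conf.cpu().numpy()      # move to CPU first, then convert to a NumPy array
print(v1, v2, arr)
```

Note that .item() only works on one-element tensors, and a tensor that requires grad needs .detach() before .numpy().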
Experimental results: works well with a static camera, and the target needs to move at low speed.
![Tracking result](https://img-blog.csdnimg.cn/92a9f4bdd466420eb87a0667701a2923.png)
Reference links
https://www.bilibili.com/video/BV1Qf4y1J7D4?p=1&vd_source=9f55bd955a3e8c3a2f2960ba719c5890
https://github.com/liuchangji/kalman-filter-in-single-object-tracking
https://github.com/ZhangPHEngr/Kalman-in-MOT
Adding this prediction step to our tracking algorithm solves the problem of the target being lost for short periods.