pytorch 深度学习图神经网络：(语义分割)三维网格语义分割

文章说明： 1)参考资料：PYG的文档。文档超链。斯坦福大学的机器学习课程。课程超链。(要挂梯子)。博客原文。原文超链。(要挂梯子)。原文理论参考文献。提取码8848。 2)我在百度网盘上传了这篇文章的jupyter notebook以及预训练模型。提取码8848。 3)博主水平不高，如有错误，还望批评指正。一些建议：注重理论的读者建议直接去看文献；注重实践的读者建议直接去看代码。原文的代码有详细注释，但如果不看原文的参考文献，这些注释实际帮助不大。建议手敲一遍代码，会对理解很有帮助。变量名字取得很好，如果有图神经网络基础，不看文献也是可以的。

文章目录

前言1：硬件问题 / 前言2：有关综述 / 数据描述 / 数据下载 / 任务描述 / 代码演示

前言1：硬件问题

如果电脑不是很好，并不建议自己训练。我的电脑不是很好，训练大概有20分钟。最后电脑特别的烫，感觉对电脑很不好。我的电脑配置如下(应该是看这个，对于硬件我不清楚)。直接下载预训练的模型就好。

前言2：有关综述

对于一般的图像分割以及图像分类任务，卷积神经网络取得了巨大成功。但是卷积神经网络不能处理不规则的数据结构。我们希望把卷积神经网络推广到不规则数据结构上。博主对卷积神经网络不是很了解，所以不作过多评价。图神经网络为解决这个问题应运而生。我们使用3D点云进行演示。

数据描述

我们使用两个矩阵表示数据：十分简单，看图易懂。图片源自原博客。我们需要一个矩阵存储n个点的位置。我们还需要一个矩阵存储点与点之间的边关系(3点确定一个平面，这就解释了为什么每个面是3个点)。

数据下载

提取码8848

任务描述

正如标题：一个简单的分类任务。我们需要对3D点云中的每个点进行分类，共12个类别：头部点云，躯干点云，左臂点云，左手点云，右臂点云，右手点云，左大腿点云，左小腿点云，左脚点云，右大腿点云，右小腿点云，右脚点云。

代码演示

import torch

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

路径有关注意事项1：下载数据之后不要进行解压，放在一个文件夹之中就可以了。路径有关注意事项2：复制的文件地址需要进行修改，可能这跟操作系统有关但是我不清楚，我就只说我的。直接复制是这样"C:\Users\19216\Desktop\project\3DImage_Classification_And_Segmentation"，我们需要把所有的"\"改为"/"。

# Directory that contains the downloaded dataset archive, written with forward
# slashes. This value is machine-specific: replace it with your own location.
root="C:/Users/19216/Desktop/project/3DImage_Classification_And_Segmentation"

以下定义数据变换。

from torch_geometric.transforms import BaseTransform

from torch_geometric.data import Data

# BaseTransform is built very simply; reading its source directly is recommended.
class NormalizeUnitSphere(BaseTransform):
    """Center the node features ``data.x`` and scale them into the unit sphere."""

    # Static helpers: they do not depend on the instance, so they take no self.
    @staticmethod
    def _re_center(x):
        """Subtract the mean over dim 0 so the points are centered on the origin."""
        return x - x.mean(dim=0)

    @staticmethod
    def _re_scale_to_unit_length(x):
        """Divide by the largest row norm so the farthest point has length 1."""
        farthest = x.norm(dim=1).max()
        return x / farthest

    # Invoked when the transform object itself is called.
    def __call__(self, data: Data):
        """Normalize ``data.x`` in place when it is present; return ``data``."""
        if data.x is None:
            return data
        centered = self._re_center(data.x)
        data.x = self._re_scale_to_unit_length(centered)
        return data

    def __repr__(self):
        """Return the class name followed by ``()``."""
        return f"{type(self).__name__}()"

from torch_geometric.transforms import Compose,FaceToEdge

# Transforms are applied in list order. remove_faces=False keeps data.face on
# each sample; the visualization code later rebuilds a mesh from data.face.
pre_transform = Compose(
    [
        FaceToEdge(remove_faces=False),
        NormalizeUnitSphere(),
    ]
)

以下加载变换数据。

from pathlib import Path

import trimesh

def load_mesh(mesh_filename: Path):
    """Load a mesh file with trimesh and return its geometry as tensors.

    Args:
        mesh_filename: Path of the mesh file to read.

    Returns:
        ``(vertices, faces)``: the vertices as a float tensor, and the faces as
        a contiguous long tensor that is the transpose of ``mesh.faces``.
    """
    loaded = trimesh.load_mesh(mesh_filename, process=False)
    vertex_tensor = torch.from_numpy(loaded.vertices).to(torch.float)
    face_tensor = torch.from_numpy(loaded.faces)
    # Transpose so the corner index runs along dim 0.
    face_tensor = face_tensor.t().to(torch.long).contiguous()
    return vertex_tensor, face_tensor

from torch_geometric.data import InMemoryDataset,extract_zip

from functools import lru_cache

import numpy as np

关于这部分的代码，必须先看这个链接，看了你就明白了。这里的代码逻辑挺有意思，由于篇幅原因请读者自行研究。我来讲下逻辑，不一定正确哈。首先train_data申请调用SegmentationFaust。父类立马开始调用四个方法(如果没有定义就直接跳过)：raw_file_names()，processed_file_names()，download()，process()。具体到这里就只有processed_file_names()和process()。父类发现文件夹中没有processed_file_names()对应的文件，就立即用process()处理数据，生成processed_file_names()对应的文件。然后self.processed_paths就对应["training.pt","test.pt"]这两个文件的路径。最后子类开始运作，读取数据并且赋值。所有数据在第一步就处理好了。

class SegmentationFaust(InMemoryDataset):
    """In-memory MPI-FAUST mesh dataset with body-part segmentation labels.

    Expects ``<root>/MPI-FAUST.zip``. On first use the archive is extracted,
    every ``*.ply`` mesh is turned into a ``Data`` object, and two files are
    written: ``training.pt`` (first 80 meshes in sorted filename order) and
    ``test.pt`` (the remaining meshes).

    Args:
        root: Directory containing ``MPI-FAUST.zip``.
        train: Load the training split when True, the test split otherwise.
        pre_transform: Optional transform applied once to each ``Data`` object
            inside ``process`` before it is saved.

    NOTE(review): processed files written by an earlier version of this class
    contain untransformed samples; delete the processed directory so they are
    regenerated with ``pre_transform`` applied.
    """

    map_seg_label_to_id = dict(
        head=0, torso=1, left_arm=2, left_hand=3,
        right_arm=4, right_hand=5, left_upper_leg=6, left_lower_leg=7,
        left_foot=8, right_upper_leg=9, right_lower_leg=10, right_foot=11,
    )

    def __init__(self, root, train: bool = True, pre_transform=None):
        # Pass pre_transform by keyword. The second positional parameter of
        # InMemoryDataset.__init__ is `transform`, so a positional call would
        # leave self.pre_transform as None during process() and re-run the
        # transform on every sample access instead of once.
        super().__init__(root, pre_transform=pre_transform)
        path = self.processed_paths[0] if train else self.processed_paths[1]
        self.data, self.slices = torch.load(path)

    # Exposed as a property, as the parent class expects.
    @property
    def processed_file_names(self) -> list:
        """Names of the files that ``process`` writes."""
        return ["training.pt", "test.pt"]

    # Cached so the label file is read from disk only once per instance.
    # NOTE: lru_cache on a method keeps a reference to the instance.
    @property
    @lru_cache(maxsize=32)
    def _segmentation_labels(self):
        """Labels from ``MPI-FAUST/segmentations.npz`` as an int64 tensor."""
        path_to_labels = Path(self.root) / "MPI-FAUST" / "segmentations.npz"
        seg_labels = np.load(str(path_to_labels))["segmentation_labels"]
        return torch.from_numpy(seg_labels).type(torch.int64)

    def _mesh_filenames(self):
        """Iterate over the ``*.ply`` files in ``MPI-FAUST/meshes``."""
        path_to_meshes = Path(self.root) / "MPI-FAUST" / "meshes"
        # Glob pattern matching (not a regular expression).
        return path_to_meshes.glob("*.ply")

    def _unzip_dataset(self):
        """Extract ``MPI-FAUST.zip`` into ``self.root``."""
        path_to_zip = Path(self.root) / "MPI-FAUST.zip"
        extract_zip(str(path_to_zip), self.root, log=False)

    def process(self):
        """Extract the archive, build one ``Data`` per mesh, and save both splits."""
        self._unzip_dataset()
        data_list = []
        # Sorted so the 80/remaining split is deterministic.
        for mesh_filename in sorted(self._mesh_filenames()):
            vertices, faces = load_mesh(mesh_filename)
            data = Data(x=vertices, face=faces)
            # Every mesh is given the same label tensor.
            data.segmentation_labels = self._segmentation_labels
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)
        torch.save(self.collate(data_list[:80]), self.processed_paths[0])
        torch.save(self.collate(data_list[80:]), self.processed_paths[1])

# Creating the dataset for the first time runs the processing step and prints:
#   Processing...
#   Done!
train_data = SegmentationFaust(root=root, train=True, pre_transform=pre_transform)

test_data = SegmentationFaust(root=root, train=False, pre_transform=pre_transform)

from torch_geometric.loader import DataLoader

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_data, shuffle=True)

test_loader = DataLoader(test_data, shuffle=False)

from itertools import tee

这段代码特别抽象，读者自行理解研究(我的意思语法抽象不指代码逻辑)

def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs.

    ``pairwise([a, b, c])`` yields ``(a, b)`` then ``(b, c)``. An input with
    fewer than two items yields nothing.
    """
    leading, trailing = tee(iterable, 2)
    # Advance the second copy by one item so zip pairs each element with its
    # successor; the None default keeps an empty input from raising.
    next(trailing, None)
    return zip(leading, trailing)

import torch.nn as nn

这段代码同样抽象，读者自行理解研究(我的意思语法抽象不指代码逻辑)

def get_mlp_layers(channels: list, activation, output_activation=nn.Identity):
    """Build an MLP as an ``nn.Sequential`` from a list of layer widths.

    Args:
        channels: Layer widths; each consecutive pair defines one ``nn.Linear``.
        activation: Zero-argument callable producing the module placed after
            every linear layer except the last.
        output_activation: Zero-argument callable producing the module placed
            after the last linear layer.

    Returns:
        The assembled ``nn.Sequential``.
    """
    # Split the (in, out) pairs into the hidden layers and the output layer.
    *hidden_pairs, last_pair = pairwise(channels)
    modules = []
    for fan_in, fan_out in hidden_pairs:
        modules.append(nn.Linear(fan_in, fan_out))
        modules.append(activation())
    modules.append(nn.Linear(*last_pair))
    modules.append(output_activation())
    return nn.Sequential(*modules)

from torch_geometric.nn import MessagePassing

def get_conv_layers(channels: list, conv: MessagePassing, conv_params: dict):
    """Instantiate one ``conv`` layer per consecutive pair in ``channels``.

    Each layer is created as ``conv(in_ch, out_ch, **conv_params)``; the layers
    are returned as a plain list in channel order.
    """
    stacked = []
    for fan_in, fan_out in pairwise(channels):
        stacked.append(conv(fan_in, fan_out, **conv_params))
    return stacked

from torch_geometric.utils import add_self_loops,remove_self_loops

import torch.nn.functional as F

参考论文留到最后再介绍，这里暂且按下不表。以下部分均为模型建立。

class FeatureSteeredConvolution(MessagePassing):
    """Feature-steered graph convolution with mean aggregation.

    Each message is a weighted sum over ``num_heads`` linear projections of the
    neighbor feature ``x_j``. The weights are a softmax over the heads of
    logits computed from the features at the two edge endpoints.

    Args:
        in_channels: Size of each input node feature.
        out_channels: Size of each output node feature.
        num_heads: Number of weight matrices that are softly combined.
        ensure_trans_invar: When True the logits are ``u(x_i - x_j) + c``.
            When False a separate projection ``v`` is applied to ``x_j``:
            ``u(x_i) + v(x_j) + c``.
        bias: Add a learnable bias to the aggregated output.
        with_self_loops: When True, ``forward`` removes existing self loops and
            adds exactly one per node before propagating.
    """

    def __init__(self, in_channels: int, out_channels: int, num_heads: int,
                 ensure_trans_invar: bool = True, bias: bool = True,
                 with_self_loops: bool = True):
        super().__init__(aggr="mean")
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_heads = num_heads
        self.with_self_loops = with_self_loops

        # One projection holding all heads, reshaped per head in message().
        self.linear = torch.nn.Linear(in_features=in_channels,
                                      out_features=out_channels * num_heads,
                                      bias=False)
        self.u = torch.nn.Linear(in_features=in_channels, out_features=num_heads, bias=False)
        # Allocated uninitialized; filled by reset_parameters() below.
        self.c = torch.nn.Parameter(torch.Tensor(num_heads))

        if not ensure_trans_invar:
            self.v = torch.nn.Linear(in_features=in_channels, out_features=num_heads, bias=False)
        else:
            self.register_parameter("v", None)

        if bias:
            self.bias = torch.nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialize every learnable tensor of the layer."""
        torch.nn.init.uniform_(self.linear.weight)
        torch.nn.init.uniform_(self.u.weight)
        torch.nn.init.normal_(self.c, mean=0.0, std=0.1)
        if self.v is not None:
            torch.nn.init.uniform_(self.v.weight)
        if self.bias is not None:
            torch.nn.init.normal_(self.bias, mean=0.0, std=0.1)

    def forward(self, x, edge_index):
        """Propagate ``x`` along ``edge_index`` and add the bias if present."""
        if self.with_self_loops:
            # Remove first so no node ends up with a duplicated self loop.
            edge_index, _ = remove_self_loops(edge_index)
            edge_index, _ = add_self_loops(edge_index=edge_index, num_nodes=x.shape[0])
        out = self.propagate(edge_index, x=x)
        return out if self.bias is None else out + self.bias

    def _compute_attention_weights(self, x_i, x_j):
        """Return the per-edge softmax weights over the heads.

        Raises:
            ValueError: If the last dimension of ``x_j`` is not ``in_channels``.
        """
        if x_j.shape[-1] != self.in_channels:
            raise ValueError(
                f"Expected input features with {self.in_channels} channels."
                f"Instead received features with {x_j.shape[-1]} channels."
            )
        if self.v is None:
            attention_logits = self.u(x_i - x_j) + self.c
        else:
            # Fixed: the original called the non-existent attribute `self.b`,
            # which raised AttributeError whenever ensure_trans_invar=False.
            attention_logits = self.u(x_i) + self.v(x_j) + self.c
        return F.softmax(attention_logits, dim=1)

    def message(self, x_i, x_j):
        """Build the message for each edge as a head-weighted sum of projections."""
        attention_weights = self._compute_attention_weights(x_i, x_j)
        x_j = self.linear(x_j).view(-1, self.num_heads, self.out_channels)
        return (attention_weights.view(-1, self.num_heads, 1) * x_j).sum(dim=1)

class GraphFeatureEncoder(torch.nn.Module):
    """Stack of ``FeatureSteeredConvolution`` layers.

    Every convolution except the last is followed by ReLU and, when
    ``apply_batch_norm`` is set, by ``BatchNorm1d``. The last convolution's
    output is returned as is.

    Args:
        in_features: Feature size of the input nodes.
        conv_channels: Output size of each convolution, in order (a list).
        num_heads: Forwarded to every convolution.
        apply_batch_norm: Whether to batch-normalize after the activations.
        ensure_trans_invar: Forwarded to every convolution.
        bias: Forwarded to every convolution.
        with_self_loops: Forwarded to every convolution.
    """

    def __init__(self, in_features, conv_channels, num_heads,
                 apply_batch_norm: bool = True, ensure_trans_invar: bool = True,
                 bias: bool = True, with_self_loops: bool = True):
        super().__init__()
        self.apply_batch_norm = apply_batch_norm
        conv_params = dict(num_heads=num_heads, ensure_trans_invar=ensure_trans_invar,
                           bias=bias, with_self_loops=with_self_loops)
        conv_layers = get_conv_layers(channels=[in_features] + conv_channels,
                                      conv=FeatureSteeredConvolution,
                                      conv_params=conv_params)
        self.conv_layers = nn.ModuleList(conv_layers)

        # The last convolution gets no batch norm, so its width is not needed.
        *first_conv_channels, _ = conv_channels
        # None placeholders let forward() zip over the layers uniformly.
        self.batch_layers = [None for _ in first_conv_channels]
        if apply_batch_norm:
            self.batch_layers = nn.ModuleList(
                [nn.BatchNorm1d(channel) for channel in first_conv_channels]
            )

    def forward(self, x, edge_index):
        """Apply all convolutions to ``x`` over the graph ``edge_index``."""
        *first_conv_layers, final_conv_layer = self.conv_layers
        for conv_layer, batch_layer in zip(first_conv_layers, self.batch_layers):
            x = conv_layer(x, edge_index)
            x = F.relu(x)
            if batch_layer is not None:
                x = batch_layer(x)
        return final_conv_layer(x, edge_index)

class MeshSeg(torch.nn.Module):
    """Segmentation network: input MLP, graph encoder, then output MLP.

    Args:
        in_features: Size of the raw node features.
        encoder_features: Input size of the graph encoder. The input MLP's
            output is fed straight into the graph encoder, so this has to
            match the last entry of ``encoder_channels``.
        conv_channels: Output sizes of the graph convolutions (a list).
        encoder_channels: Layer widths of the input MLP (a list).
        decoder_channels: Hidden widths of the output MLP (a list).
        num_heads: Number of heads of every graph convolution.
        num_classes: Size of the final output per node.
        apply_batch_norm: Forwarded to the graph encoder.
    """

    def __init__(self, in_features, encoder_features, conv_channels, encoder_channels,
                 decoder_channels, num_heads, num_classes, apply_batch_norm=True):
        super().__init__()
        encoder_dims = [in_features] + encoder_channels
        self.input_encoder = get_mlp_layers(channels=encoder_dims, activation=nn.ReLU)

        self.gnn = GraphFeatureEncoder(
            in_features=encoder_features,
            conv_channels=conv_channels,
            num_heads=num_heads,
            apply_batch_norm=apply_batch_norm,
        )

        # The output MLP starts from the width of the last graph convolution.
        *_, last_conv_width = conv_channels
        decoder_dims = [last_conv_width] + decoder_channels + [num_classes]
        self.final_projection = get_mlp_layers(decoder_dims, activation=nn.ReLU)

    def forward(self, data):
        """Return the output of the final MLP for every node of ``data``."""
        node_features = data.x
        connectivity = data.edge_index
        embedded = self.input_encoder(node_features)
        encoded = self.gnn(embedded, connectivity)
        return self.final_projection(encoded)

设定参数

# Architecture settings passed to MeshSeg.
model_params = {
    "in_features": 3,
    "encoder_features": 16,
    "conv_channels": [32, 64, 128, 64],
    "encoder_channels": [16],
    "decoder_channels": [32],
    "num_heads": 12,
    "num_classes": 12,
    "apply_batch_norm": True,
}

net = MeshSeg(**model_params).to(device)

# Training settings and state.
best_test_acc = 0.0
num_epochs = 50
lr = 0.001
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()

开始训练

def train(net, train_data, optimizer, loss_fn, device):
    """Run one optimization pass over ``train_data``.

    Args:
        net: Model called as ``net(batch)``.
        train_data: Sized iterable of batches exposing ``to(device)`` and
            ``segmentation_labels``.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Called as ``loss_fn(output, labels)``.
        device: Device each batch is moved to.

    Returns:
        The sum of the per-batch losses divided by ``len(train_data)``.
    """
    net.train()
    batch_losses = []
    for batch in train_data:
        batch = batch.to(device)
        optimizer.zero_grad()
        logits = net(batch)
        # squeeze() drops the size-1 dimensions of the stored label tensor.
        targets = batch.segmentation_labels.squeeze()
        loss = loss_fn(logits, targets)
        loss.backward()
        batch_losses.append(loss.item())
        optimizer.step()
    return sum(batch_losses) / len(train_data)

def accuracy(predictions, gt_seg_labels):
    """Return the fraction of rows whose arg-max class equals the label.

    Args:
        predictions: Score tensor; the arg-max is taken over the last
            dimension, which is kept with size 1.
        gt_seg_labels: Label tensor with the same shape as that arg-max.

    Raises:
        ValueError: If the arg-max and label shapes differ.
    """
    predicted = predictions.argmax(dim=-1, keepdim=True)
    if predicted.shape == gt_seg_labels.shape:
        num_correct = predicted.eq(gt_seg_labels).sum()
        total = predicted.shape[0]
        return float(num_correct / total)
    raise ValueError("Expected Shapes to be equivalent")

def evaluate_performance(dataset, net, device):
    """Return the mean of the per-sample accuracies of ``net`` over ``dataset``."""

    def _score(sample):
        # Move the sample to the device, predict, and score it against its labels.
        sample = sample.to(device)
        return accuracy(net(sample), sample.segmentation_labels)

    scores = [_score(sample) for sample in dataset]
    return sum(scores) / len(scores)

@torch.no_grad()
def test(net, train_data, test_data, device):
    """Evaluate ``net`` in eval mode with gradients disabled.

    Returns:
        ``(train_acc, test_acc)``: the mean accuracy on each dataset.
    """
    net.eval()
    return (
        evaluate_performance(train_data, net, device),
        evaluate_performance(test_data, net, device),
    )

from tqdm import tqdm

# Train for num_epochs epochs. After each epoch the metrics are shown on the
# progress bar, and the weights are saved whenever test accuracy improves.
with tqdm(range(num_epochs), unit="Epoch") as tepochs:
    for epoch in tepochs:
        train_loss = train(net, train_loader, optimizer, loss_fn, device)
        train_acc, test_acc = test(net, train_loader, test_loader, device)
        tepochs.set_postfix(
            train_loss=train_loss,
            train_accuracy=100 * train_acc,
            test_accuracy=100 * test_acc,
        )
        if not test_acc > best_test_acc:
            continue
        best_test_acc = test_acc
        torch.save(net.state_dict(), root + "/checkpoint_best_colab")

开始画图

def load_model(model_params, path_to_checkpoint, device):
    """Build a ``MeshSeg`` and load weights from a saved state dict.

    Args:
        model_params: Keyword arguments for ``MeshSeg``.
        path_to_checkpoint: Location of the file written by ``torch.save``.
        device: Device the tensors are mapped to and the model is moved to.

    Returns:
        The model on ``device`` with the checkpoint weights loaded.

    Raises:
        ValueError: If a ``RuntimeError`` occurs while building the model or
            loading the checkpoint (for example mismatching parameter names).
    """
    try:
        model = MeshSeg(**model_params)
        # map_location lets a checkpoint saved from GPU tensors be loaded on a
        # machine without CUDA instead of failing during deserialization.
        state_dict = torch.load(str(path_to_checkpoint), map_location=device)
        model.load_state_dict(state_dict, strict=True)
        model.to(device)
        return model
    except RuntimeError as err_msg:
        raise ValueError(
            f"Given checkpoint {str(path_to_checkpoint)} could not be loaded. {err_msg}"
        ) from err_msg

def get_best_model(model_params, device):
    """Load the model stored at ``root + "/checkpoint_best_colab"``.

    ``root`` is the module-level dataset directory; the training loop writes
    its best checkpoint to the same location.
    """
    return load_model(model_params, Path(root + "/checkpoint_best_colab"), device)

# Replace the in-memory network with the best checkpoint saved during training.
net = get_best_model(model_params, device)

# One integer color triple per body-part label.
segmentation_colors = dict(
    head=torch.tensor([255, 255, 255], dtype=torch.int),
    torso=torch.tensor([255, 255, 128], dtype=torch.int),
    left_arm=torch.tensor([255, 255, 0], dtype=torch.int),
    left_hand=torch.tensor([255, 128, 255], dtype=torch.int),
    right_arm=torch.tensor([255, 128, 128], dtype=torch.int),
    right_hand=torch.tensor([255, 128, 0], dtype=torch.int),
    left_upper_leg=torch.tensor([255, 0, 255], dtype=torch.int),
    left_lower_leg=torch.tensor([255, 0, 128], dtype=torch.int),
    left_foot=torch.tensor([255, 0, 0], dtype=torch.int),
    right_upper_leg=torch.tensor([128, 255, 255], dtype=torch.int),
    right_lower_leg=torch.tensor([128, 255, 128], dtype=torch.int),
    right_foot=torch.tensor([128, 255, 0], dtype=torch.int),
)

# Re-key the colors by numeric class id using the dataset's label-to-id table.
map_seg_id_to_color = {
    seg_id: segmentation_colors[label]
    for label, seg_id in train_data.map_seg_label_to_id.items()
}

@torch.no_grad()
def visualize_prediction(net, data, device, map_seg_id_to_color):
    """Predict a class per node and return a trimesh mesh colored accordingly.

    Args:
        net: Model called as ``net(data)``.
        data: Sample providing ``x`` (used as vertices) and ``face``.
        device: Device the sample is moved to before prediction.
        map_seg_id_to_color: Mapping from integer class id to a color tensor.

    Returns:
        A ``trimesh.base.Trimesh`` whose vertex colors encode the prediction.
    """
    data = data.to(device)
    predicted_seg_labels = net(data).argmax(dim=-1, keepdim=True)

    # Look up one color row per predicted label and stack them.
    color_rows = [map_seg_id_to_color[int(label)] for label in predicted_seg_labels]
    mesh_colors = torch.vstack(color_rows)

    vertices = data.x.cpu().numpy()
    # data.face is transposed back before being handed to trimesh.
    faces = data.face.t().cpu().numpy()
    segmented_mesh = trimesh.base.Trimesh(vertices=vertices, faces=faces, process=False)
    segmented_mesh.visual.vertex_colors = mesh_colors.cpu().numpy()
    return segmented_mesh

# Color the first ten test meshes by their predicted segmentation and place
# them side by side, one unit apart along the first axis.
segmented_meshes = []
mesh_ids = list(range(10))
for idx, mesh_id in enumerate(mesh_ids):
    segmented_mesh = visualize_prediction(net, test_data[mesh_id], device, map_seg_id_to_color)
    segmented_mesh.vertices += [idx * 1.0, 0.0, 0.0]
    segmented_meshes.append(segmented_mesh)

scene = trimesh.scene.Scene(segmented_meshes)
scene.show()

论文部分不想写了。以后再来吧，那就这样吧。

评论可见，请评论后查看内容，谢谢！！！

您阅读本篇文章共花了：

金钥匙

pytorch 深度学习图神经网络：(语义分割)三维网格语义分割

Viewport3D显示不全 WPF Viewport3D 在Window10和Window11系统，WPF使用Viewport3D 渲染失败问题解决方案

3D全景虚拟旅游在旅游行业中具备哪些应用价值？

发表评论取消回复

金钥匙

pytorch 深度学习 图神经网络：(语义分割)三维网格语义分割

Viewport3D显示不全 WPF Viewport3D 在Window10和Window11系统，WPF使用Viewport3D 渲染失败问题解决方案

3D全景虚拟旅游在旅游行业中具备哪些应用价值？

相关文章

发表评论取消回复

pytorch 深度学习图神经网络：(语义分割)三维网格语义分割