目录
介绍
效果
模型
decoder_fc_nsc.onnx
encoder.onnx
项目
代码
下载
C# Image Caption
介绍
地址:https://github.com/ruotianluo/ImageCaptioning.pytorch
I decided to sync this repo up with self-critical.pytorch. (The old master is kept in the "old master" branch for archival purposes.)
效果
模型
decoder_fc_nsc.onnx
Inputs
-------------------------
name:fc_feats
tensor:Float[1, 2048]
---------------------------------------------------------------
Outputs
-------------------------
name:seq
tensor:Int64[1, 20]
name:logprobs
tensor:Float[1, 20, 9488]
---------------------------------------------------------------
encoder.onnx
Inputs
-------------------------
name:img
tensor:Float[1, 3, 640, 640]
---------------------------------------------------------------
Outputs
-------------------------
name:fc
tensor:Float[2048]
---------------------------------------------------------------
项目
代码
using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using OpenCvSharp; using OpenCvSharp.Dnn; using System; using System.Collections.Generic; using System.Drawing; using System.Drawing.Imaging; using System.IO; using System.Linq; using System.Windows.Forms;
namespace ImageCaption
{
    /// <summary>
    /// Main form of the image-caption demo: an OpenCV DNN encoder produces a feature vector
    /// that an ONNX Runtime decoder turns into a sequence of vocabulary indices.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Filter string used by the open-file dialog.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
        // Path of the image currently selected; empty means "nothing to process".
        string image_path = "";
        string startupPath;
        // classer_path, dt1 and dt2 are not referenced anywhere in the visible code.
        string classer_path;
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;
        string model_path;
        Mat image;
        Mat result_image;

        SessionOptions options;
        InferenceSession onnx_session;
        // NOTE(review): the generic arguments and names of the next declarations were lost in the
        // published listing; they are reconstructed from how the fields are used below.
        Tensor<float> input_tensor;
        List<NamedOnnxValue> input_container;
        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
        DisposableNamedOnnxValue[] results_onnxvalue;
        Tensor<Int64> result_tensors;

        Net net;

        // Number of encoder features copied into the decoder input.
        int feat_len;
        // Maximum number of tokens read from the decoder output.
        int D;
        int inpWidth = 640;
        int inpHeight = 640;
        // Per-channel statistics applied by Normalize.
        float[] mean = new float[] { 0.485f, 0.456f, 0.406f };
        float[] std = new float[] { 0.229f, 0.224f, 0.225f };

        // Maps a vocabulary index (as text) to its word; filled in Form1_Load.
        Dictionary<string, string> ix_to_word = new Dictionary<string, string>();

        /// <summary>
        /// Lets the user pick an image, shows it in pictureBox1 and clears the previous caption and result.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Dispose the dialog deterministically instead of leaving it to the finalizer.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                image_path = ofd.FileName;
            }

            pictureBox1.Image = null;
            pictureBox1.Image = new Bitmap(image_path);
            textBox1.Text = "";

            // Release the previously loaded Mat once its replacement exists.
            Mat stale_image = image;
            image = new Mat(image_path);
            if (stale_image != null)
            {
                stale_image.Dispose();
            }

            pictureBox2.Image = null;
        }

        /// <summary>
        /// Captions the image at image_path: runs the encoder network, feeds its features to the
        /// ONNX decoder, maps the returned indices to words and shows the caption in textBox1 and,
        /// drawn onto a copy of the image, in pictureBox2.
        /// </summary>
        private unsafe void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            button2.Enabled = false;
            pictureBox2.Image = null;
            textBox1.Text = "";
            Application.DoEvents();

            try
            {
                // Reload the source image and release the copy it replaces.
                Mat stale_image = image;
                image = new Mat(image_path);
                if (stale_image != null)
                {
                    stale_image.Dispose();
                }

                // Encoder: resize, normalize, forward pass; every temporary Mat is disposed.
                using (Mat temp_image = new Mat())
                {
                    Cv2.Resize(image, temp_image, new OpenCvSharp.Size(inpWidth, inpHeight));
                    // NOTE(review): mean/std look like RGB-ordered statistics, but the Mat is loaded by
                    // OpenCV (BGR) and BlobFromImage is called without swapRB - confirm that the encoder
                    // expects this channel order.
                    Normalize(temp_image);

                    using (Mat blob = CvDnn.BlobFromImage(temp_image))
                    {
                        net.SetInput(blob);
                        using (Mat result_mat = net.Forward())
                        {
                            // Guard the raw pointer read below against a shorter-than-expected output.
                            if (result_mat.Total() < feat_len)
                            {
                                throw new InvalidOperationException("Encoder output has fewer elements than feat_len.");
                            }

                            float* ptr_feat = (float*)result_mat.Data;
                            for (int i = 0; i < feat_len; i++)
                            {
                                input_tensor[0, i] = ptr_feat[i];
                            }
                        }
                    }
                }

                // The container is reused between clicks; without Clear() each run would add one more
                // "fc_feats" entry to the inputs passed to the session.
                input_container.Clear();
                input_container.Add(NamedOnnxValue.CreateFromTensor("fc_feats", input_tensor));

                Int64[] result_array;
                result_infer = onnx_session.Run(input_container);
                try
                {
                    results_onnxvalue = result_infer.ToArray();
                    // The first output is read as the token sequence.
                    result_tensors = results_onnxvalue[0].AsTensor<Int64>();
                    // Copy to a managed array before the native results are released.
                    result_array = result_tensors.ToArray();
                }
                finally
                {
                    result_infer.Dispose();
                }

                // Collect words until the first non-positive index, reading at most D tokens.
                List<string> tokens = new List<string>();
                int limit = Math.Min(D, result_array.Length);
                for (int k = 0; k < limit; k++)
                {
                    if (result_array[k] <= 0)
                    {
                        break;
                    }
                    tokens.Add(ix_to_word[result_array[k].ToString()]);
                }
                string words = string.Join(" ", tokens);

                // Draw the caption on a fresh copy, releasing the previous result image.
                Mat stale_result = result_image;
                result_image = image.Clone();
                if (stale_result != null)
                {
                    stale_result.Dispose();
                }

                Cv2.PutText(result_image, words
                    , new OpenCvSharp.Point(10, 60)
                    , HersheyFonts.HersheySimplex
                    , 1
                    , new Scalar(0, 0, 255)
                    , 2
                    );

                // The stream is intentionally left open: a Bitmap created from a stream needs it
                // for as long as the Bitmap is in use.
                pictureBox2.Image = new Bitmap(result_image.ToMemoryStream());
                textBox1.Text = words;
            }
            finally
            {
                // Re-enable the button even when inference fails.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Normalizes <paramref name="src"/> in place: converts to CV_32FC3 scaled by 1/255, then
        /// applies (x - mean[i]) / std[i] to channel i.
        /// </summary>
        public void Normalize(Mat src)
        {
            src.ConvertTo(src, MatType.CV_32FC3, 1.0 / 255);

            Mat[] bgr = src.Split();
            for (int i = 0; i < bgr.Length; ++i)
            {
                bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1 / std[i], (0.0 - mean[i]) / std[i]);
            }

            Cv2.Merge(bgr, src);

            foreach (Mat channel in bgr)
            {
                channel.Dispose();
            }
        }

        /// <summary>
        /// Loads the decoder session, the encoder network and the vocabulary, then shows the sample image.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            startupPath = System.Windows.Forms.Application.StartupPath;

            model_path = "model/decoder_fc_nsc.onnx";

            // Session options: INFO-level logging, CPU execution provider.
            options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_CPU(0);

            // Decoder session created from the local model file.
            onnx_session = new InferenceSession(model_path, options);

            feat_len = 2048;
            D = 20;

            // Decoder input tensor of shape [1, feat_len] and the reusable input container.
            input_tensor = new DenseTensor<float>(new[] { 1, feat_len });
            input_container = new List<NamedOnnxValue>();

            // Encoder network read through OpenCV DNN.
            net = CvDnn.ReadNetFromOnnx("model/encoder.onnx");

            // Vocabulary lines are "index:word"; split on the first ':' only and skip lines without one.
            using (StreamReader sr = new StreamReader("model/vocab.txt"))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    int separator = line.IndexOf(':');
                    if (separator < 0)
                    {
                        continue;
                    }
                    ix_to_word[line.Substring(0, separator)] = line.Substring(separator + 1);
                }
            }

            // Show the bundled sample image when present; otherwise start with no image selected.
            image_path = "test_img/1.jpg";
            if (File.Exists(image_path))
            {
                pictureBox1.Image = new Bitmap(image_path);
                image = new Mat(image_path);
            }
            else
            {
                image_path = "";
            }
        }

        private void pictureBox1_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox1.Image);
        }

        private void pictureBox2_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox2.Image);
        }

        // One dialog instance is reused for every save.
        SaveFileDialog sdf = new SaveFileDialog();

        /// <summary>
        /// Saves the image shown in pictureBox2 in the format selected in the save dialog.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (pictureBox2.Image == null)
            {
                return;
            }

            sdf.Title = "保存";
            sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.emf)|*.emf|Images (*.exif)|*.exif|Images (*.gif)|*.gif|Images (*.ico)|*.ico|Images (*.tiff)|*.tiff|Images (*.wmf)|*.wmf";
            if (sdf.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // Same order as the filter entries above; FilterIndex is 1-based.
            // NOTE(review): the Image.Save documentation says EMF/WMF are written as PNG data because
            // GDI+ has no encoder for them - consider dropping those choices.
            ImageFormat[] formats =
            {
                ImageFormat.Jpeg, ImageFormat.Png, ImageFormat.Bmp,
                ImageFormat.Emf, ImageFormat.Exif, ImageFormat.Gif,
                ImageFormat.Icon, ImageFormat.Tiff, ImageFormat.Wmf
            };
            int selected = sdf.FilterIndex - 1;
            if (selected < 0 || selected >= formats.Length)
            {
                // Nothing would be written, so do not report success.
                return;
            }

            // Copy only after the user confirmed, and release the copy as soon as it is written.
            using (Bitmap output = new Bitmap(pictureBox2.Image))
            {
                output.Save(sdf.FileName, formats[selected]);
            }

            MessageBox.Show("保存成功,位置:" + sdf.FileName);
        }
    }
}
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Windows.Forms;
namespace ImageCaption
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and calls <c>InitializeComponent()</c>.
/// </summary>
public Form1()
{
// NOTE(review): InitializeComponent is not defined in this part of the partial class;
// presumably it is the designer-generated control setup - confirm.
InitializeComponent();
}
// Filter string used by the open-file dialog.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
// Path of the image currently selected; empty means "nothing to process".
string image_path = "";
string startupPath;
// classer_path, dt1 and dt2 are not referenced anywhere in the visible code.
string classer_path;
DateTime dt1 = DateTime.Now;
DateTime dt2 = DateTime.Now;
string model_path;
Mat image;
Mat result_image;
SessionOptions options;
InferenceSession onnx_session;
// NOTE(review): the generic arguments and names of the next declarations were lost in the
// published listing; they are reconstructed from how the fields are used in the methods below.
// Decoder input, written element by element in button2_Click.
Tensor<float> input_tensor;
// Named inputs handed to onnx_session.Run.
List<NamedOnnxValue> input_container;
// Raw result collection returned by onnx_session.Run.
IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
DisposableNamedOnnxValue[] results_onnxvalue;
// First decoder output, read as Int64 token indices.
Tensor<Int64> result_tensors;
Net net;
// Number of encoder features copied into the decoder input.
int feat_len;
// Maximum number of tokens read from the decoder output.
int D;
int inpWidth = 640;
int inpHeight = 640;
// Per-channel statistics applied by Normalize.
float[] mean = new float[] { 0.485f, 0.456f, 0.406f };
float[] std = new float[] { 0.229f, 0.224f, 0.225f };
// Maps a vocabulary index (as text) to its word. Instantiated here because Form1_Load
// only adds entries to it and never creates it.
Dictionary<string, string> ix_to_word = new Dictionary<string, string>();
/// <summary>
/// Lets the user pick an image, shows it in pictureBox1 and clears the previous caption and result.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Dispose the dialog deterministically instead of leaving it to the finalizer.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        image_path = ofd.FileName;
    }

    pictureBox1.Image = null;
    pictureBox1.Image = new Bitmap(image_path);
    textBox1.Text = "";

    // Release the previously loaded Mat once its replacement exists.
    Mat stale_image = image;
    image = new Mat(image_path);
    if (stale_image != null)
    {
        stale_image.Dispose();
    }

    pictureBox2.Image = null;
}
/// <summary>
/// Captions the image at image_path: runs the encoder network, feeds its features to the
/// ONNX decoder, maps the returned indices to words and shows the caption in textBox1 and,
/// drawn onto a copy of the image, in pictureBox2.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <exception cref="InvalidOperationException">
/// The encoder output holds fewer than feat_len elements.
/// </exception>
private unsafe void button2_Click(object sender, EventArgs e)
{
    if (image_path == "")
    {
        return;
    }

    button2.Enabled = false;
    pictureBox2.Image = null;
    textBox1.Text = "";
    Application.DoEvents();

    try
    {
        // Reload the source image and release the copy it replaces.
        Mat stale_image = image;
        image = new Mat(image_path);
        if (stale_image != null)
        {
            stale_image.Dispose();
        }

        // Encoder: resize, normalize, forward pass; every temporary Mat is disposed.
        using (Mat temp_image = new Mat())
        {
            Cv2.Resize(image, temp_image, new OpenCvSharp.Size(inpWidth, inpHeight));
            // NOTE(review): mean/std look like RGB-ordered statistics, but the Mat is loaded by
            // OpenCV (BGR) and BlobFromImage is called without swapRB - confirm that the encoder
            // expects this channel order.
            Normalize(temp_image);

            using (Mat blob = CvDnn.BlobFromImage(temp_image))
            {
                net.SetInput(blob);
                using (Mat result_mat = net.Forward())
                {
                    // Guard the raw pointer read below against a shorter-than-expected output.
                    if (result_mat.Total() < feat_len)
                    {
                        throw new InvalidOperationException("Encoder output has fewer elements than feat_len.");
                    }

                    float* ptr_feat = (float*)result_mat.Data;
                    for (int i = 0; i < feat_len; i++)
                    {
                        input_tensor[0, i] = ptr_feat[i];
                    }
                }
            }
        }

        // The container is reused between clicks; without Clear() each run would add one more
        // "fc_feats" entry to the inputs passed to the session.
        input_container.Clear();
        input_container.Add(NamedOnnxValue.CreateFromTensor("fc_feats", input_tensor));

        Int64[] result_array;
        result_infer = onnx_session.Run(input_container);
        try
        {
            results_onnxvalue = result_infer.ToArray();
            // The first output is read as the token sequence.
            result_tensors = results_onnxvalue[0].AsTensor<Int64>();
            // Copy to a managed array before the native results are released.
            result_array = result_tensors.ToArray();
        }
        finally
        {
            result_infer.Dispose();
        }

        // Collect words until the first non-positive index, reading at most D tokens.
        List<string> tokens = new List<string>();
        int limit = Math.Min(D, result_array.Length);
        for (int k = 0; k < limit; k++)
        {
            if (result_array[k] <= 0)
            {
                break;
            }
            tokens.Add(ix_to_word[result_array[k].ToString()]);
        }
        string words = string.Join(" ", tokens);

        // Draw the caption on a fresh copy, releasing the previous result image.
        Mat stale_result = result_image;
        result_image = image.Clone();
        if (stale_result != null)
        {
            stale_result.Dispose();
        }

        Cv2.PutText(result_image, words
            , new OpenCvSharp.Point(10, 60)
            , HersheyFonts.HersheySimplex
            , 1
            , new Scalar(0, 0, 255)
            , 2
            );

        // The stream is intentionally left open: a Bitmap created from a stream needs it
        // for as long as the Bitmap is in use.
        pictureBox2.Image = new Bitmap(result_image.ToMemoryStream());
        textBox1.Text = words;
    }
    finally
    {
        // Re-enable the button even when inference fails.
        button2.Enabled = true;
    }
}
/// <summary>
/// Normalizes <paramref name="src"/> in place: converts it to CV_32FC3 scaled by 1/255, then
/// applies (x - mean[i]) / std[i] to channel i and merges the channels back.
/// </summary>
/// <param name="src">Image to normalize; overwritten with the result.</param>
public void Normalize(Mat src)
{
    // Step 1: floating-point conversion with a 1/255 scale factor.
    src.ConvertTo(src, MatType.CV_32FC3, 1.0 / 255);

    // Step 2: per-channel scale and shift, expressed as alpha * x + beta.
    Mat[] planes = src.Split();
    int index = 0;
    while (index < planes.Length)
    {
        Mat plane = planes[index];
        double alpha = 1 / std[index];
        double beta = (0.0 - mean[index]) / std[index];
        plane.ConvertTo(plane, MatType.CV_32FC1, alpha, beta);
        index++;
    }

    // Step 3: write the channels back into src and release the temporaries.
    Cv2.Merge(planes, src);
    for (int p = 0; p < planes.Length; p++)
    {
        planes[p].Dispose();
    }
}
/// <summary>
/// Loads the decoder session, the encoder network and the vocabulary, then shows the sample image.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    startupPath = System.Windows.Forms.Application.StartupPath;

    model_path = "model/decoder_fc_nsc.onnx";

    // Session options: INFO-level logging, CPU execution provider.
    options = new SessionOptions();
    options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
    options.AppendExecutionProvider_CPU(0);

    // Decoder session created from the local model file.
    onnx_session = new InferenceSession(model_path, options);

    feat_len = 2048;
    D = 20;

    // Decoder input tensor of shape [1, feat_len] and the reusable input container.
    // NOTE(review): the constructor arguments were lost in the published listing; the shape
    // follows the input_tensor[0, i] indexing used when the features are copied in.
    input_tensor = new DenseTensor<float>(new[] { 1, feat_len });
    input_container = new List<NamedOnnxValue>();

    // Encoder network read through OpenCV DNN.
    net = CvDnn.ReadNetFromOnnx("model/encoder.onnx");

    // Vocabulary lines are "index:word"; split on the first ':' only and skip lines without one
    // (the original indexing threw on blank or malformed lines).
    using (StreamReader sr = new StreamReader("model/vocab.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            int separator = line.IndexOf(':');
            if (separator < 0)
            {
                continue;
            }
            ix_to_word[line.Substring(0, separator)] = line.Substring(separator + 1);
        }
    }

    // Show the bundled sample image when present; otherwise start with no image selected
    // so the form still opens.
    image_path = "test_img/1.jpg";
    if (File.Exists(image_path))
    {
        pictureBox1.Image = new Bitmap(image_path);
        image = new Mat(image_path);
    }
    else
    {
        image_path = "";
    }
}
/// <summary>
/// Passes the image currently shown in pictureBox1 to <c>Common.ShowNormalImg</c>.
/// </summary>
private void pictureBox1_DoubleClick(object sender, EventArgs e)
{
    var shown = pictureBox1.Image;
    Common.ShowNormalImg(shown);
}
/// <summary>
/// Passes the image currently shown in pictureBox2 to <c>Common.ShowNormalImg</c>.
/// </summary>
private void pictureBox2_DoubleClick(object sender, EventArgs e)
{
    var shown = pictureBox2.Image;
    Common.ShowNormalImg(shown);
}
// One dialog instance is reused for every save.
SaveFileDialog sdf = new SaveFileDialog();

/// <summary>
/// Saves the image shown in pictureBox2 in the format selected in the save dialog and
/// reports the target path once the file has been written.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    if (pictureBox2.Image == null)
    {
        return;
    }

    sdf.Title = "保存";
    sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.emf)|*.emf|Images (*.exif)|*.exif|Images (*.gif)|*.gif|Images (*.ico)|*.ico|Images (*.tiff)|*.tiff|Images (*.wmf)|*.wmf";
    if (sdf.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Same order as the filter entries above; FilterIndex is 1-based.
    // NOTE(review): the Image.Save documentation says EMF/WMF are written as PNG data because
    // GDI+ has no encoder for them - consider dropping those choices.
    ImageFormat[] formats =
    {
        ImageFormat.Jpeg, ImageFormat.Png, ImageFormat.Bmp,
        ImageFormat.Emf, ImageFormat.Exif, ImageFormat.Gif,
        ImageFormat.Icon, ImageFormat.Tiff, ImageFormat.Wmf
    };
    int selected = sdf.FilterIndex - 1;
    if (selected < 0 || selected >= formats.Length)
    {
        // Nothing would be written, so do not report success.
        return;
    }

    // Copy only after the user confirmed, and release the copy as soon as it is written.
    using (Bitmap output = new Bitmap(pictureBox2.Image))
    {
        output.Save(sdf.FileName, formats[selected]);
    }

    MessageBox.Show("保存成功,位置:" + sdf.FileName);
}
}
}
下载
源码下载
好文链接
发表评论