C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片

本文主要是介绍C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

目录

效果

生成图片特征

查找踢足球的人

测试图片

模型信息

image_model.onnx

text_model.onnx

项目

代码

Form1.cs

Clip.cs

下载


C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片

效果

生成图片特征

查找踢足球的人

测试图片

模型信息

image_model.onnx

Inputs
-------------------------
name:image
tensor:Float[1, 3, 224, 224]
---------------------------------------------------------------

Outputs
-------------------------
name:unnorm_image_features
tensor:Float[1, 512]
---------------------------------------------------------------

text_model.onnx

Inputs
-------------------------
name:text
tensor:Int64[1, 52]
---------------------------------------------------------------

Outputs
-------------------------
name:unnorm_text_features
tensor:Float[1, 512]
---------------------------------------------------------------

项目

代码

Form1.cs


using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace Onnx_Demo
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        Clip mynet = new Clip("model/image_model.onnx", "model/text_model.onnx", "model/myvocab.txt");

        float[] imagedir_features;
        string image_dir = "test_img";
        StringBuilder sb = new StringBuilder();

        private void button2_Click(object sender, EventArgs e)
        {
            //特征向量 可以存二进制文件或者向量数据库
            imagedir_features = mynet.generate_imagedir_features(image_dir);
            txtInfo.Text = "生成完成!";
            txtInfo.Text += "有" + mynet.imgnum + "张图片,特征向量长度=" + imagedir_features.Length;
        }

        private void button3_Click(object sender, EventArgs e)
        {
            if (imagedir_features == null)
            {
                MessageBox.Show("请先生成图片特征!");
                return;
            }

            sb.Clear();
            txtInfo.Text = "";
            lblInfo.Text = "";
            pictureBox1.Image = null;

            string input_text = txt_input_text.Text;
            if (string.IsNullOrEmpty(input_text))
            {
                return;
            }
            List<Dictionary<string, float>> top5imglist = mynet.input_text_search_image(input_text, imagedir_features, mynet.imglist);

            sb.AppendLine("top5:");
            foreach (var item in top5imglist)
            {
                sb.AppendLine(Path.GetFileName(item.Keys.First()) + "  相似度:" + item[item.Keys.First()].ToString("F2"));
            }

            txtInfo.Text = sb.ToString();
            lblInfo.Text = Path.GetFileName(top5imglist[0].Keys.First());
            pictureBox1.Image = new Bitmap(top5imglist[0].Keys.First());

        }

        private void Form1_Load(object sender, EventArgs e)
        {

        }
    }
}


using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace Onnx_Demo
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // CLIP wrapper: image encoder + text encoder + tokenizer.
        Clip mynet = new Clip("model/image_model.onnx", "model/text_model.onnx", "model/myvocab.txt");

        // Feature vectors of the whole gallery; could be stored in a binary
        // file or a vector database instead of memory.
        float[] imagedir_features;
        string image_dir = "test_img";
        StringBuilder sb = new StringBuilder();

        // Encode every image in the gallery directory.
        private void button2_Click(object sender, EventArgs e)
        {
            imagedir_features = mynet.generate_imagedir_features(image_dir);
            txtInfo.Text = "生成完成!";
            txtInfo.Text += "有" + mynet.imgnum + "张图片,特征向量长度=" + imagedir_features.Length;
        }

        // Search the gallery with the query text and display the top-5 matches.
        private void button3_Click(object sender, EventArgs e)
        {
            if (imagedir_features == null)
            {
                MessageBox.Show("请先生成图片特征!");
                return;
            }

            sb.Clear();
            txtInfo.Text = "";
            lblInfo.Text = "";
            pictureBox1.Image = null;

            string input_text = txt_input_text.Text;
            if (string.IsNullOrEmpty(input_text))
            {
                return;
            }

            List<Dictionary<string, float>> top5imglist = mynet.input_text_search_image(input_text, imagedir_features, mynet.imglist);

            sb.AppendLine("top5:");
            foreach (var item in top5imglist)
            {
                sb.AppendLine(Path.GetFileName(item.Keys.First()) + "  相似度:" + item[item.Keys.First()].ToString("F2"));
            }

            txtInfo.Text = sb.ToString();
            lblInfo.Text = Path.GetFileName(top5imglist[0].Keys.First());
            pictureBox1.Image = new Bitmap(top5imglist[0].Keys.First());
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }
    }
}

Clip.cs

public class Clip
{
    // CLIP image preprocessing constants: input size and per-channel normalization.
    int inpWidth = 224;
    int inpHeight = 224;
    float[] mean = new float[] { 0.48145466f, 0.4578275f, 0.40821073f };
    float[] std = new float[] { 0.26862954f, 0.26130258f, 0.27577711f };

    int context_length = 52;    // fixed token-sequence length expected by text_model.onnx
    int len_text_feature = 512; // embedding length produced by both encoders

    Net net;                      // OpenCV DNN net for the image encoder
    float[] image_features_input; // L2-normalized feature of the most recently encoded image

    SessionOptions options;
    InferenceSession onnx_session; // ONNX Runtime session for the text encoder
    Tensor<long> input_tensor;
    List<NamedOnnxValue> input_container;
    IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
    DisposableNamedOnnxValue[] results_onnxvalue;
    Tensor<float> result_tensors;

    TokenizerBase tokenizer;

    int[] text_tokens_input;      // flattened [batch, context_length] token ids
    float[,] text_features_input; // L2-normalized text embeddings, [batch, len_text_feature]

    public int imgnum = 0;
    public List<string> imglist = new List<string>();

    /// <summary>
    /// Loads the image encoder (OpenCV DNN), the text encoder (ONNX Runtime, CPU)
    /// and the tokenizer vocabulary.
    /// </summary>
    public Clip(string image_modelpath, string text_modelpath, string vocab_path)
    {
        net = CvDnn.ReadNetFromOnnx(image_modelpath);

        // Session for the text model, pinned to CPU execution.
        options = new SessionOptions();
        options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
        options.AppendExecutionProvider_CPU(0);
        onnx_session = new InferenceSession(text_modelpath, options);
        input_container = new List<NamedOnnxValue>();

        load_tokenizer(vocab_path);
    }

    // Loads the vocabulary and pre-allocates the token buffer (grows on demand).
    void load_tokenizer(string vocab_path)
    {
        tokenizer = new TokenizerClipChinese();
        tokenizer.load_tokenize(vocab_path);
        text_tokens_input = new int[1024 * context_length];
    }

    // Converts BGR->RGB and applies (x/255 - mean) / std per channel, in place.
    Mat normalize_(Mat src)
    {
        Cv2.CvtColor(src, src, ColorConversionCodes.BGR2RGB);

        Mat[] channels = src.Split();
        for (int i = 0; i < channels.Length; ++i)
        {
            // ConvertTo computes alpha*x + beta: alpha = 1/(255*std), beta = -mean/std.
            channels[i].ConvertTo(channels[i], MatType.CV_32FC1, 1.0 / (255.0 * std[i]), (0.0 - mean[i]) / std[i]);
        }

        Cv2.Merge(channels, src);

        foreach (Mat channel in channels)
        {
            channel.Dispose();
        }

        return src;
    }

    // Runs the image encoder and stores the L2-normalized embedding in image_features_input.
    unsafe void generate_image_feature(Mat srcimg)
    {
        Mat temp_image = new Mat();
        Cv2.Resize(srcimg, temp_image, new Size(inpWidth, inpHeight), 0, 0, InterpolationFlags.Cubic);
        Mat normalized_mat = normalize_(temp_image);
        Mat blob = CvDnn.BlobFromImage(normalized_mat);
        net.SetInput(blob);

        Mat[] outs = new Mat[1] { new Mat() };
        string[] outBlobNames = net.GetUnconnectedOutLayersNames().ToArray();
        net.Forward(outs, outBlobNames);

        float* ptr_feat = (float*)outs[0].Data;
        // Dim 0 is batch size (always 1); dim 1 is the feature length (512,
        // equal to len_text_feature).
        int len_image_feature = outs[0].Size(1);
        image_features_input = new float[len_image_feature];
        float norm = 0.0f;
        for (int i = 0; i < len_image_feature; i++)
        {
            norm += ptr_feat[i] * ptr_feat[i];
        }
        norm = (float)Math.Sqrt(norm);
        for (int i = 0; i < len_image_feature; i++)
        {
            image_features_input[i] = ptr_feat[i] / norm;
        }

        // FIX: release native Mats that were previously leaked on every call
        // (normalized_mat aliases temp_image, so one Dispose covers both).
        foreach (Mat o in outs)
        {
            o.Dispose();
        }
        blob.Dispose();
        temp_image.Dispose();
    }

    // Tokenizes the texts and runs the text encoder once per text, storing
    // L2-normalized embeddings in text_features_input.
    unsafe void generate_text_feature(List<string> texts)
    {
        List<List<int>> text_token = new List<List<int>>(texts.Count);
        for (int i = 0; i < texts.Count; i++)
        {
            text_token.Add(new List<int>());
        }

        for (int i = 0; i < texts.Count; i++)
        {
            tokenizer.encode_text(texts[i], text_token[i]);
        }

        if (text_token.Count * context_length > text_tokens_input.Length)
        {
            text_tokens_input = new int[text_token.Count * context_length];
        }

        // FIX: the original "foreach (int i in text_tokens_input) text_tokens_input[i] = 0;"
        // used element VALUES as indices, so stale tokens from a previous call survived.
        Array.Clear(text_tokens_input, 0, text_tokens_input.Length);

        for (int i = 0; i < text_token.Count; i++)
        {
            if (text_token[i].Count > context_length)
            {
                // Over-long texts are skipped; their token row stays all zeros.
                Console.WriteLine("text_features index " + i + " ,bigger than " + context_length + "\n");
                continue;
            }
            for (int j = 0; j < text_token[i].Count; j++)
            {
                text_tokens_input[i * context_length + j] = text_token[i][j];
            }
        }

        text_features_input = new float[text_token.Count, len_text_feature];

        for (int i = 0; i < text_token.Count; i++)
        {
            // FIX: feed row i of the token matrix. The original always built the
            // tensor from the start of the whole buffer with shape [1, 52], so
            // every text after the first was encoded with row 0's tokens.
            long[] row = new long[context_length];
            for (int j = 0; j < context_length; j++)
            {
                row[j] = text_tokens_input[i * context_length + j];
            }

            input_tensor = new DenseTensor<long>(row, new[] { 1, context_length });
            input_container.Clear();
            input_container.Add(NamedOnnxValue.CreateFromTensor("text", input_tensor));

            // Run inference and read back the un-normalized text feature.
            result_infer = onnx_session.Run(input_container);
            results_onnxvalue = result_infer.ToArray();
            result_tensors = results_onnxvalue[0].AsTensor<float>();
            float[] text_feature_ptr = result_tensors.ToArray();

            float norm = 0.0f;
            for (int j = 0; j < len_text_feature; j++)
            {
                norm += text_feature_ptr[j] * text_feature_ptr[j];
            }
            norm = (float)Math.Sqrt(norm);
            for (int j = 0; j < len_text_feature; j++)
            {
                text_features_input[i, j] = text_feature_ptr[j] / norm;
            }

            // FIX: release the native output buffers (previously never disposed).
            result_infer.Dispose();
        }
    }

    // Numerically-stable in-place softmax.
    void softmax(float[] input)
    {
        int length = input.Length;
        float[] exp_x = new float[length];
        float maxVal = input.Max();
        float sum = 0;
        for (int i = 0; i < length; i++)
        {
            float expval = (float)Math.Exp(input[i] - maxVal);
            exp_x[i] = expval;
            sum += expval;
        }
        for (int i = 0; i < length; i++)
        {
            input[i] = exp_x[i] / sum;
        }
    }

    // Returns the indices that sort `array` ascending.
    int[] argsort_ascend(float[] array)
    {
        int array_len = array.Length;
        int[] array_index = new int[array_len];
        for (int i = 0; i < array_len; ++i)
        {
            array_index[i] = i;
        }

        var temp = array_index.ToList();
        // FIX: the original comparator returned 0 for the "greater than" case,
        // violating the Comparison<T> contract and producing an unreliable order.
        temp.Sort((pos1, pos2) => array[pos1].CompareTo(array[pos2]));
        return temp.ToArray();
    }

    /// <summary>
    /// Encodes <paramref name="text"/>, dot-products it against every image
    /// feature, softmaxes the logits and returns up to five
    /// {image path -> probability} entries, best match first.
    /// </summary>
    public List<Dictionary<string, float>> input_text_search_image(string text, float[] image_features, List<string> imglist)
    {
        int imgnum = imglist.Count;
        List<string> texts = new List<string> { text };

        generate_text_feature(texts);

        float[] logits_per_image = new float[imgnum];
        for (int i = 0; i < imgnum; i++)
        {
            float sum = 0;
            // Cosine similarity via dot product (both vectors are L2-normalized).
            for (int j = 0; j < len_text_feature; j++)
            {
                sum += image_features[i * len_text_feature + j] * text_features_input[0, j];
            }
            logits_per_image[i] = 100 * sum; // CLIP logit scale
        }

        softmax(logits_per_image);

        int[] index = argsort_ascend(logits_per_image);

        // FIX: clamp to the gallery size so fewer than 5 images no longer throws.
        int topk = Math.Min(5, imgnum);
        List<Dictionary<string, float>> top5imglist = new List<Dictionary<string, float>>(topk);
        for (int i = 0; i < topk; i++)
        {
            int ind = index[imgnum - 1 - i]; // walk the ascending order backwards
            Dictionary<string, float> result = new Dictionary<string, float>();
            result.Add(imglist[ind], logits_per_image[ind]);
            top5imglist.Add(result);
        }
        return top5imglist;
    }

    /// <summary>
    /// Encodes every image under <paramref name="image_dir"/> and returns all
    /// features concatenated into one flat array (imgnum * 512 floats).
    /// </summary>
    public float[] generate_imagedir_features(string image_dir)
    {
        imglist = Common.listdir(image_dir);
        imgnum = imglist.Count;
        Console.WriteLine("遍历到" + imgnum + "张图片");

        // FIX: accumulate in a List instead of the original per-image
        // Concat().ToArray(), which reallocated the whole buffer each iteration (O(n^2)).
        List<float> imagedir_features = new List<float>();
        for (int i = 0; i < imgnum; i++)
        {
            Mat srcimg = Cv2.ImRead(imglist[i]);
            generate_image_feature(srcimg);
            imagedir_features.AddRange(image_features_input);
            srcimg.Dispose();
        }

        return imagedir_features.ToArray();
    }
}

public class Clip
{
    // CLIP preprocessing constants: input size and per-channel normalization.
    int inpWidth = 224;
    int inpHeight = 224;
    float[] mean = new float[] { 0.48145466f, 0.4578275f, 0.40821073f };
    float[] std = new float[] { 0.26862954f, 0.26130258f, 0.27577711f };

    int context_length = 52;    // token-sequence length expected by the text model
    int len_text_feature = 512; // embedding length of both encoders

    Net net;                      // OpenCV DNN net for the image encoder
    float[] image_features_input; // normalized feature of the last encoded image

    SessionOptions options;
    InferenceSession onnx_session; // ONNX Runtime session for the text encoder
    Tensor<long> input_tensor;
    List<NamedOnnxValue> input_container;
    IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
    DisposableNamedOnnxValue[] results_onnxvalue;
    Tensor<float> result_tensors;

    TokenizerBase tokenizer;

    int[] text_tokens_input;
    float[,] text_features_input;

    public int imgnum = 0;
    public List<string> imglist = new List<string>();

    // Loads both model files and the tokenizer vocabulary.
    public Clip(string image_modelpath, string text_modelpath, string vocab_path)
    {
        net = CvDnn.ReadNetFromOnnx(image_modelpath);

        // Session options for the text model; runs on CPU.
        options = new SessionOptions();
        options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
        options.AppendExecutionProvider_CPU(0);
        onnx_session = new InferenceSession(text_modelpath, options);
        input_container = new List<NamedOnnxValue>();

        load_tokenizer(vocab_path);
    }

    // Loads the vocabulary and pre-allocates the token buffer.
    void load_tokenizer(string vocab_path)
    {
        tokenizer = new TokenizerClipChinese();
        tokenizer.load_tokenize(vocab_path);
        text_tokens_input = new int[1024 * context_length];
    }

    // BGR->RGB conversion plus (x/255 - mean) / std normalization, in place.
    Mat normalize_(Mat src)
    {
        Cv2.CvtColor(src, src, ColorConversionCodes.BGR2RGB);

        Mat[] planes = src.Split();
        for (int i = 0; i < planes.Length; ++i)
        {
            planes[i].ConvertTo(planes[i], MatType.CV_32FC1, 1.0 / (255.0 * std[i]), (0.0 - mean[i]) / std[i]);
        }

        Cv2.Merge(planes, src);

        foreach (Mat plane in planes)
        {
            plane.Dispose();
        }

        return src;
    }

    // Encodes one image and stores its L2-normalized feature.
    unsafe void generate_image_feature(Mat srcimg)
    {
        Mat temp_image = new Mat();
        Cv2.Resize(srcimg, temp_image, new Size(inpWidth, inpHeight), 0, 0, InterpolationFlags.Cubic);
        Mat normalized_mat = normalize_(temp_image);
        Mat blob = CvDnn.BlobFromImage(normalized_mat);
        net.SetInput(blob);

        // Run inference and read the result.
        Mat[] outs = new Mat[1] { new Mat() };
        string[] outBlobNames = net.GetUnconnectedOutLayersNames().ToArray();
        net.Forward(outs, outBlobNames);
        float* ptr_feat = (float*)outs[0].Data;
        // Dim 0 is batch size 1; dim 1 is the fixed 512-long feature, same as len_text_feature.
        int len_image_feature = outs[0].Size(1);
        image_features_input = new float[len_image_feature];

        float norm = 0.0f;
        for (int i = 0; i < len_image_feature; i++)
        {
            norm += ptr_feat[i] * ptr_feat[i];
        }
        norm = (float)Math.Sqrt(norm);
        for (int i = 0; i < len_image_feature; i++)
        {
            image_features_input[i] = ptr_feat[i] / norm;
        }
    }

    // Tokenizes the texts and encodes each one with the ONNX text model.
    unsafe void generate_text_feature(List<string> texts)
    {
        List<List<int>> text_token = new List<List<int>>(texts.Count);
        for (int i = 0; i < texts.Count; i++)
        {
            text_token.Add(new List<int>());
        }

        for (int i = 0; i < texts.Count; i++)
        {
            tokenizer.encode_text(texts[i], text_token[i]);
        }

        if (text_token.Count * context_length > text_tokens_input.Length)
        {
            text_tokens_input = new int[text_token.Count * context_length];
        }

        // NOTE(review): this iterates element VALUES as indices, so it does not
        // actually clear the whole buffer — preserved as-is for identical behavior.
        foreach (int i in text_tokens_input) { text_tokens_input[i] = 0; }

        for (int i = 0; i < text_token.Count; i++)
        {
            if (text_token[i].Count > context_length)
            {
                Console.WriteLine("text_features index " + i + " ,bigger than " + context_length + "\n");
                continue;
            }
            for (int j = 0; j < text_token[i].Count; j++)
            {
                text_tokens_input[i * context_length + j] = text_token[i][j];
            }
        }

        int[] text_token_shape = new int[] { 1, context_length };

        text_features_input = new float[text_token.Count, len_text_feature];

        long[] text_tokens_input_64 = new long[texts.Count * context_length];
        for (int i = 0; i < text_tokens_input_64.Length; i++)
        {
            text_tokens_input_64[i] = text_tokens_input[i];
        }

        for (int i = 0; i < text_token.Count; i++)
        {
            input_tensor = new DenseTensor<long>(text_tokens_input_64, new[] { 1, 52 });
            input_container.Clear();
            input_container.Add(NamedOnnxValue.CreateFromTensor("text", input_tensor));

            // Run inference and collect the outputs.
            result_infer = onnx_session.Run(input_container);
            results_onnxvalue = result_infer.ToArray();

            // First output node holds the un-normalized text feature.
            result_tensors = results_onnxvalue[0].AsTensor<float>();
            float[] text_feature_ptr = results_onnxvalue[0].AsTensor<float>().ToArray();

            float norm = 0.0f;
            for (int j = 0; j < len_text_feature; j++)
            {
                norm += text_feature_ptr[j] * text_feature_ptr[j];
            }
            norm = (float)Math.Sqrt(norm);
            for (int j = 0; j < len_text_feature; j++)
            {
                text_features_input[i, j] = text_feature_ptr[j] / norm;
            }
        }
    }

    // Numerically-stable in-place softmax.
    void softmax(float[] input)
    {
        int length = input.Length;
        float[] exp_x = new float[length];
        float maxVal = input.Max();
        float sum = 0;
        for (int i = 0; i < length; i++)
        {
            float expval = (float)Math.Exp(input[i] - maxVal);
            exp_x[i] = expval;
            sum += expval;
        }
        for (int i = 0; i < length; i++)
        {
            input[i] = exp_x[i] / sum;
        }
    }

    // Index sort. NOTE(review): the comparator returns 0 for the greater-than
    // case; preserved as-is so behavior matches the original exactly.
    int[] argsort_ascend(float[] array)
    {
        int array_len = array.Length;
        int[] array_index = new int[array_len];
        for (int i = 0; i < array_len; ++i)
        {
            array_index[i] = i;
        }

        var temp = array_index.ToList();
        temp.Sort((pos1, pos2) =>
        {
            if (array[pos1] < array[pos2])
            {
                return -1;
            }
            else if (array[pos1] == array[pos2])
            {
                return 0;
            }
            else
            {
                return 0;
            }
        });

        return temp.ToArray();
    }

    // Encodes the text, scores it against all image features and returns the
    // top-5 {image path -> probability} entries.
    public List<Dictionary<string, float>> input_text_search_image(string text, float[] image_features, List<string> imglist)
    {
        int imgnum = imglist.Count;
        List<string> texts = new List<string> { text };

        generate_text_feature(texts);

        float[] logits_per_image = new float[imgnum];
        for (int i = 0; i < imgnum; i++)
        {
            float sum = 0;
            // Inner product between image feature and text feature.
            for (int j = 0; j < len_text_feature; j++)
            {
                sum += image_features[i * len_text_feature + j] * text_features_input[0, j];
            }
            logits_per_image[i] = 100 * sum;
        }

        softmax(logits_per_image);

        int[] index = argsort_ascend(logits_per_image);

        List<Dictionary<string, float>> top5imglist = new List<Dictionary<string, float>>(5);
        for (int i = 0; i < 5; i++)
        {
            int ind = index[imgnum - 1 - i];
            Dictionary<string, float> result = new Dictionary<string, float>();
            result.Add(imglist[ind], logits_per_image[ind]);
            top5imglist.Add(result);
        }
        return top5imglist;
    }

    // Encodes every image in the directory and returns all features flattened.
    public float[] generate_imagedir_features(string image_dir)
    {
        imglist = Common.listdir(image_dir);
        imgnum = imglist.Count;
        Console.WriteLine("遍历到" + imgnum + "张图片");

        float[] imagedir_features = new float[0];
        for (int i = 0; i < imgnum; i++)
        {
            string imgpath = imglist[i];
            Mat srcimg = Cv2.ImRead(imgpath);
            generate_image_feature(srcimg);
            imagedir_features = imagedir_features.Concat(image_features_input).ToArray();
            srcimg.Dispose();
        }

        return imagedir_features;
    }
}

下载

源码下载

这篇关于C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/575678

相关文章

C#实现千万数据秒级导入的代码

《C#实现千万数据秒级导入的代码》在实际开发中excel导入很常见,现代社会中很容易遇到大数据处理业务,所以本文我就给大家分享一下千万数据秒级导入怎么实现,文中有详细的代码示例供大家参考,需要的朋友可... 目录前言一、数据存储二、处理逻辑优化前代码处理逻辑优化后的代码总结前言在实际开发中excel导入很

Java实现将HTML文件与字符串转换为图片

《Java实现将HTML文件与字符串转换为图片》在Java开发中,我们经常会遇到将HTML内容转换为图片的需求,本文小编就来和大家详细讲讲如何使用FreeSpire.DocforJava库来实现这一功... 目录前言核心实现:html 转图片完整代码场景 1:转换本地 HTML 文件为图片场景 2:转换 H

C#使用Spire.Doc for .NET实现HTML转Word的高效方案

《C#使用Spire.Docfor.NET实现HTML转Word的高效方案》在Web开发中,HTML内容的生成与处理是高频需求,然而,当用户需要将HTML页面或动态生成的HTML字符串转换为Wor... 目录引言一、html转Word的典型场景与挑战二、用 Spire.Doc 实现 HTML 转 Word1

C#实现一键批量合并PDF文档

《C#实现一键批量合并PDF文档》这篇文章主要为大家详细介绍了如何使用C#实现一键批量合并PDF文档功能,文中的示例代码简洁易懂,感兴趣的小伙伴可以跟随小编一起学习一下... 目录前言效果展示功能实现1、添加文件2、文件分组(书签)3、定义页码范围4、自定义显示5、定义页面尺寸6、PDF批量合并7、其他方法

Java实现在Word文档中添加文本水印和图片水印的操作指南

《Java实现在Word文档中添加文本水印和图片水印的操作指南》在当今数字时代,文档的自动化处理与安全防护变得尤为重要,无论是为了保护版权、推广品牌,还是为了在文档中加入特定的标识,为Word文档添加... 目录引言Spire.Doc for Java:高效Word文档处理的利器代码实战:使用Java为Wo

C#下Newtonsoft.Json的具体使用

《C#下Newtonsoft.Json的具体使用》Newtonsoft.Json是一个非常流行的C#JSON序列化和反序列化库,它可以方便地将C#对象转换为JSON格式,或者将JSON数据解析为C#对... 目录安装 Newtonsoft.json基本用法1. 序列化 C# 对象为 JSON2. 反序列化

C#文件复制异常:"未能找到文件"的解决方案与预防措施

《C#文件复制异常:未能找到文件的解决方案与预防措施》在C#开发中,文件操作是基础中的基础,但有时最基础的File.Copy()方法也会抛出令人困惑的异常,当targetFilePath设置为D:2... 目录一个看似简单的文件操作问题问题重现与错误分析错误代码示例错误信息根本原因分析全面解决方案1. 确保

基于C#实现PDF转图片的详细教程

《基于C#实现PDF转图片的详细教程》在数字化办公场景中,PDF文件的可视化处理需求日益增长,本文将围绕Spire.PDFfor.NET这一工具,详解如何通过C#将PDF转换为JPG、PNG等主流图片... 目录引言一、组件部署二、快速入门:PDF 转图片的核心 C# 代码三、分辨率设置 - 清晰度的决定因

C# LiteDB处理时间序列数据的高性能解决方案

《C#LiteDB处理时间序列数据的高性能解决方案》LiteDB作为.NET生态下的轻量级嵌入式NoSQL数据库,一直是时间序列处理的优选方案,本文将为大家大家简单介绍一下LiteDB处理时间序列数... 目录为什么选择LiteDB处理时间序列数据第一章:LiteDB时间序列数据模型设计1.1 核心设计原则

Python从Word文档中提取图片并生成PPT的操作代码

《Python从Word文档中提取图片并生成PPT的操作代码》在日常办公场景中,我们经常需要从Word文档中提取图片,并将这些图片整理到PowerPoint幻灯片中,手动完成这一任务既耗时又容易出错,... 目录引言背景与需求解决方案概述代码解析代码核心逻辑说明总结引言在日常办公场景中,我们经常需要从 W