LLaVA: Large Language and Vision Assistant 图片解析图生文

本文主要是介绍LLaVA: Large Language and Vision Assistant 图片解析图生文，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

LLaVA: Large Language and Vision Assistant 图片解析图生文

介绍

效果

项目

测试代码

Form1.cs

Helper.cs

下载

介绍

LLaVA（Large Language and Vision Assistant，“大型语言和视觉助手”）是一种新的大型多模态模型，旨在成为通用视觉助手，能够遵循语言和图像指令完成各种现实世界的任务。其思路是将 GPT-4 等大型语言模型（LLM）的强大能力与 CLIP 等视觉编码器相结合，构建一个经过端到端训练的神经助手，使其能够理解多模态指令并据此采取行动。

项目地址：https://github.com/IntptrMax/LLavaSharp
模型下载地址：https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main

效果

项目

测试代码

Form1.cs

using LLavaSharp;
using System;
using System.Diagnostics;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

namespace WinformTest
{
    /// <summary>
    /// Demo form: the user picks an image (button1), types a prompt and
    /// presses button2; the text returned by <c>Helper.ProcessImage</c> is
    /// shown in <c>txtInfo</c> together with the elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /*
        Project:        https://github.com/IntptrMax/LLavaSharp
        Model download: https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
        */

        // Field initializers run in declaration order before the constructor
        // body, so llama.dll is loaded first.
        // NOTE(review): presumably llava_shared.dll depends on llama.dll - keep this order.
        IntPtr llamaDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llama.dll");
        IntPtr llavaSharedDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llava_shared.dll");

        // Hard-coded sample paths; adjust them to where the GGUF files were downloaded.
        string model = @"C:\MyStudy\llava\llava-v1.5-7b-Q4_K.gguf";
        string mmproj = @"C:\MyStudy\llava\llava-v1.5-7b-mmproj-Q4_0.gguf";

        // Stays null when the model files could not be found at load time.
        Helper helper;

        /// <summary>
        /// Creates the <see cref="Helper"/> once the form is loaded. If either
        /// model file is missing, a message is shown and no helper is created.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            if (!System.IO.File.Exists(model) || !System.IO.File.Exists(mmproj))
            {
                MessageBox.Show(
                    $"Model files not found:\r\n{model}\r\n{mmproj}",
                    "LLaVA",
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Warning);
                return;
            }

            helper = new Helper(model, mmproj);
        }

        // "*.tif" replaces the duplicated "*.tiff" entry of the original filter.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        string image_path = "";
        Bitmap bitmap;

        /// <summary>
        /// Lets the user choose an image file and shows it in the picture box.
        /// The form state is only updated after the file was decoded successfully.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                Bitmap loaded;
                try
                {
                    loaded = new Bitmap(ofd.FileName);
                }
                catch (Exception ex) when (ex is ArgumentException || ex is System.IO.IOException)
                {
                    // Not a decodable image (or the file vanished): keep the current picture.
                    MessageBox.Show(
                        $"Cannot open image:\r\n{ofd.FileName}\r\n{ex.Message}",
                        "LLaVA",
                        MessageBoxButtons.OK,
                        MessageBoxIcon.Warning);
                    return;
                }

                Bitmap previous = bitmap;
                bitmap = loaded;
                image_path = ofd.FileName;
                pictureBox1.Image = bitmap;
                txtInfo.Text = "";

                // Release the old bitmap only after the picture box stopped using it.
                previous?.Dispose();
            }
        }

        StringBuilder sb = new StringBuilder();

        /// <summary>
        /// Runs the model on the selected image with the typed prompt and
        /// writes the elapsed time and the answer into <c>txtInfo</c>.
        /// Does nothing when no helper, image or prompt is available.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (helper == null || bitmap == null || image_path == "")
            {
                return;
            }

            if (String.IsNullOrEmpty(txtPrompt.Text))
            {
                return;
            }

            txtInfo.Text = "";
            button2.Enabled = false;
            sb.Clear();

            // The call below blocks the UI thread; pump pending messages once so
            // the cleared text box and the disabled button are painted first.
            System.Windows.Forms.Application.DoEvents();

            try
            {
                Stopwatch stopwatch = Stopwatch.StartNew();
                string result = helper.ProcessImage(bitmap, txtPrompt.Text);
                stopwatch.Stop();
                double totalTime = stopwatch.Elapsed.TotalSeconds;

                sb.AppendLine($"totalTime: {totalTime:F2}s");
                sb.AppendLine("- - - - - - - - - - - - - - - - ");
                sb.AppendLine(result);
                txtInfo.Text = sb.ToString();
            }
            finally
            {
                // Re-enable the button even when processing throws.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Releases the helper and the currently loaded bitmap when the form closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            base.OnFormClosed(e);

            pictureBox1.Image = null;

            bitmap?.Dispose();
            bitmap = null;

            helper?.Dispose();
            helper = null;
        }
    }
}

using LLavaSharp;
using System;
using System.Diagnostics;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

namespace WinformTest
{
    /// <summary>
    /// Demo form: the user picks an image (button1), types a prompt and
    /// presses button2; the text returned by <c>Helper.ProcessImage</c> is
    /// shown in <c>txtInfo</c> together with the elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /*
        Project:        https://github.com/IntptrMax/LLavaSharp
        Model download: https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
        */

        // Field initializers run in declaration order before the constructor
        // body, so llama.dll is loaded first.
        // NOTE(review): presumably llava_shared.dll depends on llama.dll - keep this order.
        IntPtr llamaDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llama.dll");
        IntPtr llavaSharedDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llava_shared.dll");

        // Hard-coded sample paths; adjust them to where the GGUF files were downloaded.
        string model = @"C:\MyStudy\llava\llava-v1.5-7b-Q4_K.gguf";
        string mmproj = @"C:\MyStudy\llava\llava-v1.5-7b-mmproj-Q4_0.gguf";

        // Stays null when the model files could not be found at load time.
        Helper helper;

        /// <summary>
        /// Creates the <see cref="Helper"/> once the form is loaded. If either
        /// model file is missing, a message is shown and no helper is created.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            if (!System.IO.File.Exists(model) || !System.IO.File.Exists(mmproj))
            {
                MessageBox.Show(
                    $"Model files not found:\r\n{model}\r\n{mmproj}",
                    "LLaVA",
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Warning);
                return;
            }

            helper = new Helper(model, mmproj);
        }

        // "*.tif" replaces the duplicated "*.tiff" entry of the original filter.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        string image_path = "";
        Bitmap bitmap;

        /// <summary>
        /// Lets the user choose an image file and shows it in the picture box.
        /// The form state is only updated after the file was decoded successfully.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                Bitmap loaded;
                try
                {
                    loaded = new Bitmap(ofd.FileName);
                }
                catch (Exception ex) when (ex is ArgumentException || ex is System.IO.IOException)
                {
                    // Not a decodable image (or the file vanished): keep the current picture.
                    MessageBox.Show(
                        $"Cannot open image:\r\n{ofd.FileName}\r\n{ex.Message}",
                        "LLaVA",
                        MessageBoxButtons.OK,
                        MessageBoxIcon.Warning);
                    return;
                }

                Bitmap previous = bitmap;
                bitmap = loaded;
                image_path = ofd.FileName;
                pictureBox1.Image = bitmap;
                txtInfo.Text = "";

                // Release the old bitmap only after the picture box stopped using it.
                previous?.Dispose();
            }
        }

        StringBuilder sb = new StringBuilder();

        /// <summary>
        /// Runs the model on the selected image with the typed prompt and
        /// writes the elapsed time and the answer into <c>txtInfo</c>.
        /// Does nothing when no helper, image or prompt is available.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (helper == null || bitmap == null || image_path == "")
            {
                return;
            }

            if (String.IsNullOrEmpty(txtPrompt.Text))
            {
                return;
            }

            txtInfo.Text = "";
            button2.Enabled = false;
            sb.Clear();

            // The call below blocks the UI thread; pump pending messages once so
            // the cleared text box and the disabled button are painted first.
            System.Windows.Forms.Application.DoEvents();

            try
            {
                Stopwatch stopwatch = Stopwatch.StartNew();
                string result = helper.ProcessImage(bitmap, txtPrompt.Text);
                stopwatch.Stop();
                double totalTime = stopwatch.Elapsed.TotalSeconds;

                sb.AppendLine($"totalTime: {totalTime:F2}s");
                sb.AppendLine("- - - - - - - - - - - - - - - - ");
                sb.AppendLine(result);
                txtInfo.Text = sb.ToString();
            }
            finally
            {
                // Re-enable the button even when processing throws.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Releases the helper and the currently loaded bitmap when the form closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            base.OnFormClosed(e);

            pictureBox1.Image = null;

            bitmap?.Dispose();
            bitmap = null;

            helper?.Dispose();
            helper = null;
        }
    }
}

Helper.cs

using System;
using System.Drawing;

namespace LLavaSharp
{
    /// <summary>
    /// Thin wrapper around the native LLaVA bindings in <c>Lib</c>: it creates
    /// a context from a model file and a multimodal projector file, and runs a
    /// prompt against a bitmap.
    /// </summary>
    public class Helper : IDisposable
    {
        private llava_context ctx_llava;
        gpt_params @params = new gpt_params();

        // Set once Dispose has run, so the native context is never freed twice.
        private bool disposed;

        /// <summary>
        /// Initializes the native context via <c>Lib.llava_init</c>.
        /// </summary>
        /// <param name="model_path">Path assigned to <c>params.model</c>.</param>
        /// <param name="mmproj_path">Path assigned to <c>params.mmproj</c>.</param>
        /// <param name="ngl">
        /// Value assigned to both <c>n_gpu_layers</c> and <c>n_gpu_layers_draft</c>.
        /// </param>
        /// <exception cref="ArgumentException">A path is null or empty.</exception>
        public Helper(string model_path, string mmproj_path, int ngl = 32)
        {
            if (string.IsNullOrEmpty(model_path))
            {
                throw new ArgumentException("Model path must not be null or empty.", nameof(model_path));
            }

            if (string.IsNullOrEmpty(mmproj_path))
            {
                throw new ArgumentException("Projector path must not be null or empty.", nameof(mmproj_path));
            }

            @params.model = model_path;
            @params.mmproj = mmproj_path;
            @params.n_gpu_layers = ngl;
            @params.n_gpu_layers_draft = ngl;
            ctx_llava = Lib.llava_init(@params);
        }

        /// <summary>
        /// Embeds <paramref name="bitmap"/>, runs <paramref name="prompt"/>
        /// against it and returns the text produced by <c>Lib.process_prompt</c>.
        /// </summary>
        /// <param name="bitmap">Image to embed.</param>
        /// <param name="prompt">Prompt text passed to the model.</param>
        /// <param name="temp">
        /// Forwarded unchanged to <c>Lib.process_prompt</c>
        /// (presumably the sampling temperature - confirm against Lib).
        /// </param>
        /// <returns>The string returned by <c>Lib.process_prompt</c>.</returns>
        /// <exception cref="ObjectDisposedException">The helper was disposed.</exception>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="bitmap"/> or <paramref name="prompt"/> is null.
        /// </exception>
        public string ProcessImage(Bitmap bitmap, string prompt, float temp = 0)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(Helper));
            }

            if (bitmap == null)
            {
                throw new ArgumentNullException(nameof(bitmap));
            }

            if (prompt == null)
            {
                throw new ArgumentNullException(nameof(prompt));
            }

            llava_image_embed image_embed = Lib.load_image(ctx_llava, bitmap, @params.n_threads);
            try
            {
                return Lib.process_prompt(ctx_llava, image_embed, @params, prompt, temp);
            }
            finally
            {
                // Runs on failure too, so the embedding is not leaked and the
                // KV cache is cleared before the next call. The forced
                // GC.Collect() of the original was dropped: the native buffers
                // are released explicitly here.
                Lib.llava_image_embed_free(image_embed);
                Lib.llama_free_kv_cache(ctx_llava.ctx_llama);
            }
        }

        /// <summary>
        /// Frees the native context via <c>Lib.llava_free</c>. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            disposed = true;
            Lib.llava_free(ctx_llava);
        }

    }
}

using System;
using System.Drawing;

namespace LLavaSharp
{
    /// <summary>
    /// Thin wrapper around the native LLaVA bindings in <c>Lib</c>: it creates
    /// a context from a model file and a multimodal projector file, and runs a
    /// prompt against a bitmap.
    /// </summary>
    public class Helper : IDisposable
    {
        private llava_context ctx_llava;
        gpt_params @params = new gpt_params();

        // Set once Dispose has run, so the native context is never freed twice.
        private bool disposed;

        /// <summary>
        /// Initializes the native context via <c>Lib.llava_init</c>.
        /// </summary>
        /// <param name="model_path">Path assigned to <c>params.model</c>.</param>
        /// <param name="mmproj_path">Path assigned to <c>params.mmproj</c>.</param>
        /// <param name="ngl">
        /// Value assigned to both <c>n_gpu_layers</c> and <c>n_gpu_layers_draft</c>.
        /// </param>
        /// <exception cref="ArgumentException">A path is null or empty.</exception>
        public Helper(string model_path, string mmproj_path, int ngl = 32)
        {
            if (string.IsNullOrEmpty(model_path))
            {
                throw new ArgumentException("Model path must not be null or empty.", nameof(model_path));
            }

            if (string.IsNullOrEmpty(mmproj_path))
            {
                throw new ArgumentException("Projector path must not be null or empty.", nameof(mmproj_path));
            }

            @params.model = model_path;
            @params.mmproj = mmproj_path;
            @params.n_gpu_layers = ngl;
            @params.n_gpu_layers_draft = ngl;
            ctx_llava = Lib.llava_init(@params);
        }

        /// <summary>
        /// Embeds <paramref name="bitmap"/>, runs <paramref name="prompt"/>
        /// against it and returns the text produced by <c>Lib.process_prompt</c>.
        /// </summary>
        /// <param name="bitmap">Image to embed.</param>
        /// <param name="prompt">Prompt text passed to the model.</param>
        /// <param name="temp">
        /// Forwarded unchanged to <c>Lib.process_prompt</c>
        /// (presumably the sampling temperature - confirm against Lib).
        /// </param>
        /// <returns>The string returned by <c>Lib.process_prompt</c>.</returns>
        /// <exception cref="ObjectDisposedException">The helper was disposed.</exception>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="bitmap"/> or <paramref name="prompt"/> is null.
        /// </exception>
        public string ProcessImage(Bitmap bitmap, string prompt, float temp = 0)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(Helper));
            }

            if (bitmap == null)
            {
                throw new ArgumentNullException(nameof(bitmap));
            }

            if (prompt == null)
            {
                throw new ArgumentNullException(nameof(prompt));
            }

            llava_image_embed image_embed = Lib.load_image(ctx_llava, bitmap, @params.n_threads);
            try
            {
                return Lib.process_prompt(ctx_llava, image_embed, @params, prompt, temp);
            }
            finally
            {
                // Runs on failure too, so the embedding is not leaked and the
                // KV cache is cleared before the next call. The forced
                // GC.Collect() of the original was dropped: the native buffers
                // are released explicitly here.
                Lib.llava_image_embed_free(image_embed);
                Lib.llama_free_kv_cache(ctx_llava.ctx_llama);
            }
        }

        /// <summary>
        /// Frees the native context via <c>Lib.llava_free</c>. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            disposed = true;
            Lib.llava_free(ctx_llava);
        }
    }
}

下载

源码下载

模型下载地址：https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main

这篇关于 LLaVA: Large Language and Vision Assistant 图片解析图生文的文章就介绍到这儿，希望我们推荐的文章对开发者们有所帮助！

LLaVA: Large Language and Vision Assistant 图片解析图生文

介绍

效果

项目

测试代码

Form1.cs

Helper.cs

下载

相关文章

C++ 右值引用(rvalue references)与移动语义(move semantics)深度解析

MySQL 筛选条件放 ON后 vs 放 WHERE 后的区别解析

Mybatis的mapper文件中#和$的区别示例解析

Agent开发核心技术解析以及现代Agent架构设计

MySQL字符串转数值的方法全解析

Java使用Spire.Doc for Java实现Word自动化插入图片

SQL 注入攻击(SQL Injection)原理、利用方式与防御策略深度解析

Python多任务爬虫实现爬取图片和GDP数据

C++ 多态性实战之何时使用 virtual 和 override的问题解析

Springboot主配置文件解析

LLaVA: Large Language and Vision Assistant 图片解析 图生文

介绍

效果

项目

测试代码

Form1.cs

Helper.cs

下载

相关文章

LLaVA: Large Language and Vision Assistant 图片解析图生文