mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-05-30 01:20:22 +09:00
1. Modify tool calling description into ['url','instructions','keyword for searching']
2. Modify prompts for pdf reading 3. Add page limit (20 default) (cherry picked from commit fe3167190278673a2bdea850e305f1ad24fb365e)
This commit is contained in:
parent
5c524bca0f
commit
8b501ccf2c
@ -14,8 +14,12 @@ import { formatDocumentsAsString } from "langchain/util/document";
|
|||||||
import { RunnableSequence } from "@langchain/core/runnables";
|
import { RunnableSequence } from "@langchain/core/runnables";
|
||||||
import { StringOutputParser } from "@langchain/core/output_parsers";
|
import { StringOutputParser } from "@langchain/core/output_parsers";
|
||||||
|
|
||||||
export const parseInputs = (inputs: string): [string, string] => {
|
export const parseInputs = (inputs: string): [string, string, string] => {
|
||||||
const [baseUrl, task] = inputs.split(",").map((input) => {
|
// Sometimes the inputs are enclosed in brackets, remove them
|
||||||
|
if (inputs.startsWith("[") && inputs.endsWith("]")) {
|
||||||
|
inputs = inputs.slice(1, -1);
|
||||||
|
}
|
||||||
|
const [baseUrl, task, search] = inputs.split(",").map((input) => {
|
||||||
let t = input.trim();
|
let t = input.trim();
|
||||||
t = t.startsWith('"') ? t.slice(1) : t;
|
t = t.startsWith('"') ? t.slice(1) : t;
|
||||||
t = t.endsWith('"') ? t.slice(0, -1) : t;
|
t = t.endsWith('"') ? t.slice(0, -1) : t;
|
||||||
@ -23,7 +27,7 @@ export const parseInputs = (inputs: string): [string, string] => {
|
|||||||
return t.trim();
|
return t.trim();
|
||||||
});
|
});
|
||||||
|
|
||||||
return [baseUrl, task];
|
return [baseUrl, task, search];
|
||||||
};
|
};
|
||||||
|
|
||||||
const getPdfBlob = async (baseUrl: string) => {
|
const getPdfBlob = async (baseUrl: string) => {
|
||||||
@ -91,8 +95,8 @@ export class PDFBrowser extends Tool {
|
|||||||
|
|
||||||
/** @ignore */
|
/** @ignore */
|
||||||
async _call(inputs: string, runManager?: CallbackManagerForToolRun) {
|
async _call(inputs: string, runManager?: CallbackManagerForToolRun) {
|
||||||
const [baseUrl, task] = parseInputs(inputs);
|
const [baseUrl, task, search_item] = parseInputs(inputs);
|
||||||
const doSummary = !task;
|
const doSearch = !search_item.includes("EMPTY");
|
||||||
|
|
||||||
let pdfBlob;
|
let pdfBlob;
|
||||||
try {
|
try {
|
||||||
@ -104,7 +108,7 @@ export class PDFBrowser extends Tool {
|
|||||||
return "There was a problem connecting to the site";
|
return "There was a problem connecting to the site";
|
||||||
}
|
}
|
||||||
|
|
||||||
const loader = new WebPDFLoader(pdfBlob);
|
const loader = new WebPDFLoader(pdfBlob, { parsedItemSeparator: "" });
|
||||||
const docs = await loader.load();
|
const docs = await loader.load();
|
||||||
const vectorStore = await MemoryVectorStore.fromDocuments(
|
const vectorStore = await MemoryVectorStore.fromDocuments(
|
||||||
docs,
|
docs,
|
||||||
@ -113,26 +117,33 @@ export class PDFBrowser extends Tool {
|
|||||||
|
|
||||||
const texts = await this.textSplitter.splitText(getDocsText(docs));
|
const texts = await this.textSplitter.splitText(getDocsText(docs));
|
||||||
|
|
||||||
|
const PAGE_CUTOFF_LIMIT = 20;
|
||||||
|
const page_cutoff =
|
||||||
|
docs.length > PAGE_CUTOFF_LIMIT ? PAGE_CUTOFF_LIMIT : docs.length;
|
||||||
|
|
||||||
let context;
|
let context;
|
||||||
// if we want a summary grab first 4
|
let input;
|
||||||
if (doSummary) {
|
|
||||||
context = texts.slice(0, 4).join("\n");
|
if (doSearch) {
|
||||||
}
|
// search term well embed and grab top 10 pages
|
||||||
// search term well embed and grab top 4
|
|
||||||
else {
|
|
||||||
const results = await vectorStore.similaritySearch(
|
const results = await vectorStore.similaritySearch(
|
||||||
task,
|
search_item,
|
||||||
4,
|
page_cutoff,
|
||||||
undefined,
|
undefined,
|
||||||
runManager?.getChild("vectorstore"),
|
runManager?.getChild("vectorstore"),
|
||||||
);
|
);
|
||||||
context = formatDocumentsAsString(results);
|
context = formatDocumentsAsString(results);
|
||||||
|
input = `Please conduct ${task} relating ${search_item} on the following text,
|
||||||
|
you should first read and comprehend the text and find out how ${search_item} is involved in the text\n
|
||||||
|
TEXT:\n ${context}.`;
|
||||||
|
} else {
|
||||||
|
// In other cases all pages will be used
|
||||||
|
context = texts.slice(0, page_cutoff).join("\n");
|
||||||
|
input = `Please conduct ${task} on the following text:
|
||||||
|
you should first read and comprehend the text before finishing the ${task}\n
|
||||||
|
TEXT:\n ${context}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const input = `Text:${context}\n\nI need ${
|
|
||||||
doSummary ? "a summary" : task
|
|
||||||
} from the above text.`;
|
|
||||||
|
|
||||||
console.log("[pdf-browser]", input);
|
console.log("[pdf-browser]", input);
|
||||||
|
|
||||||
const chain = RunnableSequence.from([this.model, new StringOutputParser()]);
|
const chain = RunnableSequence.from([this.model, new StringOutputParser()]);
|
||||||
@ -141,5 +152,8 @@ export class PDFBrowser extends Tool {
|
|||||||
|
|
||||||
name = "pdf-browser";
|
name = "pdf-browser";
|
||||||
|
|
||||||
description = `useful for when you need to find something on or summarize a pdf file. input should be a comma separated list of "ONE valid http URL including protocol","what you want to find on the pdf page or empty string for a summary".`;
|
description = `useful for when you need to deal with pdf files. input should be a comma separated list of items without enclosing brackets.
|
||||||
|
"ONE valid http URL including protocol",
|
||||||
|
"plain instruction upon what you want to do with this file",
|
||||||
|
"keywords if some searching is requested, set empty tag [EMPTY] if none"`;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user