1. Modify tool calling description into ['url','instructions','keyword for searching']

2. Modify prompts for pdf reading 3. Add page limit (20 default) (cherry picked from commit fe3167190278673a2bdea850e305f1ad24fb365e)
2025-05-29 00:50:22 +09:00 · 2025-01-09 22:00:11 +08:00 · 2025-01-09 22:00:11 +08:00 · 8b501ccf2c
commit 8b501ccf2c
parent 5c524bca0f
1 changed files with 33 additions and 19 deletions
--- a/app/api/langchain-tools/pdf_browser.ts
+++ b/app/api/langchain-tools/pdf_browser.ts
@ -14,8 +14,12 @@ import { formatDocumentsAsString } from "langchain/util/document";
 import { RunnableSequence } from "@langchain/core/runnables";
 import { StringOutputParser } from "@langchain/core/output_parsers";

-export const parseInputs = (inputs: string): [string, string] => {
-  const [baseUrl, task] = inputs.split(",").map((input) => {
+export const parseInputs = (inputs: string): [string, string, string] => {
+  // Sometimes the inputs are enclosed in brackets, remove them
+  if (inputs.startsWith("[") && inputs.endsWith("]")) {
+    inputs = inputs.slice(1, -1);
+  }
+  const [baseUrl, task, search] = inputs.split(",").map((input) => {
    let t = input.trim();
    t = t.startsWith('"') ? t.slice(1) : t;
    t = t.endsWith('"') ? t.slice(0, -1) : t;
@ -23,7 +27,7 @@ export const parseInputs = (inputs: string): [string, string] => {
    return t.trim();
  });

-  return [baseUrl, task];
+  return [baseUrl, task, search];
 };

 const getPdfBlob = async (baseUrl: string) => {
@ -91,8 +95,8 @@ export class PDFBrowser extends Tool {

  /** @ignore */
  async _call(inputs: string, runManager?: CallbackManagerForToolRun) {
-    const [baseUrl, task] = parseInputs(inputs);
-    const doSummary = !task;
+    const [baseUrl, task, search_item] = parseInputs(inputs);
+    const doSearch = !search_item.includes("EMPTY");

    let pdfBlob;
    try {
@ -104,7 +108,7 @@ export class PDFBrowser extends Tool {
      return "There was a problem connecting to the site";
    }

-    const loader = new WebPDFLoader(pdfBlob);
+    const loader = new WebPDFLoader(pdfBlob, { parsedItemSeparator: "" });
    const docs = await loader.load();
    const vectorStore = await MemoryVectorStore.fromDocuments(
      docs,
@ -113,26 +117,33 @@ export class PDFBrowser extends Tool {

    const texts = await this.textSplitter.splitText(getDocsText(docs));

+    const PAGE_CUTOFF_LIMIT = 20;
+    const page_cutoff =
+      docs.length > PAGE_CUTOFF_LIMIT ? PAGE_CUTOFF_LIMIT : docs.length;
+
    let context;
-    // if we want a summary grab first 4
-    if (doSummary) {
-      context = texts.slice(0, 4).join("\n");
-    }
-    // search term well embed and grab top 4
-    else {
+    let input;
+
+    if (doSearch) {
+      // search term well embed and grab top 10 pages
      const results = await vectorStore.similaritySearch(
-        task,
-        4,
+        search_item,
+        page_cutoff,
        undefined,
        runManager?.getChild("vectorstore"),
      );
      context = formatDocumentsAsString(results);
+      input = `Please conduct ${task} relating ${search_item} on the following text,
+        you should first read and comprehend the text and find out how ${search_item} is involved in the text\n 
+        TEXT:\n ${context}.`;
+    } else {
+      // In other cases all pages will be used
+      context = texts.slice(0, page_cutoff).join("\n");
+      input = `Please conduct ${task} on the following text:
+        you should first read and comprehend the text before finishing the ${task}\n 
+        TEXT:\n ${context}`;
    }

-    const input = `Text:${context}\n\nI need ${
-      doSummary ? "a summary" : task
-    } from the above text.`;
-
    console.log("[pdf-browser]", input);

    const chain = RunnableSequence.from([this.model, new StringOutputParser()]);
@ -141,5 +152,8 @@ export class PDFBrowser extends Tool {

  name = "pdf-browser";

-  description = `useful for when you need to find something on or summarize a pdf file. input should be a comma separated list of "ONE valid http URL including protocol","what you want to find on the pdf page or empty string for a summary".`;
+  description = `useful for when you need to deal with pdf files. input should be a comma separated list of items without enclosing brackets. 
+  "ONE valid http URL including protocol",
+  "plain instruction upon what you want to do with this file",
+  "keywords if some searching is requested, set empty tag [EMPTY] if none"`;
 }