Modify ArxivAPIWrapper

1. Replace the raw XML response with parsed data
2. Update the prompts and default values of the zod request schema
This commit is contained in:
bili 2025-01-14 17:04:49 +08:00 committed by Hk-Gosuto
parent 7618eaf20d
commit 3f153fe8bd

View File

@ -1,5 +1,84 @@
import { StructuredTool } from "@langchain/core/tools";
import { z } from "zod";
import { XMLParser } from "fast-xml-parser";
// Credit: ArxivRetriever from Langchain.js
/** Normalized representation of a single entry from a parsed arXiv Atom feed. */
interface ArxivEntry {
// Entry identifier as it appears in the feed's `id` element.
id: string;
// Article title with runs of whitespace collapsed to single spaces.
title: string;
// Article abstract with runs of whitespace collapsed to single spaces.
summary: string;
// Publication timestamp, passed through unchanged from the feed's `published` element.
published: string;
// Author names, one element per `author` node of the entry.
authors: string[];
// URL of the PDF: the href of the link titled "pdf" when present, otherwise derived from `id`.
pdfUrl: string;
// Link objects associated with the entry; left untyped (shape is whatever the XML parser emits).
links: any[];
}
/**
 * Converts one `entry` node of a parsed arXiv Atom feed into an ArxivEntry.
 *
 * The XML parser emits a single object when an element occurs once and an
 * array when it occurs several times, so `author` and `link` are normalized
 * to arrays here.
 *
 * @param entry Raw entry object produced by the XML parser (attributes are
 *   expected under the "@_" prefix, e.g. "@_href").
 * @returns The normalized entry; `links` is always an array (possibly empty).
 */
function parseArxivEntry(entry: any): ArxivEntry {
  // Wrap a "one or many" parser value into an array; falsy values become [].
  const toArray = (value: any): any[] => {
    if (Array.isArray(value)) {
      return value;
    }
    return value ? [value] : [];
  };

  // String(...) guards against the parser having coerced a purely numeric
  // tag value into a number, and against a missing element.
  const toText = (value: unknown): string =>
    value === undefined || value === null ? "" : String(value);

  // arXiv wraps titles and abstracts with hard line breaks; flatten them.
  const collapseWhitespace = (value: unknown): string =>
    toText(value).replace(/\s+/g, " ").trim();

  const id = toText(entry.id);
  const authors: string[] = toArray(entry.author).map(
    (author: any) => author.name,
  );
  const links = toArray(entry.link);

  // Prefer the explicit PDF link; otherwise derive it from the abstract URL.
  const pdfLink = links.find((link: any) => link["@_title"] === "pdf");
  const pdfUrl =
    pdfLink && pdfLink["@_href"]
      ? pdfLink["@_href"]
      : id.replace("/abs/", "/pdf/") + ".pdf";

  return {
    id,
    title: collapseWhitespace(entry.title),
    summary: collapseWhitespace(entry.summary),
    published: entry.published,
    authors,
    pdfUrl,
    // The normalized array built above; the raw entry has no `links` property
    // (the Atom element is `link`), so reading `entry.links` yielded undefined.
    links,
  };
}
/**
 * Parses the XML text returned by the arXiv query API into normalized entries.
 *
 * @param response Raw XML body of the API response.
 * @returns One ArxivEntry per `entry` element of the feed; an empty array when
 *   the document has no `feed` root or the feed contains no entries.
 */
function parseArxivResponse(response: string): ArxivEntry[] {
  const parser = new XMLParser({
    attributeNamePrefix: "@_",
    ignoreAttributes: false,
    // XMLParser (fast-xml-parser v4) names these options `parseTagValue` and
    // `removeNSPrefix`; the v3 names `parseNodeValue` / `ignoreNameSpace`
    // are not recognized by it.
    parseTagValue: true,
    parseAttributeValue: true,
    trimValues: true,
    removeNSPrefix: true,
  });
  const results = parser.parse(response);

  // A non-Atom body (e.g. an error page) has no `feed` root.
  const entries = results?.feed?.entry;
  if (!entries) {
    return [];
  }

  // The parser yields a bare object, not an array, when the feed holds
  // exactly one entry; normalize so `.map` is always available.
  const entryList: any[] = Array.isArray(entries) ? entries : [entries];
  return entryList.map((entry) => parseArxivEntry(entry));
}
/**
 * Renders an arXiv API response as a plain-text listing of articles.
 *
 * @param query Despite the name, this is the raw XML text of the API response
 *   (it is handed straight to parseArxivResponse), not the search terms.
 * @returns A fixed "no article" message when nothing was parsed; otherwise a
 *   header followed by one title/authors/summary/published/PDF block per
 *   article, with blocks separated by a blank line.
 */
async function buildArxivResponse(query: string): Promise<string> {
  // parseArxivResponse is synchronous, so there is nothing to await.
  const articles = parseArxivResponse(query);
  if (articles.length === 0) {
    return `Found no article in arxiv database`;
  }

  const header = `Found these articles in arxiv database \n\n`;
  // Line breaks are written as explicit escapes rather than raw newlines
  // inside the template literal, so source indentation cannot leak into
  // the emitted text.
  const articleBlocks = articles.map(
    (article) =>
      `Title: ${article.title}\nAuthors: ${article.authors.join(", ")}\n\n` +
      `Summary: ${article.summary}\nPublished: ${article.published}\n\n` +
      `PDF: ${article.pdfUrl}`,
  );
  return `${header} \n \n ${articleBlocks.join("\n\n")}`;
}
export class ArxivAPIWrapper extends StructuredTool {
get lc_namespace() {
@ -7,7 +86,8 @@ export class ArxivAPIWrapper extends StructuredTool {
}
name = "arxiv";
description = "Run Arxiv search and get the article information.";
description =
"Useful if you need to look for academical papers on arxiv. You can search by title, author, abstract, etc.";
SORT_BY = {
RELEVANCE: "relevance",
@ -21,23 +101,32 @@ export class ArxivAPIWrapper extends StructuredTool {
};
schema = z.object({
searchQuery: z
.string()
.describe("same as the search_query parameter rules of the arxiv API."),
searchQuery: z.string().describe("topic of your query"),
sortBy: z
.string()
.describe('can be "relevance", "lastUpdatedDate", "submittedDate".'),
.optional()
.default(this.SORT_BY.RELEVANCE)
.describe(
'sort rules, can be "relevance", "lastUpdatedDate", "submittedDate". Default by relevance if no' +
"additional request is made.",
),
sortOrder: z
.string()
.describe('can be either "ascending" or "descending".'),
.optional()
.default(this.SORT_ORDER.DESCENDING)
.describe(
'order of sort, can be either "ascending" or "descending". Default by descending.',
),
start: z
.number()
.optional()
.default(0)
.describe("the index of the first returned result."),
.describe("the index of the first returned result. Default 0."),
maxResults: z
.number()
.default(10)
.describe("the number of results returned by the query."),
.optional()
.default(20)
.describe("the number of returned items. Default 20."),
});
async _call({
@ -62,17 +151,18 @@ export class ArxivAPIWrapper extends StructuredTool {
);
}
try {
let url = `https://export.arxiv.org/api/query?search_query=${searchQuery}&start=${start}&max_results=${maxResults}${
let url = `https://export.arxiv.org/api/query?search_query=all:${searchQuery}&start=${start}&max_results=${maxResults}${
sortBy ? `&sortBy=${sortBy}` : ""
}${sortOrder ? `&sortOrder=${sortOrder}` : ""}`;
console.log("[arxiv]", url);
const response = await fetch(url);
const data = await response.text();
console.log("[arxiv]", data);
return data;
const api_response = await fetch(url);
const response_text = await api_response.text();
const arxiv_data = await buildArxivResponse(response_text);
console.log("[arxiv]", arxiv_data);
return arxiv_data;
} catch (e) {
console.error("[arxiv]", e);
}
return "not found";
return `Invalid request ${searchQuery}`;
}
}