mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-06-03 19:30:22 +09:00
Modify ArxivAPIWrapper
1. Replace the raw XML output with parsed data. 2. Modify the prompts and default values of the zod request schema.
This commit is contained in:
parent
7618eaf20d
commit
3f153fe8bd
@ -1,5 +1,84 @@
|
|||||||
import { StructuredTool } from "@langchain/core/tools";
|
import { StructuredTool } from "@langchain/core/tools";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
import { XMLParser } from "fast-xml-parser";
|
||||||
|
|
||||||
|
// Credit: ArxivRetriever from Langchain.js
|
||||||
|
/**
 * Normalized view of a single `<entry>` element of an arXiv Atom feed.
 */
interface ArxivEntry {
  /** Value of the entry's `<id>` tag. */
  id: string;
  /** Entry title with runs of whitespace collapsed to single spaces. */
  title: string;
  /** Entry summary with runs of whitespace collapsed to single spaces. */
  summary: string;
  /** Value of the entry's `<published>` tag, passed through unchanged. */
  published: string;
  /** `name` of every `<author>` child of the entry. */
  authors: string[];
  /**
   * `href` of the link titled "pdf" when the entry has one; otherwise derived
   * from `id` by replacing "/abs/" with "/pdf/" and appending ".pdf".
   */
  pdfUrl: string;
  /**
   * Parsed `<link>` nodes of the entry. Attribute keys carry the parser's
   * `@_` prefix (e.g. `@_href`, `@_title`); values are left untyped because
   * the parser may convert attribute values to non-string primitives.
   */
  links: Record<string, unknown>[];
}
|
||||||
|
/**
 * Converts one parsed `<entry>` node of an arXiv Atom feed into an ArxivEntry.
 *
 * The node comes straight from the XML parser, so a tag that occurs once is a
 * single object while a repeated tag is an array, and text values are not
 * guaranteed to be strings (the parser is configured to convert values).
 * Missing tags yield empty strings / empty arrays instead of throwing.
 *
 * @param entry - Parsed `<entry>` node; attribute keys are prefixed with `@_`.
 * @returns The normalized entry, including the normalized list of link nodes.
 */
function parseArxivEntry(entry: any): ArxivEntry {
  // Coerce to string first: a missing tag is undefined and a purely numeric
  // text node may have been converted to a number by the parser.
  const asText = (value: unknown): string => String(value ?? "");
  const collapseWhitespace = (value: unknown): string =>
    asText(value).replace(/\s+/g, " ").trim();
  // Single occurrence -> object, repeated -> array, absent -> undefined.
  const asList = (value: any): any[] => {
    if (value == null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  };

  const id = asText(entry?.id);

  const authors: string[] = asList(entry?.author)
    .map((author: any) => asText(author?.name))
    .filter((name: string) => name.length > 0);

  const links = asList(entry?.link);

  // Prefer the explicit PDF link; otherwise derive the URL from the abstract URL.
  const pdfLink = links.find((link: any) => link?.["@_title"] === "pdf");
  let pdfUrl = id ? id.replace("/abs/", "/pdf/") + ".pdf" : "";
  if (pdfLink && pdfLink["@_href"]) {
    pdfUrl = asText(pdfLink["@_href"]);
  }

  return {
    id,
    title: collapseWhitespace(entry?.title),
    summary: collapseWhitespace(entry?.summary),
    published: asText(entry?.published),
    authors,
    pdfUrl,
    // Return the normalized list built above; `entry.links` does not exist.
    links,
  };
}
|
||||||
|
|
||||||
|
/**
 * Parses the Atom XML returned by the arXiv query API into normalized entries.
 *
 * @param response - Raw XML text of the API response.
 * @returns One ArxivEntry per `<entry>` in the feed; an empty array when the
 *   document has no `<feed>` root or the feed has no entries.
 */
function parseArxivResponse(response: string): ArxivEntry[] {
  const parser = new XMLParser({
    attributeNamePrefix: "@_",
    ignoreAttributes: false,
    // `XMLParser` is the v4 API of fast-xml-parser; the v3 option names
    // `parseNodeValue` / `ignoreNameSpace` are not recognized by it.
    parseTagValue: true,
    parseAttributeValue: true,
    trimValues: true,
    removeNSPrefix: true,
  });
  const results = parser.parse(response);

  // A non-feed body (e.g. an error page) has no `feed` root at all.
  const entries = results?.feed?.entry;
  if (!entries) {
    return [];
  }

  // A feed with exactly one <entry> is parsed to a plain object, not an array.
  const entryList = Array.isArray(entries) ? entries : [entries];
  return entryList.map((entry: any) => parseArxivEntry(entry));
}
|
||||||
|
/**
 * Renders an arXiv API response as plain text.
 *
 * @param query - Raw XML text of the arXiv API response (despite the name,
 *   this is the response body, not the search query).
 * @returns A fixed "no article" message when the feed has no entries;
 *   otherwise a header followed by one block per article (title, authors,
 *   summary, publication date, PDF URL), blocks separated by a blank line.
 */
async function buildArxivResponse(query: string): Promise<string> {
  // Parsing is synchronous; the function stays async only to keep its
  // Promise-returning signature for existing callers.
  const articles = parseArxivResponse(query);
  if (articles.length === 0) {
    return `Found no article in arxiv database`;
  }

  // Build each block from explicit lines so that no source-code line breaks
  // or indentation leak into the generated text.
  const articleBlocks = articles.map((article) =>
    [
      `Title: ${article.title}`,
      `Authors: ${article.authors.join(", ")}`,
      `Summary: ${article.summary}`,
      `Published: ${article.published}`,
      `PDF: ${article.pdfUrl}`,
    ].join("\n"),
  );

  return `Found these articles in arxiv database\n\n${articleBlocks.join("\n\n")}`;
}
|
||||||
|
|
||||||
export class ArxivAPIWrapper extends StructuredTool {
|
export class ArxivAPIWrapper extends StructuredTool {
|
||||||
get lc_namespace() {
|
get lc_namespace() {
|
||||||
@ -7,7 +86,8 @@ export class ArxivAPIWrapper extends StructuredTool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
name = "arxiv";
|
name = "arxiv";
|
||||||
description = "Run Arxiv search and get the article information.";
|
description =
|
||||||
|
"Useful if you need to look for academical papers on arxiv. You can search by title, author, abstract, etc.";
|
||||||
|
|
||||||
SORT_BY = {
|
SORT_BY = {
|
||||||
RELEVANCE: "relevance",
|
RELEVANCE: "relevance",
|
||||||
@ -21,23 +101,32 @@ export class ArxivAPIWrapper extends StructuredTool {
|
|||||||
};
|
};
|
||||||
|
|
||||||
schema = z.object({
|
schema = z.object({
|
||||||
searchQuery: z
|
searchQuery: z.string().describe("topic of your query"),
|
||||||
.string()
|
|
||||||
.describe("same as the search_query parameter rules of the arxiv API."),
|
|
||||||
sortBy: z
|
sortBy: z
|
||||||
.string()
|
.string()
|
||||||
.describe('can be "relevance", "lastUpdatedDate", "submittedDate".'),
|
.optional()
|
||||||
|
.default(this.SORT_BY.RELEVANCE)
|
||||||
|
.describe(
|
||||||
|
'sort rules, can be "relevance", "lastUpdatedDate", "submittedDate". Default by relevance if no' +
|
||||||
|
"additional request is made.",
|
||||||
|
),
|
||||||
sortOrder: z
|
sortOrder: z
|
||||||
.string()
|
.string()
|
||||||
.describe('can be either "ascending" or "descending".'),
|
.optional()
|
||||||
|
.default(this.SORT_ORDER.DESCENDING)
|
||||||
|
.describe(
|
||||||
|
'order of sort, can be either "ascending" or "descending". Default by descending.',
|
||||||
|
),
|
||||||
start: z
|
start: z
|
||||||
.number()
|
.number()
|
||||||
|
.optional()
|
||||||
.default(0)
|
.default(0)
|
||||||
.describe("the index of the first returned result."),
|
.describe("the index of the first returned result. Default 0."),
|
||||||
maxResults: z
|
maxResults: z
|
||||||
.number()
|
.number()
|
||||||
.default(10)
|
.optional()
|
||||||
.describe("the number of results returned by the query."),
|
.default(20)
|
||||||
|
.describe("the number of returned items. Default 20."),
|
||||||
});
|
});
|
||||||
|
|
||||||
async _call({
|
async _call({
|
||||||
@ -62,17 +151,18 @@ export class ArxivAPIWrapper extends StructuredTool {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
let url = `https://export.arxiv.org/api/query?search_query=${searchQuery}&start=${start}&max_results=${maxResults}${
|
let url = `https://export.arxiv.org/api/query?search_query=all:${searchQuery}&start=${start}&max_results=${maxResults}${
|
||||||
sortBy ? `&sortBy=${sortBy}` : ""
|
sortBy ? `&sortBy=${sortBy}` : ""
|
||||||
}${sortOrder ? `&sortOrder=${sortOrder}` : ""}`;
|
}${sortOrder ? `&sortOrder=${sortOrder}` : ""}`;
|
||||||
console.log("[arxiv]", url);
|
console.log("[arxiv]", url);
|
||||||
const response = await fetch(url);
|
const api_response = await fetch(url);
|
||||||
const data = await response.text();
|
const response_text = await api_response.text();
|
||||||
console.log("[arxiv]", data);
|
const arxiv_data = await buildArxivResponse(response_text);
|
||||||
return data;
|
console.log("[arxiv]", arxiv_data);
|
||||||
|
return arxiv_data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("[arxiv]", e);
|
console.error("[arxiv]", e);
|
||||||
}
|
}
|
||||||
return "not found";
|
return `Invalid request ${searchQuery}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user