diff --git a/README.md b/README.md
index 0a38fa53c..a70485bf6 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,15 @@ OpanAI 密钥,你在 openai 账户页面申请的 api key。
 
 [Web Search API | Microsoft Bing](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
 
+### `CHOOSE_SEARCH_ENGINE` (可选)
+
+此项为直连搜索引擎,免去api试用量小的烦恼,但可能因为网络问题导致无法使用
+
+可选项如下:
+
+- google
+- baidu
+
 ### `CODE` (可选)
 
 访问密码,可选,可以使用逗号隔开多个密码。
diff --git a/app/api/langchain-tools/baidu_search.ts b/app/api/langchain-tools/baidu_search.ts
new file mode 100644
index 000000000..cf960022d
--- /dev/null
+++ b/app/api/langchain-tools/baidu_search.ts
@@ -0,0 +1,91 @@
+import { decode } from "html-entities";
+import { convert as htmlToText } from "html-to-text";
+import { Tool } from "langchain/tools";
+import * as cheerio from "cheerio";
+import { getRandomUserAgent } from "./ua_tools";
+
+interface SearchResults {
+  /** The web results of the search. */
+  results: SearchResult[];
+}
+
+interface SearchResult {
+  /** The URL of the result. */
+  url: string;
+  /** The title of the result. */
+  title: string;
+  /**
+   * The description of the result, taken from the text content
+   * of the result element (no HTML markup).
+   */
+  description: string;
+}
+
+/**
+ * Fetches a Baidu results page for `input` and scrapes its result entries.
+ *
+ * @param input - The search query; URL-encoded before being sent.
+ * @param maxResults - Value passed as the `rn` query parameter.
+ * @returns The scraped results; `results` is empty when no element matches.
+ */
+async function search(
+  input: string,
+  maxResults: number,
+): Promise<SearchResults> {
+  const results: SearchResults = {
+    results: [],
+  };
+  const headers = new Headers();
+  headers.append("User-Agent", getRandomUserAgent());
+  const resp = await fetch(
+    `https://www.baidu.com/s?f=8&ie=utf-8&rn=${maxResults}&wd=${encodeURIComponent(
+      input,
+    )}`,
+    {
+      headers: headers,
+    },
+  );
+  const respCheerio = cheerio.load(await resp.text());
+  respCheerio("div.c-container.new-pmd").each((i, elem) => {
+    const item = cheerio.load(elem);
+    // Use only the first link: `.text()` on the whole match set would join
+    // the text of every anchor in the entry into the title.
+    const linkElement = item("a").first();
+    const url = (linkElement.attr("href") ?? "").trim();
+    if (url !== "" && url !== "#") {
+      const title = decode(linkElement.text());
+      const description = item.text().replace(title, "").trim();
+      results.results.push({
+        url,
+        title,
+        description,
+      });
+    }
+  });
+  return results;
+}
+
+/** LangChain tool that answers a query with result descriptions scraped from Baidu. */
+export class BaiduSearch extends Tool {
+  name = "baidu_search";
+  maxResults = 6;
+
+  /** @ignore */
+  async _call(input: string) {
+    const searchResults = await search(input, this.maxResults);
+
+    if (searchResults.results.length === 0) {
+      return "No good search result found";
+    }
+
+    // Only the descriptions are returned; titles and URLs are not included.
+    const results = searchResults.results
+      .slice(0, this.maxResults)
+      .map(({ description }) => htmlToText(description))
+      .join("\n\n");
+    return results;
+  }
+
+  description =
+    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
+}
diff --git a/app/api/langchain-tools/google_search.ts b/app/api/langchain-tools/google_search.ts
new file mode 100644
index 000000000..017e7c8d0
--- /dev/null
+++ b/app/api/langchain-tools/google_search.ts
@@ -0,0 +1,89 @@
+import { decode } from "html-entities";
+import { convert as htmlToText } from "html-to-text";
+import { Tool } from "langchain/tools";
+import * as cheerio from "cheerio";
+import { getRandomUserAgent } from "./ua_tools";
+
+interface SearchResults {
+  /** The web results of the search. */
+  results: SearchResult[];
+}
+
+interface SearchResult {
+  /** The URL of the result. */
+  url: string;
+  /** The title of the result. */
+  title: string;
+  /**
+   * The description of the result, taken from the text content
+   * of the result element (no HTML markup).
+   */
+  description: string;
+}
+
+/**
+ * Fetches a Google results page for `input` and scrapes its result entries.
+ *
+ * @param input - The search query; URL-encoded before being sent.
+ * @param maxResults - Value passed as the `num` query parameter.
+ * @returns The scraped results; `results` is empty when no element matches.
+ */
+async function search(
+  input: string,
+  maxResults: number,
+): Promise<SearchResults> {
+  const results: SearchResults = {
+    results: [],
+  };
+  const headers = new Headers();
+  headers.append("User-Agent", getRandomUserAgent());
+  const resp = await fetch(
+    `https://www.google.com/search?nfpr=1&num=${maxResults}&pws=0&q=${encodeURIComponent(
+      input,
+    )}`,
+    {
+      headers: headers,
+    },
+  );
+  const respCheerio = cheerio.load(await resp.text());
+  respCheerio("div.g").each((i, elem) => {
+    const item = cheerio.load(elem);
+    const linkElement = item("a");
+    const url = (linkElement.attr("href") ?? "").trim();
+    if (url !== "" && url !== "#") {
+      const title = decode(item("h3").text());
+      const description = item(`div[data-sncf~="1"]`).text().trim();
+      results.results.push({
+        url,
+        title,
+        description,
+      });
+    }
+  });
+  return results;
+}
+
+/** LangChain tool that answers a query with result descriptions scraped from Google. */
+export class GoogleSearch extends Tool {
+  name = "google_search";
+  maxResults = 6;
+
+  /** @ignore */
+  async _call(input: string) {
+    const searchResults = await search(input, this.maxResults);
+
+    if (searchResults.results.length === 0) {
+      return "No good search result found";
+    }
+
+    // Only the descriptions are returned; titles and URLs are not included.
+    const results = searchResults.results
+      .slice(0, this.maxResults)
+      .map(({ description }) => htmlToText(description))
+      .join("\n\n");
+    return results;
+  }
+
+  description =
+    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
+}
diff --git a/app/api/langchain-tools/ua_tools.ts b/app/api/langchain-tools/ua_tools.ts
new file mode 100644
index 000000000..dce00b9a3
--- /dev/null
+++ b/app/api/langchain-tools/ua_tools.ts
@@ -0,0 +1,22 @@
+/** Browser User-Agent strings that `getRandomUserAgent` picks from. */
+const uaList = [
+  // Chrome
+  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
+  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
+  // Firefox
+  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) Gecko/20100101 Firefox/93.0",
+  "Mozilla/5.0 (X11; Linux x86_64; rv:93.0) Gecko/20100101 Firefox/93.0",
+  // Safari
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
+  // Microsoft Edge
+  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.38",
+];
+
+/** Returns one entry of `uaList`, chosen with `Math.random()`. */
+export function getRandomUserAgent() {
+  const randomIndex = Math.floor(Math.random() * uaList.length);
+  return uaList[randomIndex];
+}
diff --git a/app/api/langchain/tool/agent/route.ts b/app/api/langchain/tool/agent/route.ts
index 34d6e5d00..b4e51e877 100644
--- a/app/api/langchain/tool/agent/route.ts
+++ b/app/api/langchain/tool/agent/route.ts
@@ -19,6 +19,8 @@ import { WebBrowser } from "langchain/tools/webbrowser";
 import { Calculator } from "langchain/tools/calculator";
 import { DynamicTool, Tool } from "langchain/tools";
 import { DallEAPIWrapper } from "@/app/api/langchain-tools/dalle_image_generator";
+import { BaiduSearch } from "@/app/api/langchain-tools/baidu_search";
+import { GoogleSearch } from "@/app/api/langchain-tools/google_search";
 
 const serverConfig = getServerSideConfig();
 
@@ -173,6 +175,16 @@ async function handle(req: NextRequest) {
     });
 
     let searchTool: Tool = new DuckDuckGo();
+    if (process.env.CHOOSE_SEARCH_ENGINE) {
+      switch (process.env.CHOOSE_SEARCH_ENGINE) {
+        case "google":
+          searchTool = new GoogleSearch();
+          break;
+        case "baidu":
+          searchTool = new BaiduSearch();
+          break;
+      }
+    }
     if (process.env.BING_SEARCH_API_KEY) {
       let bingSearchTool = new langchainTools["BingSerpAPI"](
         process.env.BING_SEARCH_API_KEY,