支持直连google\baidu搜索引擎 (#30)

* 支持直连google\baidu搜索引擎

* 删除无用参数
This commit is contained in:
hang666 2023-09-22 09:35:38 +08:00 committed by GitHub
parent 61e00ed0ad
commit 83f5dc48d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 201 additions and 0 deletions

View File

@ -136,6 +136,15 @@ OpanAI 密钥,你在 openai 账户页面申请的 api key。
[Web Search API | Microsoft Bing](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
### `CHOOSE_SEARCH_ENGINE` (可选)
此项为直连搜索引擎,可免去 API 试用额度过小的烦恼,但可能因为网络问题导致无法使用。
可选项如下:
- google
- baidu
### `CODE` (可选)
访问密码,可选,可以使用逗号隔开多个密码。

View File

@ -0,0 +1,80 @@
import { decode } from "html-entities";
import { convert as htmlToText } from "html-to-text";
import { Tool } from "langchain/tools";
import * as cheerio from "cheerio";
import { getRandomUserAgent } from "./ua_tools";
/** One hit scraped from the Baidu result page. */
interface SearchResult {
  /** Link target read from the hit's anchor element. */
  url: string;
  /** Headline text of the hit. */
  title: string;
  /** Text snippet that accompanies the hit. */
  description: string;
}

/** Envelope produced by `search`, holding every hit that was collected. */
interface SearchResults {
  /** Hits in the order they were pushed while walking the page. */
  results: Array<SearchResult>;
}
/**
 * Fetches Baidu's HTML result page for a query and scrapes the hits from it.
 *
 * @param input - Raw search query; it is URL-encoded before being sent.
 * @param maxResults - Value interpolated into the `rn` query parameter.
 * @returns The scraped hits; `results` stays empty when no container matched.
 */
async function search(
  input: string,
  maxResults: number,
): Promise<SearchResults> {
  const results: SearchResults = {
    results: [],
  };
  const headers = new Headers();
  // A browser-like User-Agent is sent with the request.
  headers.append("User-Agent", getRandomUserAgent());
  const resp = await fetch(
    `https://www.baidu.com/s?f=8&ie=utf-8&rn=${maxResults}&wd=${encodeURIComponent(
      input,
    )}`,
    {
      headers: headers,
    },
  );
  const respCheerio = cheerio.load(await resp.text());
  respCheerio("div.c-container.new-pmd").each((_index, elem) => {
    const item = cheerio.load(elem);
    // Restrict to the first anchor: a bare `item("a")` matches every link in
    // the container, and `.text()` on that set would glue all of their texts
    // together into a bogus title.
    const linkElement = item("a").first();
    const url = (linkElement.attr("href") ?? "").trim();
    if (url !== "" && url !== "#") {
      const rawTitle = linkElement.text();
      const title = decode(rawTitle);
      // Strip the headline using the exact text found in the container, so the
      // removal still matches when `decode` altered the title.
      const description = item.text().replace(rawTitle, "").trim();
      results.results.push({
        url,
        title,
        description,
      });
    }
  });
  return results;
}
/**
 * Tool that runs a query through the Baidu scraper in this file and returns
 * the text snippets of the hits.
 */
export class BaiduSearch extends Tool {
  name = "baidu_search";

  /** Number of hits requested from `search` and the cap on returned snippets. */
  maxResults = 6;

  /** @ignore */
  async _call(input: string) {
    const { results: hits } = await search(input, this.maxResults);
    if (hits.length === 0) {
      return "No good search result found";
    }

    // Only the snippet of each hit is emitted; snippets are separated by a
    // blank line.
    const snippets: string[] = [];
    for (const hit of hits.slice(0, this.maxResults)) {
      snippets.push(htmlToText(hit.description));
    }
    return snippets.join("\n\n");
  }

  description =
    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
}

View File

@ -0,0 +1,80 @@
import { decode } from "html-entities";
import { convert as htmlToText } from "html-to-text";
import { Tool } from "langchain/tools";
import * as cheerio from "cheerio";
import { getRandomUserAgent } from "./ua_tools";
/** One entry scraped from the Google result page. */
interface SearchResult {
  /** Value of the entry's link `href`. */
  url: string;
  /** Heading text of the entry. */
  title: string;
  /** Snippet text shown for the entry. */
  description: string;
}

/** Container returned by `search` with all collected entries. */
interface SearchResults {
  /** Entries in the order they were pushed while walking the page. */
  results: Array<SearchResult>;
}
/**
 * Fetches Google's HTML result page for a query and scrapes the entries.
 *
 * @param input - Raw search query; it is URL-encoded before being sent.
 * @param maxResults - Value interpolated into the `num` query parameter.
 * @returns The scraped entries; `results` stays empty when nothing matched.
 */
async function search(
  input: string,
  maxResults: number,
): Promise<SearchResults> {
  const collected: SearchResults = { results: [] };

  const requestHeaders = new Headers();
  requestHeaders.append("User-Agent", getRandomUserAgent());

  const query = encodeURIComponent(input);
  const endpoint = `https://www.google.com/search?nfpr=1&num=${maxResults}&pws=0&q=${query}`;
  const response = await fetch(endpoint, { headers: requestHeaders });
  const page = cheerio.load(await response.text());

  for (const node of page("div.g").toArray()) {
    const scoped = cheerio.load(node);
    const href = (scoped("a").attr("href") ?? "").trim();
    // Entries without a usable link are skipped.
    if (href === "" || href === "#") {
      continue;
    }
    collected.results.push({
      url: href,
      title: decode(scoped("h3").text()),
      description: scoped(`div[data-sncf~="1"]`).text().trim(),
    });
  }

  return collected;
}
/**
 * Tool that runs a query through the Google scraper in this file and returns
 * the text snippets of the entries.
 */
export class GoogleSearch extends Tool {
  name = "google_search";

  /** Number of entries requested from `search` and the cap on returned snippets. */
  maxResults = 6;

  /** @ignore */
  async _call(input: string) {
    const found = await search(input, this.maxResults);
    const entries = found.results;
    if (entries.length === 0) {
      return "No good search result found";
    }

    // Only each entry's snippet is emitted; snippets are separated by a blank
    // line.
    const limited = entries.slice(0, this.maxResults);
    const texts = limited.map((entry) => htmlToText(entry.description));
    return texts.join("\n\n");
  }

  description =
    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
}

View File

@ -0,0 +1,20 @@
/** Chrome User-Agent strings (Windows, macOS, Linux). */
const chromeAgents = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
];

/** Firefox User-Agent strings (Windows, macOS, Linux). */
const firefoxAgents = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) Gecko/20100101 Firefox/93.0",
  "Mozilla/5.0 (X11; Linux x86_64; rv:93.0) Gecko/20100101 Firefox/93.0",
];

/** Safari User-Agent strings (macOS). */
const safariAgents = [
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
];

/** Microsoft Edge User-Agent strings (Windows). */
const edgeAgents = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36 Edg/94.0.992.38",
];

/** Pool the picker draws from, ordered Chrome, Firefox, Safari, Edge. */
const uaList = [
  ...chromeAgents,
  ...firefoxAgents,
  ...safariAgents,
  ...edgeAgents,
];

/**
 * Picks one User-Agent string from the pool using `Math.random`.
 *
 * @returns The pool entry at a uniformly drawn index.
 */
export function getRandomUserAgent() {
  const slot = Math.floor(uaList.length * Math.random());
  return uaList[slot];
}

View File

@ -19,6 +19,8 @@ import { WebBrowser } from "langchain/tools/webbrowser";
import { Calculator } from "langchain/tools/calculator";
import { DynamicTool, Tool } from "langchain/tools";
import { DallEAPIWrapper } from "@/app/api/langchain-tools/dalle_image_generator";
import { BaiduSearch } from "@/app/api/langchain-tools/baidu_search";
import { GoogleSearch } from "@/app/api/langchain-tools/google_search";
const serverConfig = getServerSideConfig();
@ -173,6 +175,16 @@ async function handle(req: NextRequest) {
});
let searchTool: Tool = new DuckDuckGo();
if (process.env.CHOOSE_SEARCH_ENGINE) {
switch (process.env.CHOOSE_SEARCH_ENGINE) {
case "google":
searchTool = new GoogleSearch();
break;
case "baidu":
searchTool = new BaiduSearch();
break;
}
}
if (process.env.BING_SEARCH_API_KEY) {
let bingSearchTool = new langchainTools["BingSerpAPI"](
process.env.BING_SEARCH_API_KEY,