diff --git a/app/api/langchain-tools/duckduckgo_search.ts b/app/api/langchain-tools/duckduckgo_search.ts
new file mode 100644
index 000000000..58437d97e
--- /dev/null
+++ b/app/api/langchain-tools/duckduckgo_search.ts
@@ -0,0 +1,592 @@
+import { decode } from "html-entities";
+import { convert as htmlToText } from "html-to-text";
+import { Tool } from "langchain/tools";
+
+// Patterns that pull the JSON payloads out of the d.js response body.
+// Each pattern ends on the loader call that follows its payload in the script.
+const SEARCH_REGEX =
+  /DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'/;
+const IMAGES_REGEX =
+  /;DDG\.duckbar\.load\('images', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('news/;
+const NEWS_REGEX =
+  /;DDG\.duckbar\.load\('news', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('videos/;
+const VIDEOS_REGEX =
+  /;DDG\.duckbar\.load\('videos', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.loadModule\('related_searches/;
+const RELATED_SEARCHES_REGEX =
+  /DDG\.duckbar\.loadModule\('related_searches', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('products/;
+// Extracts the vqd token (two or three dash-separated digit groups) from the search page.
+const VQD_REGEX = /vqd=['"](\d+-\d+(?:-\d+)?)['"]/;
+// Validates a caller-supplied vqd against the same token shape VQD_REGEX extracts.
+const VQD_FORMAT_REGEX = /^\d+-\d+(?:-\d+)?$/;
+// Validates a custom date range such as "2021-03-16..2021-03-30".
+const DATE_RANGE_REGEX = /^\d{4}-\d{2}-\d{2}\.\.\d{4}-\d{2}-\d{2}$/;
+
+/** A web result entry parsed from the d.js search payload. */
+interface CallbackSearchResult {
+  /** Website description */
+  a: string;
+  /** Unknown */
+  ae: null;
+  /** ddg!bang information (ex. w Wikipedia en.wikipedia.org) */
+  b?: string;
+  /** URL */
+  c: string;
+  /** URL of some sort. */
+  d: string;
+  /** Class name associations. */
+  da?: string;
+  /** Unknown */
+  h: number;
+  /** Website hostname */
+  i: string;
+  /** Unknown */
+  k: null;
+  /** Unknown */
+  m: number;
+  /** Unknown */
+  o: number;
+  /** Unknown */
+  p: number;
+  /** Unknown */
+  s: string;
+  /** Website Title */
+  t: string;
+  /** Website URL */
+  u: string;
+}
+
+/** Entry carrying the next-page URL; it is skipped when results are built. */
+interface CallbackNextSearch {
+  /** URL to the next page of results */
+  n: string;
+}
+
+/** Envelope shared by the images, news, videos and related-searches payloads. */
+interface CallbackDuckbarPayload<T> {
+  ads: null | unknown[];
+  query: string;
+  queryEncoded: string;
+  response_type: string;
+  results: T[];
+  vqd: {
+    [query: string]: string;
+  };
+}
+
+interface DuckbarImageResult {
+  /** The height of the image in pixels. */
+  height: number;
+  /** The image URL. */
+  image: string;
+  /** The source of the image. */
+  source: string;
+  /** The thumbnail URL. */
+  thumbnail: string;
+  /** The title (or caption) of the image. */
+  title: string;
+  /** The website URL of where the image came from. */
+  url: string;
+  /** The width of the image in pixels. */
+  width: number;
+}
+
+interface DuckbarVideoResult {
+  /** URL of the video */
+  content: string;
+  /** Description of the video */
+  description: string;
+  /** Duration of the video */
+  duration: string;
+  /** Embed HTML for the video */
+  embed_html: string;
+  /** Embed URL for the video */
+  embed_url: string;
+  /** Thumbnail images of the video */
+  images: {
+    large: string;
+    medium: string;
+    motion: string;
+    small: string;
+  };
+  /** Where this search result came from */
+  provider: string;
+  /** ISO timestamp of the upload */
+  published: string;
+  /** What site the video was on */
+  publisher: string;
+  /** Various statistics */
+  statistics: {
+    /** View count of the video */
+    viewCount: number | null;
+  };
+  /** Title of the video */
+  title: string;
+  /** Name of the video uploader(?) */
+  uploader: string;
+}
+
+interface DuckbarRelatedSearch {
+  display_text: string;
+  text: string;
+  web_search_url: string;
+}
+
+interface DuckbarNewsResult {
+  date: number;
+  excerpt: string;
+  image?: string;
+  relative_time: string;
+  syndicate: string;
+  title: string;
+  url: string;
+  use_relevancy: number;
+  is_old?: number;
+  fetch_image?: number;
+}
+
+interface SearchResults {
+  /** Whether there were no results found. */
+  noResults: boolean;
+  /** The VQD of the search query. */
+  vqd: string;
+  /** The web results of the search. */
+  results: SearchResult[];
+  /** The image results of the search. */
+  images?: DuckbarImageResult[];
+  /** The news article results of the search. */
+  news?: NewsResult[];
+  /** The video results of the search. */
+  videos?: VideoResult[];
+  /** The related searches of the query. */
+  related?: RelatedResult[];
+}
+
+interface VideoResult {
+  /** The URL of the video. */
+  url: string;
+  /** The title of the video. */
+  title: string;
+  /** The description of the video. */
+  description: string;
+  /** The image URL of the video. */
+  image: string;
+  /** The duration of the video. (i.e. "9:20") */
+  duration: string;
+  /** The ISO timestamp of when the video was published. */
+  published: string;
+  /** Where the video was published on. (i.e. "YouTube") */
+  publishedOn: string;
+  /** The name of who uploaded the video. */
+  publisher: string;
+  /** The view count of the video. */
+  viewCount?: number;
+}
+
+interface NewsResult {
+  /** The timestamp of when the article was created. */
+  date: number;
+  /** An excerpt of the article. */
+  excerpt: string;
+  /** The image URL used in the article. */
+  image?: string;
+  /** The relative time of when the article was posted, in human readable format. */
+  relativeTime: string;
+  /** Where this article was indexed from. */
+  syndicate: string;
+  /** The title of the article. */
+  title: string;
+  /** The URL of the article. */
+  url: string;
+  /** Whether this article is classified as old. */
+  isOld: boolean;
+}
+
+interface SearchResult {
+  /** The hostname of the website. (i.e. "google.com") */
+  hostname: string;
+  /** The URL of the result. */
+  url: string;
+  /** The title of the result. */
+  title: string;
+  /**
+   * The sanitized description of the result.
+   * Bold tags will still be present in this string.
+   */
+  description: string;
+  /** The description of the result. */
+  rawDescription: string;
+  /** The icon of the website. */
+  icon: string;
+  /** The ddg!bang information of the website, if any. */
+  bang?: SearchResultBang;
+}
+
+interface SearchResultBang {
+  /** The prefix of the bang. (i.e. "w" for !w) */
+  prefix: string;
+  /** The title of the bang. */
+  title: string;
+  /** The domain of the bang. */
+  domain: string;
+}
+
+interface RelatedResult {
+  text: string;
+  raw: string;
+}
+
+enum SearchTimeType {
+  /** From any time. */
+  ALL = "a",
+  /** From the past day. */
+  DAY = "d",
+  /** From the past week. */
+  WEEK = "w",
+  /** From the past month. */
+  MONTH = "m",
+  /** From the past year. */
+  YEAR = "y",
+}
+
+interface SearchOptions {
+  /** The safe search type of the search. */
+  safeSearch?: SafeSearchType;
+  /** The time range of the searches, can be a SearchTimeType or a date range ("2021-03-16..2021-03-30") */
+  time?: SearchTimeType | string;
+  /** The locale(?) of the search. Defaults to "en-us". */
+  locale?: string;
+  /** The region of the search. Defaults to "wt-wt" or all regions. */
+  region?: string;
+  /** The market region(?) of the search. Defaults to "us". */
+  marketRegion?: string;
+  /** The number to offset the results to. */
+  offset?: number;
+  /**
+   * The string that acts like a key to a search.
+   * Set this if you made a search with the same query.
+   */
+  vqd?: string;
+}
+
+enum SafeSearchType {
+  /** Strict filtering, no NSFW content. */
+  STRICT = 0,
+  /** Moderate filtering. */
+  MODERATE = -1,
+  /** No filtering. */
+  OFF = -2,
+}
+
+/** Defaults merged under caller options by sanityCheck; used as-is when search gets no options. */
+const defaultOptions: SearchOptions = {
+  safeSearch: SafeSearchType.OFF,
+  time: SearchTimeType.ALL,
+  locale: "en-us",
+  region: "wt-wt",
+  offset: 0,
+  marketRegion: "us",
+};
+
+/**
+ * Runs a DuckDuckGo web search through the d.js endpoint and parses the JSON
+ * payloads embedded in the returned script.
+ *
+ * @param query - The search query; must be non-empty.
+ * @param options - Search options. When given they are validated and merged
+ *   over defaultOptions; when omitted defaultOptions is used as-is.
+ * @returns The web results plus whichever image, news, video and
+ *   related-search payloads were present in the response.
+ * @throws Error when the query is empty, the response carries the server-error
+ *   marker, or no results payload can be found in the response.
+ */
+async function search(
+  query: string,
+  options?: SearchOptions,
+): Promise<SearchResults> {
+  if (!query) throw new Error("Query cannot be empty!");
+  if (!options) options = defaultOptions;
+  else options = sanityCheck(options);
+
+  // Reuse the caller's vqd when present; otherwise fetch one for this query.
+  const vqd = options.vqd || (await getVQD(query, "web"));
+
+  const queryObject: Record<string, string> = {
+    q: query,
+    ...(options.safeSearch !== SafeSearchType.STRICT ? { t: "D" } : {}),
+    l: options.locale!,
+    ...(options.safeSearch === SafeSearchType.STRICT ? { p: "1" } : {}),
+    kl: options.region || "wt-wt",
+    s: String(options.offset),
+    dl: "en",
+    ct: "US",
+    ss_mkt: options.marketRegion!,
+    df: options.time! as string,
+    vqd,
+    ...(options.safeSearch !== SafeSearchType.STRICT
+      ? { ex: String(options.safeSearch) }
+      : {}),
+    sp: "1",
+    bpa: "1",
+    biaexp: "b",
+    msvrtexp: "b",
+    ...(options.safeSearch === SafeSearchType.STRICT
+      ? {
+          videxp: "a",
+          nadse: "b",
+          eclsexp: "a",
+          stiaexp: "a",
+          tjsexp: "b",
+          related: "b",
+          msnexp: "a",
+        }
+      : {
+          nadse: "b",
+          eclsexp: "b",
+          tjsexp: "b",
+          // cdrexp: 'b'
+        }),
+  };
+
+  const response = await fetch(
+    `https://links.duckduckgo.com/d.js?${queryString(queryObject)}`,
+  );
+  const data = await response.text();
+
+  if (data.includes("DDG.deep.is506"))
+    throw new Error("A server error occurred!");
+
+  // Check the match explicitly so an unexpected response body fails with a
+  // descriptive error rather than a TypeError from indexing null.
+  const searchMatch = SEARCH_REGEX.exec(data);
+  if (!searchMatch)
+    throw new Error("Failed to find the search results in the response!");
+
+  const searchResults = JSON.parse(
+    searchMatch[1].replace(/\t/g, " "),
+  ) as (CallbackSearchResult | CallbackNextSearch)[];
+
+  if (searchResults.length === 1 && !("n" in searchResults[0])) {
+    const onlyResult = searchResults[0] as CallbackSearchResult;
+    /* istanbul ignore next */
+    if (
+      (!onlyResult.da && onlyResult.t === "EOF") ||
+      !onlyResult.a ||
+      onlyResult.d === "google.com search"
+    )
+      return {
+        noResults: true,
+        vqd,
+        results: [],
+      };
+  }
+
+  const results: SearchResults = {
+    noResults: false,
+    vqd,
+    results: [],
+  };
+
+  for (const search of searchResults) {
+    // Skip the next-page entry; it is not a result.
+    if ("n" in search) continue;
+    let bang: SearchResultBang | undefined;
+    if (search.b) {
+      const [prefix, title, domain] = search.b.split("\t");
+      bang = { prefix, title, domain };
+    }
+    results.results.push({
+      title: search.t,
+      description: decode(search.a),
+      rawDescription: search.a,
+      hostname: search.i,
+      icon: `https://external-content.duckduckgo.com/ip3/${search.i}.ico`,
+      url: search.u,
+      bang,
+    });
+  }
+
+  // Images
+  const imagesMatch = IMAGES_REGEX.exec(data);
+  if (imagesMatch) {
+    const imagesResult = JSON.parse(
+      imagesMatch[1].replace(/\t/g, " "),
+    ) as CallbackDuckbarPayload<DuckbarImageResult>;
+    results.images = imagesResult.results.map((i) => {
+      i.title = decode(i.title);
+      return i;
+    });
+  }
+
+  // News
+  const newsMatch = NEWS_REGEX.exec(data);
+  if (newsMatch) {
+    const newsResult = JSON.parse(
+      newsMatch[1].replace(/\t/g, " "),
+    ) as CallbackDuckbarPayload<DuckbarNewsResult>;
+    results.news = newsResult.results.map((article) => ({
+      date: article.date,
+      excerpt: decode(article.excerpt),
+      image: article.image,
+      relativeTime: article.relative_time,
+      syndicate: article.syndicate,
+      title: decode(article.title),
+      url: article.url,
+      isOld: !!article.is_old,
+    })) as NewsResult[];
+  }
+
+  // Videos
+  const videosMatch = VIDEOS_REGEX.exec(data);
+  if (videosMatch) {
+    const videoResult = JSON.parse(
+      videosMatch[1].replace(/\t/g, " "),
+    ) as CallbackDuckbarPayload<DuckbarVideoResult>;
+    results.videos = [];
+    /* istanbul ignore next */
+    for (const video of videoResult.results) {
+      results.videos.push({
+        url: video.content,
+        title: decode(video.title),
+        description: decode(video.description),
+        image:
+          video.images.large ||
+          video.images.medium ||
+          video.images.small ||
+          video.images.motion,
+        duration: video.duration,
+        publishedOn: video.publisher,
+        published: video.published,
+        publisher: video.uploader,
+        viewCount: video.statistics.viewCount || undefined,
+      });
+    }
+  }
+
+  // Related Searches
+  const relatedMatch = RELATED_SEARCHES_REGEX.exec(data);
+  if (relatedMatch) {
+    const relatedResult = JSON.parse(
+      relatedMatch[1].replace(/\t/g, " "),
+    ) as CallbackDuckbarPayload<DuckbarRelatedSearch>;
+    results.related = [];
+    for (const related of relatedResult.results) {
+      results.related.push({
+        text: related.text,
+        raw: related.display_text,
+      });
+    }
+  }
+  return results;
+}
+
+/** Serializes a flat string map into a URL query string. */
+function queryString(query: Record<string, string>) {
+  return new URLSearchParams(query).toString();
+}
+
+/**
+ * Fetches the DuckDuckGo search page for the query and extracts its vqd token.
+ *
+ * @param query - The search query the token is requested for.
+ * @param ia - Value sent as the "ia" query parameter. Defaults to "web".
+ * @returns The vqd token captured by VQD_REGEX.
+ * @throws Error when the request fails or the page contains no vqd token.
+ */
+async function getVQD(query: string, ia = "web"): Promise<string> {
+  try {
+    const response = await fetch(
+      `https://duckduckgo.com/?${queryString({ q: query, ia })}`,
+    );
+    const data = await response.text();
+    const vqdMatch = VQD_REGEX.exec(data);
+    if (!vqdMatch) throw new Error("The response contains no vqd token.");
+    return vqdMatch[1];
+  } catch {
+    throw new Error(`Failed to get the VQD for query "${query}".`);
+  }
+}
+
+/**
+ * Merges the given options over defaultOptions and validates every field.
+ *
+ * @param options - The caller-supplied search options.
+ * @returns A new options object with defaults filled in.
+ * @throws TypeError when the safe search type, offset type, time, locale,
+ *   region or market region is invalid.
+ * @throws RangeError when the offset is negative.
+ * @throws Error when the vqd does not have the shape of a vqd token.
+ */
+function sanityCheck(options: SearchOptions) {
+  options = Object.assign({}, defaultOptions, options);
+
+  if (!(options.safeSearch! in SafeSearchType))
+    throw new TypeError(
+      `${options.safeSearch} is an invalid safe search type!`,
+    );
+
+  // A string that passed the check above is looked up in the enum object.
+  /* istanbul ignore next */
+  if (typeof options.safeSearch! === "string")
+    options.safeSearch = SafeSearchType[
+      options.safeSearch!
+    ] as any as SafeSearchType;
+
+  if (typeof options.offset !== "number")
+    throw new TypeError(`Search offset is not a number!`);
+
+  if (options.offset! < 0)
+    throw new RangeError("Search offset cannot be below zero!");
+
+  if (
+    options.time &&
+    !Object.values(SearchTimeType).includes(options.time as SearchTimeType) &&
+    !DATE_RANGE_REGEX.test(options.time as string)
+  )
+    throw new TypeError(`${options.time} is an invalid search time!`);
+
+  if (!options.locale || typeof options.locale! !== "string")
+    throw new TypeError("Search locale must be a string!");
+
+  if (!options.region || typeof options.region! !== "string")
+    throw new TypeError("Search region must be a string!");
+
+  if (!options.marketRegion || typeof options.marketRegion! !== "string")
+    throw new TypeError("Search market region must be a string!");
+
+  if (options.vqd && !VQD_FORMAT_REGEX.test(options.vqd))
+    throw new Error(`${options.vqd} is an invalid VQD!`);
+
+  return options;
+}
+
+/**
+ * LangChain tool that answers a query with the plain-text descriptions of the
+ * top DuckDuckGo web results.
+ */
+export class DuckDuckGo extends Tool {
+  name = "duckduckgo_search";
+  /** Maximum number of result descriptions returned per call. */
+  maxResults = 4;
+
+  /** @ignore */
+  async _call(input: string) {
+    const searchResults = await search(input, {
+      safeSearch: SafeSearchType.OFF,
+    });
+
+    // Also covers a payload that produced no result entries at all.
+    if (searchResults.noResults || searchResults.results.length === 0) {
+      return "No good search result found";
+    }
+
+    const results = searchResults.results
+      .slice(0, this.maxResults)
+      .map(({ description }) => htmlToText(description))
+      .join("\n\n");
+
+    return results;
+  }
+
+  description =
+    "a search engine. useful for when you need to answer questions about current events. input should be a search query.";
+}
diff --git a/app/api/langchain/tool/agent/route.ts b/app/api/langchain/tool/agent/route.ts index 5c9457a3b..f659880a4 100644 --- a/app/api/langchain/tool/agent/route.ts +++ b/app/api/langchain/tool/agent/route.ts @@ -16,7 +16,7 @@ import { BufferMemory, ChatMessageHistory } from "langchain/memory"; import { initializeAgentExecutorWithOptions } from "langchain/agents"; import { SerpAPI } from "langchain/tools"; import { Calculator } from "langchain/tools/calculator"; -import { DuckDuckGo } from "@/app/api/langchain-tools/duckduckgo"; +import { DuckDuckGo } from "@/app/api/langchain-tools/duckduckgo_search"; import { HttpGetTool } from "@/app/api/langchain-tools/http_get"; const serverConfig = getServerSideConfig(); @@ -220,4 +220,4 @@ async function handle(req: NextRequest) { export const GET = handle; export const POST = handle; -export const runtime = "nodejs"; +export const runtime = "edge"; diff --git a/package.json b/package.json index b30410cea..2ed0fcc58 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "duck-duck-scrape": "^2.2.4", "emoji-picker-react": "^4.4.7", "fuse.js": "^6.6.2", + "html-entities": "^2.4.0", "html-to-image": "^1.11.11", "html-to-text": "^9.0.5", "langchain": "^0.0.114", diff --git a/yarn.lock b/yarn.lock index 5a3dbdef1..6f3287bec 100644 --- a/yarn.lock +++ b/yarn.lock @@ -16,9 +16,9 @@ "@jridgewell/trace-mapping" "^0.3.9" "@anthropic-ai/sdk@^0.5.7": - version "0.5.7" - resolved "https://registry.yarnpkg.com/@anthropic-ai/sdk/-/sdk-0.5.7.tgz#8d5155342cdda95fcf0069594c84d7a1a3abe579" - integrity sha512-0uLvrn24D9ehe8KXBFKohmdvMdhPk8jGYGaROOZo46fgbHKSTEOkEv1zmbYw4fAiF/qdPDBNav+8zfW0iD2WOg== + version "0.5.9" + resolved "https://registry.yarnpkg.com/@anthropic-ai/sdk/-/sdk-0.5.9.tgz#8a78f3a0558b2aee6572bab1f38f0c996999b985" + integrity sha512-9/TYca4qSe0xG40LLNf5vemybw5JAKF5OE6Eiyc+O+h3+VGGPeOKo+1SHaWBP5zS7bGX2o3Ne6EonPWyh9oNqA== dependencies: "@types/node" "^18.11.18" "@types/node-fetch" 
"^2.6.4" @@ -1525,16 +1525,21 @@ "@types/node" "*" form-data "^3.0.0" -"@types/node@*", "@types/node@^20.3.3": +"@types/node@*": + version "20.4.5" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.4.5.tgz#9dc0a5cb1ccce4f7a731660935ab70b9c00a5d69" + integrity sha512-rt40Nk13II9JwQBdeYqmbn2Q6IVTA5uPhvSO+JVqdXw/6/4glI6oR9ezty/A9Hg5u7JH4OmYmuQ+XvjKm0Datg== + +"@types/node@^18.11.18": + version "18.17.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.17.1.tgz#84c32903bf3a09f7878c391d31ff08f6fe7d8335" + integrity sha512-xlR1jahfizdplZYRU59JlUx9uzF1ARa8jbhM11ccpCJya8kvos5jwdm2ZAgxSCwOl0fq21svP18EVwPBXMQudw== + +"@types/node@^20.3.3": version "20.3.3" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.3.3.tgz#329842940042d2b280897150e023e604d11657d6" integrity sha512-wheIYdr4NYML61AjC8MKj/2jrR/kDQri/CIpVoZwldwhnIrD/j9jIU5bJ8yBKuB2VhpFV7Ab6G2XkBjv9r9Zzw== -"@types/node@^18.11.18": - version "18.16.19" - resolved "https://registry.yarnpkg.com/@types/node/-/node-18.16.19.tgz#cb03fca8910fdeb7595b755126a8a78144714eea" - integrity sha512-IXl7o+R9iti9eBW4Wg2hx1xQDig183jj7YLn8F7udNceyfkbn1ZxmzZXuak20gR40D7pIkIY1kYGx5VIGbaHKA== - "@types/parse-json@^4.0.0": version "4.0.0" resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0" @@ -3795,7 +3800,7 @@ hoist-non-react-statics@^3.3.0, hoist-non-react-statics@^3.3.2: dependencies: react-is "^16.7.0" -html-entities@^2.3.3: +html-entities@^2.3.3, html-entities@^2.4.0: version "2.4.0" resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061" integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ== @@ -4264,9 +4269,9 @@ langchain@^0.0.114: zod-to-json-schema "^3.20.4" langsmith@~0.0.11: - version "0.0.11" - resolved "https://registry.yarnpkg.com/langsmith/-/langsmith-0.0.11.tgz#618099fcd4efc3c38cfedaf33d50d7bbf0957258" - 
integrity sha512-4JTYIog+l3DncDZ9qcHILWYRUz8aI3tfF5arLAKg1k3U7Ivk9SXaYJqF8HPHeCrFxwHeY66NdPc7DqLUKCyoHQ== + version "0.0.16" + resolved "https://registry.yarnpkg.com/langsmith/-/langsmith-0.0.16.tgz#5afdf6155ae58b8f99868b00cca575a25cb616ad" + integrity sha512-HD97KJaSpCcuixbjfRhpSFdo5rWz28OJiUVs5uBRZDKUN2Amg4PWd0NFzGO3xC8osnjPPRvgH9by6Ige79hjxQ== dependencies: "@types/uuid" "^9.0.1" commander "^10.0.1" @@ -4931,7 +4936,7 @@ micromatch@^4.0.4, micromatch@^4.0.5: mime-db@1.52.0: version "1.52.0" - resolved "https://registry.npmmirror.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== mime-types@^2.1.12, mime-types@^2.1.27: @@ -5758,7 +5763,7 @@ safe-regex-test@^1.0.0: "safer-buffer@>= 2.1.2 < 3.0.0": version "2.1.2" - resolved "https://registry.npmmirror.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" + resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== sass@^1.59.2: @@ -6372,7 +6377,7 @@ use-sync-external-store@1.2.0, use-sync-external-store@^1.0.0: uuid@^9.0.0: version "9.0.0" - resolved "https://registry.npmmirror.com/uuid/-/uuid-9.0.0.tgz#592f550650024a38ceb0c562f2f6aa435761efb5" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.0.tgz#592f550650024a38ceb0c562f2f6aa435761efb5" integrity sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg== uvu@^0.5.0: