From ac57b2c770bda59584e984af33ab9d3d5753ab4c Mon Sep 17 00:00:00 2001 From: Hk-Gosuto Date: Thu, 4 Apr 2024 12:11:47 +0800 Subject: [PATCH] chore: temp commit --- app/api/langchain/rag/store/route.ts | 159 +++++++++++++++++++++++++++ app/client/platforms/utils.ts | 13 ++- app/components/chat.module.scss | 37 +++---- app/components/chat.tsx | 21 ++-- package.json | 2 + yarn.lock | 56 +++++++++- 6 files changed, 258 insertions(+), 30 deletions(-) create mode 100644 app/api/langchain/rag/store/route.ts diff --git a/app/api/langchain/rag/store/route.ts b/app/api/langchain/rag/store/route.ts new file mode 100644 index 000000000..d19bd31c2 --- /dev/null +++ b/app/api/langchain/rag/store/route.ts @@ -0,0 +1,159 @@ +import { NextRequest, NextResponse } from "next/server"; +import { auth } from "@/app/api/auth"; +import { NodeJSTool } from "@/app/api/langchain-tools/nodejs_tools"; +import { ACCESS_CODE_PREFIX, ModelProvider } from "@/app/constant"; +import { OpenAI, OpenAIEmbeddings } from "@langchain/openai"; +import path from "path"; +import { PDFLoader } from "langchain/document_loaders/fs/pdf"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import { Pinecone } from "@pinecone-database/pinecone"; +import { Document } from "@langchain/core/documents"; +import { PineconeStore } from "@langchain/pinecone"; +import { getServerSideConfig } from "@/app/config/server"; +import { RequestBody } from "../../tool/agent/agentapi"; + +async function handle(req: NextRequest) { + if (req.method === "OPTIONS") { + return NextResponse.json({ body: "OK" }, { status: 200 }); + } + try { + const authResult = auth(req, ModelProvider.GPT); + if (authResult.error) { + return NextResponse.json(authResult, { + status: 401, + }); + } + + const reqBody: RequestBody = await req.json(); + const authToken = req.headers.get("Authorization") ?? ""; + const token = authToken.trim().replaceAll("Bearer ", "").trim(); + + //https://js.langchain.com/docs/integrations/vectorstores/pinecone + // const formData = await req.formData(); + // const file = formData.get("file") as File; + // const originalFileName = file?.name; + + // const fileReader = file.stream().getReader(); + // const fileData: number[] = []; + + // while (true) { + // const { done, value } = await fileReader.read(); + // if (done) break; + // fileData.push(...value); + // } + + // const buffer = Buffer.from(fileData); + // const fileType = path.extname(originalFileName).slice(1); + // const fileBlob = bufferToBlob(buffer, "application/pdf") + + // const loader = new PDFLoader(fileBlob); + // const docs = await loader.load(); + // const textSplitter = new RecursiveCharacterTextSplitter({ + // chunkSize: 1000, + // chunkOverlap: 200, + // }); + // const splits = await textSplitter.splitDocuments(docs); + const pinecone = new Pinecone(); + // await pinecone.createIndex({ + // name: 'example-index', + // dimension: 1536, + // metric: 'cosine', + // spec: { + // pod: { + // environment: 'gcp-starter', + // podType: 'p1.x1', + // pods: 1 + // } + // } + // }); + const pineconeIndex = pinecone.Index("example-index"); + const docs = [ + new Document({ + metadata: { foo: "bar" }, + pageContent: "pinecone is a vector db", + }), + new Document({ + metadata: { foo: "bar" }, + pageContent: "the quick brown fox jumped over the lazy dog", + }), + new Document({ + metadata: { baz: "qux" }, + pageContent: "lorem ipsum dolor sit amet", + }), + new Document({ + metadata: { baz: "qux" }, + pageContent: "pinecones are the woody fruiting body and of a pine tree", + }), + ]; + const apiKey = getOpenAIApiKey(token); + const baseUrl = getOpenAIBaseUrl(reqBody.baseUrl); + console.log(baseUrl); + const embeddings = new OpenAIEmbeddings( + { + modelName: "text-embedding-ada-002", + openAIApiKey: apiKey, + }, + { basePath: baseUrl }, + ); + await PineconeStore.fromDocuments(docs, embeddings, { + pineconeIndex, + maxConcurrency: 5, + }); + const vectorStore = await PineconeStore.fromExistingIndex(embeddings, { + pineconeIndex, + }); + const results = await vectorStore.similaritySearch("pinecone", 1, { + foo: "bar", + }); + console.log(results); + return NextResponse.json( + { + storeId: "", + }, + { + status: 200, + }, + ); + } catch (e) { + console.error(e); + return new Response(JSON.stringify({ error: (e as any).message }), { + status: 500, + headers: { "Content-Type": "application/json" }, + }); + } +} + +function bufferToBlob(buffer: Buffer, mimeType?: string): Blob { + const arrayBuffer: ArrayBuffer = buffer.buffer.slice( + buffer.byteOffset, + buffer.byteOffset + buffer.byteLength, + ); + return new Blob([arrayBuffer], { type: mimeType || "" }); +} +function getOpenAIApiKey(token: string) { + const serverConfig = getServerSideConfig(); + const isApiKey = !token.startsWith(ACCESS_CODE_PREFIX); + + let apiKey = serverConfig.apiKey; + if (isApiKey && token) { + apiKey = token; + } + return apiKey; +} + +function getOpenAIBaseUrl(reqBaseUrl: string | undefined) { + const serverConfig = getServerSideConfig(); + let baseUrl = "https://api.openai.com/v1"; + if (serverConfig.baseUrl) baseUrl = serverConfig.baseUrl; + if (reqBaseUrl?.startsWith("http://") || reqBaseUrl?.startsWith("https://")) + baseUrl = reqBaseUrl; + if (!baseUrl.endsWith("/v1")) + baseUrl = baseUrl.endsWith("/") ? `${baseUrl}v1` : `${baseUrl}/v1`; + console.log("[baseUrl]", baseUrl); + return baseUrl; +} + +export const GET = handle; +export const POST = handle; + +export const runtime = "nodejs"; diff --git a/app/client/platforms/utils.ts b/app/client/platforms/utils.ts index 9973bb19a..543b96a12 100644 --- a/app/client/platforms/utils.ts +++ b/app/client/platforms/utils.ts @@ -1,7 +1,16 @@ import { getHeaders } from "../api"; +export interface FileInfo { + originalFilename: string; + fileName: string; + filePath: string; + size: number; +} + export class FileApi { - async upload(file: any): Promise { + async upload(file: any): Promise { + const fileName = file.name; + const fileSize = file.size; const formData = new FormData(); formData.append("file", file); var headers = getHeaders(true); @@ -16,6 +25,8 @@ export class FileApi { const resJson = await res.json(); console.log(resJson); return { + originalFilename: fileName, + size: fileSize, fileName: resJson.fileName, filePath: resJson.filePath, }; diff --git a/app/components/chat.module.scss b/app/components/chat.module.scss index 66f254a92..a6f706628 100644 --- a/app/components/chat.module.scss +++ b/app/components/chat.module.scss @@ -17,12 +17,30 @@ background-size: cover; background-position: center; background-color: var(--second); + display: flex; + + span { + position: absolute; + font-size: 12px; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + line-height: 1.5; + top: 8px; + bottom: 8px; + left: 5px; + right: 10px; + pointer-events: none; + } .attach-file-mask { width: 100%; height: 100%; opacity: 0; transition: all ease 0.2s; + z-index: 999; } .attach-file-mask:hover { @@ -40,25 +58,6 @@ float: right; background-color: var(--white); } - - .attach-file-name { - font-size: 12px; - display: flex; - flex-direction: column; - justify-content: center; - width: 100%; - height: 100%; - padding: 4px; - box-sizing: border-box; - overflow: hidden; - text-overflow: ellipsis; - word-break: break-all; - // line-height: 1.2; - // max-height: 2.4em; - position: absolute; - top: 0; - left: 0; - } } .attach-images { diff --git a/app/components/chat.tsx b/app/components/chat.tsx index 701edf89f..767b87cfc 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -118,6 +118,7 @@ import { WebTranscriptionApi, } from "../utils/speech"; import { getServerSideConfig } from "../config/server"; +import { FileInfo } from "../client/platforms/utils"; const ttsPlayer = createTTSPlayer(); @@ -463,7 +464,7 @@ export function ChatActions(props: { uploadImage: () => void; setAttachImages: (images: string[]) => void; uploadFile: () => void; - setAttachFiles: (files: string[]) => void; + setAttachFiles: (files: FileInfo[]) => void; setUploading: (uploading: boolean) => void; showPromptModal: () => void; scrollToBottom: () => void; @@ -769,7 +770,7 @@ function _Chat() { const navigate = useNavigate(); const [attachImages, setAttachImages] = useState([]); const [uploading, setUploading] = useState(false); - const [attachFiles, setAttachFiles] = useState([]); + const [attachFiles, setAttachFiles] = useState([]); // prompt hints const promptStore = usePromptStore(); @@ -1353,11 +1354,11 @@ function _Chat() { } async function uploadFile() { - const uploadFiles: string[] = []; + const uploadFiles: FileInfo[] = []; uploadFiles.push(...attachFiles); uploadFiles.push( - ...(await new Promise((res, rej) => { + ...(await new Promise((res, rej) => { const fileInput = document.createElement("input"); fileInput.type = "file"; fileInput.accept = ".pdf,.txt,.json,.csv,.md"; @@ -1366,7 +1367,7 @@ function _Chat() { setUploading(true); const files = event.target.files; const api = new ClientApi(); - const fileDatas: string[] = []; + const fileDatas: FileInfo[] = []; for (let i = 0; i < files.length; i++) { const file = event.target.files[i]; api.file @@ -1375,7 +1376,7 @@ function _Chat() { console.log(fileInfo); fileDatas.push(fileInfo); if ( - fileDatas.length === 5 || + fileDatas.length === 3 || fileDatas.length === files.length ) { setUploading(false); @@ -1778,7 +1779,12 @@ function _Chat() {
{attachFiles.map((file, index) => { return ( -
+
+ {file.originalFilename}
{ @@ -1788,7 +1794,6 @@ function _Chat() { }} />
-
${file}
); })} diff --git a/package.json b/package.json index 90c75f216..d4d63aad8 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,9 @@ "@hello-pangea/dnd": "^16.5.0", "@langchain/community": "0.0.30", "@langchain/openai": "0.0.14", + "@langchain/pinecone": "^0.0.4", "@next/third-parties": "^14.1.0", + "@pinecone-database/pinecone": "^2.2.0", "@svgr/webpack": "^6.5.1", "@vercel/analytics": "^0.1.11", "@vercel/speed-insights": "^1.0.2", diff --git a/yarn.lock b/yarn.lock index e825e351e..5864e85f5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1819,7 +1819,7 @@ uuid "^9.0.0" zod "^3.22.3" -"@langchain/core@0.1.30", "@langchain/core@~0.1.13", "@langchain/core@~0.1.29", "@langchain/core@~0.1.44", "@langchain/core@~0.1.45": +"@langchain/core@0.1.30", "@langchain/core@~0.1", "@langchain/core@~0.1.13", "@langchain/core@~0.1.29", "@langchain/core@~0.1.44", "@langchain/core@~0.1.45": version "0.1.30" resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.1.30.tgz#6f9b5d3cfe9bb447f94bc8c71be1527b9decd302" integrity sha512-3oqEKgwe7U/efieZrCC6BLwOHm4YPj27mRNBZLaB5BwPh3a7gXIevxEnbjN3o5j9kJqc5acG7nn35h7Wkrf2Ag== @@ -1858,6 +1858,16 @@ zod "^3.22.4" zod-to-json-schema "^3.22.3" +"@langchain/pinecone@^0.0.4": + version "0.0.4" + resolved "https://registry.yarnpkg.com/@langchain/pinecone/-/pinecone-0.0.4.tgz#312f3ff4286b1278c47c676d7be5a4f0f5c1409c" + integrity sha512-9Rme771vHbRKXHy0IxOFFUyYtJfL+I1LgleF1cSkb2ZxgtOwN7uvh0Cp23WrCzEh8DHVEaRYPSbBC4W3YfqwfA== + dependencies: + "@langchain/core" "~0.1" + "@pinecone-database/pinecone" "^2.2.0" + flat "^5.0.2" + uuid "^9.0.0" + "@next/env@13.5.6": version "13.5.6" resolved "https://registry.yarnpkg.com/@next/env/-/env-13.5.6.tgz#c1148e2e1aa166614f05161ee8f77ded467062bc" @@ -1943,6 +1953,16 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" +"@pinecone-database/pinecone@^2.2.0": + version "2.2.0" + resolved "https://registry.yarnpkg.com/@pinecone-database/pinecone/-/pinecone-2.2.0.tgz#f895741756fc66c8b4e0c46eccbd806614879766" + integrity sha512-qfVs9n5YyTmerIV1GE1u89xF1W3oFSF53STW68Oqyxey0dGq4775cCw8G5pnwoy872uqfh+tMRDME9bcWfinUw== + dependencies: + "@sinclair/typebox" "^0.29.0" + ajv "^8.12.0" + cross-fetch "^3.1.5" + encoding "^0.1.13" + "@pkgr/core@^0.1.0": version "0.1.1" resolved "https://registry.yarnpkg.com/@pkgr/core/-/core-0.1.1.tgz#1ec17e2edbec25c8306d424ecfbf13c7de1aaa31" @@ -1966,6 +1986,11 @@ domhandler "^5.0.3" selderee "^0.11.0" +"@sinclair/typebox@^0.29.0": + version "0.29.6" + resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.29.6.tgz#4cd8372f9247372edd5fc5af44f67e2032c46e2f" + integrity sha512-aX5IFYWlMa7tQ8xZr3b2gtVReCvg7f3LEhjir/JAjX2bJCMVJA5tIPv30wTD4KDfcwMd7DDYY3hFDeGmOgtrZQ== + "@smithy/abort-controller@^2.2.0": version "2.2.0" resolved "https://registry.yarnpkg.com/@smithy/abort-controller/-/abort-controller-2.2.0.tgz#18983401a5e2154b5c94057730024a7d14cbcd35" @@ -3040,6 +3065,16 @@ ajv@^6.12.4, ajv@^6.12.5: json-schema-traverse "^0.4.1" uri-js "^4.2.2" +ajv@^8.12.0: + version "8.12.0" + resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.12.0.tgz#d1a0527323e22f53562c567c00991577dfbe19d1" + integrity sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA== + dependencies: + fast-deep-equal "^3.1.1" + json-schema-traverse "^1.0.0" + require-from-string "^2.0.2" + uri-js "^4.2.2" + ansi-escapes@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-5.0.0.tgz#b6a0caf0eef0c41af190e9a749e0c00ec04bb2a6" @@ -3585,6 +3620,13 @@ cross-env@^7.0.3: dependencies: cross-spawn "^7.0.1" +cross-fetch@^3.1.5: + version "3.1.8" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.8.tgz#0327eba65fd68a7d119f8fb2bf9334a1a7956f82" + integrity sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg== + dependencies: + node-fetch "^2.6.12" + cross-spawn@^7.0.1, cross-spawn@^7.0.2, cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" @@ -5511,6 +5553,11 @@ json-schema-traverse@^0.4.1: resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660" integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg== +json-schema-traverse@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2" + integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug== + json-stable-stringify-without-jsonify@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" @@ -6420,7 +6467,7 @@ node-ensure@^0.0.0: resolved "https://registry.yarnpkg.com/node-ensure/-/node-ensure-0.0.0.tgz#ecae764150de99861ec5c810fd5d096b183932a7" integrity sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw== -node-fetch@^2.6.7: +node-fetch@^2.6.12, node-fetch@^2.6.7: version "2.7.0" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== @@ -7040,6 +7087,11 @@ remark-rehype@^10.0.0: mdast-util-to-hast "^12.1.0" unified "^10.0.0" +require-from-string@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" + integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw== + resolve-from@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"