update rag app to work with openai #2

Draft: wants to merge 25 commits into main
Changes from 6 commits
19 changes: 9 additions & 10 deletions RAG/examples/basic_rag/llamaindex/docker-compose.yaml
@@ -25,19 +25,18 @@ services:
APP_VECTORSTORE_URL: "http://milvus:19530"
# Type of vectordb used to store embedding supported type milvus, pgvector
APP_VECTORSTORE_NAME: "milvus"
-APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
+APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-openai-main/text-embedding-ada-002}
# embedding model engine used for inference, supported type nvidia-ai-endpoints, huggingface
-APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
+APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-openai}
# url on which embedding model is hosted. If "", Nvidia hosted API is used
-APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
+APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-"https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai"}
# url on which llm model is hosted. If "", Nvidia hosted API is used
-APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
-APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
+APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-"https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai"}
+APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"openai-main/gpt-4o"}
# embedding model engine used for inference, supported type nvidia-ai-endpoints
-APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints}
-NVIDIA_API_KEY: ${NVIDIA_API_KEY}
+APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-openai}
# vectorstore collection name to store embeddings
-COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag}
+COLLECTION_NAME: ${COLLECTION_NAME:-openai-rag}
APP_RETRIEVER_TOPK: 4
APP_RETRIEVER_SCORETHRESHOLD: 0.25
# observability server url
@@ -81,7 +80,7 @@ services:
APP_SERVERURL: http://chain-server
APP_SERVERPORT: 8081
# model name displayed on UI
-APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
+APP_MODELNAME: ${APP_LLM_MODELNAME:-"openai-main/gpt-4o"}
# observability server url
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
@@ -96,4 +95,4 @@ services:

networks:
default:
-name: nvidia-rag
+name: openai-rag
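
Note: with these defaults the chain server depends on the gateway speaking the OpenAI-compatible API. A minimal smoke test of that assumption (hypothetical snippet, not part of this PR; requires the openai package and OPENAI_API_KEY in the environment):

```python
# Hypothetical smoke test for the gateway the new defaults point at.
# URL and model name mirror the compose file above; OPENAI_API_KEY must be set.
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai",
    api_key=os.environ["OPENAI_API_KEY"],
)
resp = client.chat.completions.create(
    model="openai-main/gpt-4o",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)
print(resp.choices[0].message.content)
```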
22 changes: 11 additions & 11 deletions RAG/examples/local_deploy/docker-compose-nim-ms.yaml
@@ -10,7 +10,7 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
shm_size: 20gb
deploy:
resources:
@@ -37,7 +37,7 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
user: "${USERID}"
shm_size: 16GB
deploy:
@@ -65,22 +65,22 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
user: "${USERID}"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 10s
timeout: 20s
retries: 100
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              device_ids: ['${RANKING_MS_GPU_ID:-0}']
-              capabilities: [gpu]
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           device_ids: ['${RANKING_MS_GPU_ID:-0}']
+    #           capabilities: [gpu]
profiles: ["nemo-retriever"]

networks:
default:
-name: nvidia-rag
+name: openai-rag
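
Note: with the placeholder NGC default above, the only secret the OpenAI-backed path strictly needs is OPENAI_API_KEY. A hypothetical preflight check (names illustrative, not part of this PR):

```python
# Hypothetical preflight check: NGC_API_KEY now falls back to a placeholder,
# so only OPENAI_API_KEY is strictly required for the OpenAI-backed path.
import os
import sys

required = ["OPENAI_API_KEY"]
defaults = {"NGC_API_KEY": "1234567890"}

missing = [name for name in required if not os.environ.get(name)]
if missing:
    sys.exit(f"missing required env vars: {missing}")
for name, fallback in defaults.items():
    print(f"{name}={os.environ.get(name, fallback)}")
```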
16 changes: 8 additions & 8 deletions RAG/examples/local_deploy/docker-compose-vectordb.yaml
@@ -74,13 +74,13 @@ services:
depends_on:
- "etcd"
- "minio"
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              capabilities: ["gpu"]
-              device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           capabilities: ["gpu"]
+    #           device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
profiles: ["nemo-retriever", "milvus", ""]

elasticsearch:
@@ -119,4 +119,4 @@ services:

networks:
default:
-name: nvidia-rag
+name: openai-rag
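
Note: with the GPU reservation commented out, Milvus runs CPU-only. A quick connectivity check, assuming pymilvus 2.4.0 (pinned in requirements.txt) and the default port 19530 published on localhost:

```python
# Connectivity check for the CPU-only Milvus this compose file starts.
from pymilvus import connections, utility

connections.connect(alias="default", host="localhost", port="19530")
print("Milvus server version:", utility.get_server_version())
print("collections:", utility.list_collections())
```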
3 changes: 2 additions & 1 deletion RAG/src/chain_server/Dockerfile
@@ -57,4 +57,5 @@ COPY RAG/src/pandasai /opt/RAG/src/pandasai
COPY RAG/tools /opt/RAG/tools

WORKDIR /opt
ENTRYPOINT ["uvicorn", "RAG.src.chain_server.server:app"]
ENV PYTHONPATH=/opt
ENTRYPOINT ["python3.10", "-m", "uvicorn", "RAG.src.chain_server.server:app", "--host", "0.0.0.0"]
83 changes: 70 additions & 13 deletions RAG/src/chain_server/RAG_Chain_Server_API_Client.ipynb
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 36,
"id": "263a7a8b",
"metadata": {},
"outputs": [],
@@ -33,7 +33,35 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 37,
"id": "6970b566",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dockerfile\n",
"RAG_Chain_Server_API_Client.ipynb\n",
"TRUEF-SQL and Plot Agent Workflow API-100325-165032.pdf\n",
"__init__.py\n",
"base.py\n",
"configuration.py\n",
"configuration_wizard.py\n",
"requirements.txt\n",
"server.py\n",
"tracing.py\n",
"utils.py\n"
]
}
],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "c2244b8c",
"metadata": {},
"outputs": [],
@@ -50,8 +78,10 @@
" files = {\n",
" 'file': (file_path, open(file_path, 'rb'), mime_type)\n",
" }\n",
"\n",
" response = requests.post(url, headers=headers, files=files)\n",
"\n",
"\n",
" return response.text\n",
"\n",
"def upload_pdf_files(folder_path, upload_url, num_files):\n",
@@ -69,16 +99,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 80,
"id": "4f5c99ac",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"message\":\"File uploaded successfully\"}\n",
"--- 4.276328086853027 seconds ---\n"
]
}
],
"source": [
"import time\n",
"\n",
"start_time = time.time()\n",
"NUM_DOCS_TO_UPLOAD=100\n",
"upload_pdf_files(\"dataset\", \"http://chain-server:8081/documents\", NUM_DOCS_TO_UPLOAD)\n",
"upload_pdf_files(\"./\", \"http://localhost:8081/documents\", NUM_DOCS_TO_UPLOAD)\n",
"print(f\"--- {time.time() - start_time} seconds ---\")"
]
},
@@ -93,10 +132,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 78,
"id": "4eb862fd",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The NVIDIA Grace Superchip contains 144 Arm cores. It is designed to deliver high performance for data center and AI workloads by integrating these cores with high-bandwidth memory and other advanced features.--- 1.9746460914611816 seconds ---\n"
]
}
],
"source": [
"import time\n",
"import json\n",
@@ -112,7 +159,7 @@
" \"max_tokens\": 256\n",
"}\n",
"\n",
"url = \"http://chain-server:8081/generate\"\n",
"url = \"http://localhost:8081/generate\"\n",
"\n",
"start_time = time.time()\n",
"with requests.post(url, stream=True, json=data) as req:\n",
@@ -139,10 +186,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 74,
"id": "e904a658",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"I don't know.\n",
"--- Generated 0 tokens in 2.855475902557373 seconds ---\n",
"--- 0.0 tokens/sec\n"
]
}
],
"source": [
"data = {\n",
" \"messages\": [\n",
@@ -155,7 +212,7 @@
" \"max_tokens\": 50\n",
"}\n",
"\n",
"url = \"http://chain-server:8081/generate\"\n",
"url = \"http://localhost:8081/generate\"\n",
"\n",
"start_time = time.time()\n",
"tokens_generated = 0\n",
@@ -188,7 +245,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -202,7 +259,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.12.9"
}
},
"nbformat": 4,
31 changes: 25 additions & 6 deletions RAG/src/chain_server/configuration.py
@@ -14,8 +14,12 @@
# limitations under the License.

"""The definition of the application configuration."""
-from RAG.src.chain_server.configuration_wizard import ConfigWizard, configclass, configfield
-
+from RAG.src.chain_server.configuration_wizard import (
+    ConfigWizard,
+    configclass,
+    configfield,
+)
+import os

@configclass
class VectorStoreConfig(ConfigWizard):
@@ -61,13 +65,18 @@ class LLMConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints,openai",
)
model_name_pandas_ai: str = configfield(
"model_name_pandas_ai",
default="ai-mixtral-8x7b-instruct",
help_txt="The name of the ai catalog model to be used with PandasAI agent",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
@@ -104,16 +113,21 @@ class EmbeddingConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are hugginface",
help_txt="The server type of the hosted model. Allowed values are hugginface,openai",
)
dimensions: int = configfield(
"dimensions",
-default=1024,
+default=1536,
help_txt="The required dimensions of the embedding model. Currently utilized for vector DB indexing.",
)
server_url: str = configfield(
"server_url", default="", help_txt="The url of the server hosting nemo embedding model",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
@@ -129,11 +143,16 @@ class RankingConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints,openai",
)
server_url: str = configfield(
"server_url", default="", help_txt="The url of the server hosting nemo Ranking model",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
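
Note: a sketch of how the new api_key and server_url fields could be consumed via langchain-openai (added to requirements.txt below). The settings object and attribute names are illustrative, not taken from this PR:

```python
# Illustrative wiring only; `settings` stands in for the loaded ConfigWizard
# instance, and the model names mirror the compose defaults in this PR.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

def build_clients(settings):
    llm = ChatOpenAI(
        model=settings.llm.model_name,         # e.g. "openai-main/gpt-4o"
        base_url=settings.llm.server_url,      # the OpenAI-compatible gateway URL
        api_key=settings.llm.api_key,          # defaults to OPENAI_API_KEY
    )
    embedder = OpenAIEmbeddings(
        model=settings.embeddings.model_name,  # e.g. "openai-main/text-embedding-ada-002"
        base_url=settings.embeddings.server_url,
        api_key=settings.embeddings.api_key,
        # dimensions default moved from 1024 to 1536 to match ada-002
    )
    return llm, embedder
```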
1 change: 1 addition & 0 deletions RAG/src/chain_server/requirements.txt
@@ -10,6 +10,7 @@ llama-index-llms-langchain==0.1.3
llama-index-embeddings-langchain==0.1.2
llama-index-vector-stores-milvus==0.1.6
llama-index-vector-stores-postgres==0.1.5
+langchain-openai>=0.0.2
pymilvus==2.4.0
dataclass-wizard==0.22.3
opencv-python==4.8.0.74