update rag app to work with openai #2

Draft: wants to merge 25 commits into main
Changes from 6 commits
19 changes: 9 additions & 10 deletions RAG/examples/basic_rag/llamaindex/docker-compose.yaml
@@ -25,19 +25,18 @@ services:
APP_VECTORSTORE_URL: "http://milvus:19530"
# Type of vectordb used to store embedding supported type milvus, pgvector
APP_VECTORSTORE_NAME: "milvus"
-APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
+APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-openai-main/text-embedding-ada-002}
# embedding model engine used for inference, supported type nvidia-ai-endpoints, huggingface
-APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
+APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-openai}
# url on which embedding model is hosted. If "", Nvidia hosted API is used
-APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
+APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-"https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai"}
# url on which llm model is hosted. If "", Nvidia hosted API is used
-APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
-APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
+APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-"https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai"}
+APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"openai-main/gpt-4o"}
# embedding model engine used for inference, supported type nvidia-ai-endpoints
-APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints}
-NVIDIA_API_KEY: ${NVIDIA_API_KEY}
+APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-openai}
# vectorstore collection name to store embeddings
-COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag}
+COLLECTION_NAME: ${COLLECTION_NAME:-openai-rag}
APP_RETRIEVER_TOPK: 4
APP_RETRIEVER_SCORETHRESHOLD: 0.25
# observability server url
@@ -81,7 +80,7 @@ services:
APP_SERVERURL: http://chain-server
APP_SERVERPORT: 8081
# model name displayed on UI
-APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"}
+APP_MODELNAME: ${APP_LLM_MODELNAME:-"openai-main/gpt-4o"}
# observability server url
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
@@ -96,4 +95,4 @@ services:

networks:
default:
-name: nvidia-rag
+name: openai-rag
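
Note: with these defaults the chain server depends on the gateway speaking the OpenAI-compatible API. A minimal smoke test of that assumption (hypothetical snippet, not part of this PR; requires the openai package and OPENAI_API_KEY in the environment):

```python
# Hypothetical smoke test for the gateway the new defaults point at.
# URL and model name mirror the compose file above; OPENAI_API_KEY must be set.
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://internal.devtest.truefoundry.tech/api/llm/api/inference/openai",
    api_key=os.environ["OPENAI_API_KEY"],
)
resp = client.chat.completions.create(
    model="openai-main/gpt-4o",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)
print(resp.choices[0].message.content)
```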
22 changes: 11 additions & 11 deletions RAG/examples/local_deploy/docker-compose-nim-ms.yaml
@@ -10,7 +10,7 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
shm_size: 20gb
deploy:
resources:
@@ -37,7 +37,7 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
user: "${USERID}"
shm_size: 16GB
deploy:
@@ -65,22 +65,22 @@ services:
expose:
- "8000"
environment:
-NGC_API_KEY: ${NGC_API_KEY}
+NGC_API_KEY: ${NGC_API_KEY:-1234567890}
user: "${USERID}"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 10s
timeout: 20s
retries: 100
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              device_ids: ['${RANKING_MS_GPU_ID:-0}']
-              capabilities: [gpu]
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           device_ids: ['${RANKING_MS_GPU_ID:-0}']
+    #           capabilities: [gpu]
profiles: ["nemo-retriever"]

networks:
default:
-name: nvidia-rag
+name: openai-rag
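
Note: with the placeholder NGC default above, the only secret the OpenAI-backed path strictly needs is OPENAI_API_KEY. A hypothetical preflight check (names illustrative, not part of this PR):

```python
# Hypothetical preflight check: NGC_API_KEY now falls back to a placeholder,
# so only OPENAI_API_KEY is strictly required for the OpenAI-backed path.
import os
import sys

required = ["OPENAI_API_KEY"]
defaults = {"NGC_API_KEY": "1234567890"}

missing = [name for name in required if not os.environ.get(name)]
if missing:
    sys.exit(f"missing required env vars: {missing}")
for name, fallback in defaults.items():
    print(f"{name}={os.environ.get(name, fallback)}")
```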
16 changes: 8 additions & 8 deletions RAG/examples/local_deploy/docker-compose-vectordb.yaml
@@ -74,13 +74,13 @@ services:
depends_on:
- "etcd"
- "minio"
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              capabilities: ["gpu"]
-              device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           capabilities: ["gpu"]
+    #           device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
profiles: ["nemo-retriever", "milvus", ""]

elasticsearch:
@@ -119,4 +119,4 @@ services:

networks:
default:
-name: nvidia-rag
+name: openai-rag
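
Note: with the GPU reservation commented out, Milvus runs CPU-only. A quick connectivity check, assuming pymilvus 2.4.0 (pinned in requirements.txt) and the default port 19530 published on localhost:

```python
# Connectivity check for the CPU-only Milvus this compose file starts.
from pymilvus import connections, utility

connections.connect(alias="default", host="localhost", port="19530")
print("Milvus server version:", utility.get_server_version())
print("collections:", utility.list_collections())
```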
3 changes: 2 additions & 1 deletion RAG/src/chain_server/Dockerfile
@@ -57,4 +57,5 @@ COPY RAG/src/pandasai /opt/RAG/src/pandasai
COPY RAG/tools /opt/RAG/tools

WORKDIR /opt
ENTRYPOINT ["uvicorn", "RAG.src.chain_server.server:app"]
ENV PYTHONPATH=/opt
ENTRYPOINT ["python3.10", "-m", "uvicorn", "RAG.src.chain_server.server:app", "--host", "0.0.0.0"]
83 changes: 70 additions & 13 deletions RAG/src/chain_server/RAG_Chain_Server_API_Client.ipynb
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 36,
"id": "263a7a8b",
"metadata": {},
"outputs": [],
@@ -33,7 +33,35 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 37,
"id": "6970b566",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dockerfile\n",
"RAG_Chain_Server_API_Client.ipynb\n",
"TRUEF-SQL and Plot Agent Workflow API-100325-165032.pdf\n",
"__init__.py\n",
"base.py\n",
"configuration.py\n",
"configuration_wizard.py\n",
"requirements.txt\n",
"server.py\n",
"tracing.py\n",
"utils.py\n"
]
}
],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "c2244b8c",
"metadata": {},
"outputs": [],
@@ -50,8 +78,10 @@
" files = {\n",
" 'file': (file_path, open(file_path, 'rb'), mime_type)\n",
" }\n",
"\n",
" response = requests.post(url, headers=headers, files=files)\n",
"\n",
"\n",
" return response.text\n",
"\n",
"def upload_pdf_files(folder_path, upload_url, num_files):\n",
@@ -69,16 +99,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 80,
"id": "4f5c99ac",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"message\":\"File uploaded successfully\"}\n",
"--- 4.276328086853027 seconds ---\n"
]
}
],
"source": [
"import time\n",
"\n",
"start_time = time.time()\n",
"NUM_DOCS_TO_UPLOAD=100\n",
"upload_pdf_files(\"dataset\", \"http://chain-server:8081/documents\", NUM_DOCS_TO_UPLOAD)\n",
"upload_pdf_files(\"./\", \"http://localhost:8081/documents\", NUM_DOCS_TO_UPLOAD)\n",
"print(f\"--- {time.time() - start_time} seconds ---\")"
]
},
@@ -93,10 +132,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 78,
"id": "4eb862fd",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The NVIDIA Grace Superchip contains 144 Arm cores. It is designed to deliver high performance for data center and AI workloads by integrating these cores with high-bandwidth memory and other advanced features.--- 1.9746460914611816 seconds ---\n"
]
}
],
"source": [
"import time\n",
"import json\n",
@@ -112,7 +159,7 @@
" \"max_tokens\": 256\n",
"}\n",
"\n",
"url = \"http://chain-server:8081/generate\"\n",
"url = \"http://localhost:8081/generate\"\n",
"\n",
"start_time = time.time()\n",
"with requests.post(url, stream=True, json=data) as req:\n",
@@ -139,10 +186,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 74,
"id": "e904a658",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"I don't know.\n",
"--- Generated 0 tokens in 2.855475902557373 seconds ---\n",
"--- 0.0 tokens/sec\n"
]
}
],
"source": [
"data = {\n",
" \"messages\": [\n",
@@ -155,7 +212,7 @@
" \"max_tokens\": 50\n",
"}\n",
"\n",
"url = \"http://chain-server:8081/generate\"\n",
"url = \"http://localhost:8081/generate\"\n",
"\n",
"start_time = time.time()\n",
"tokens_generated = 0\n",
@@ -188,7 +245,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -202,7 +259,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.12.9"
}
},
"nbformat": 4,
31 changes: 25 additions & 6 deletions RAG/src/chain_server/configuration.py
@@ -14,8 +14,12 @@
# limitations under the License.

"""The definition of the application configuration."""
-from RAG.src.chain_server.configuration_wizard import ConfigWizard, configclass, configfield
-
+from RAG.src.chain_server.configuration_wizard import (
+    ConfigWizard,
+    configclass,
+    configfield,
+)
+import os

@configclass
class VectorStoreConfig(ConfigWizard):
@@ -61,13 +65,18 @@ class LLMConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints,openai",
)
model_name_pandas_ai: str = configfield(
"model_name_pandas_ai",
default="ai-mixtral-8x7b-instruct",
help_txt="The name of the ai catalog model to be used with PandasAI agent",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
@@ -104,16 +113,21 @@ class EmbeddingConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are hugginface",
help_txt="The server type of the hosted model. Allowed values are hugginface,openai",
)
dimensions: int = configfield(
"dimensions",
-default=1024,
+default=1536,
help_txt="The required dimensions of the embedding model. Currently utilized for vector DB indexing.",
)
server_url: str = configfield(
"server_url", default="", help_txt="The url of the server hosting nemo embedding model",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
@@ -129,11 +143,16 @@ class RankingConfig(ConfigWizard):
model_engine: str = configfield(
"model_engine",
default="nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints",
help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints,openai",
)
server_url: str = configfield(
"server_url", default="", help_txt="The url of the server hosting nemo Ranking model",
)
+    api_key: str = configfield(
+        "api_key",
+        default=os.getenv("OPENAI_API_KEY"),
+        help_txt="API KEY",
+    )


@configclass
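
Note: a sketch of how the new api_key and server_url fields could be consumed via langchain-openai (added to requirements.txt below). The settings object and attribute names are illustrative, not taken from this PR:

```python
# Illustrative wiring only; `settings` stands in for the loaded ConfigWizard
# instance, and the model names mirror the compose defaults in this PR.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

def build_clients(settings):
    llm = ChatOpenAI(
        model=settings.llm.model_name,         # e.g. "openai-main/gpt-4o"
        base_url=settings.llm.server_url,      # the OpenAI-compatible gateway URL
        api_key=settings.llm.api_key,          # defaults to OPENAI_API_KEY
    )
    embedder = OpenAIEmbeddings(
        model=settings.embeddings.model_name,  # e.g. "openai-main/text-embedding-ada-002"
        base_url=settings.embeddings.server_url,
        api_key=settings.embeddings.api_key,
        # dimensions default moved from 1024 to 1536 to match ada-002
    )
    return llm, embedder
```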
1 change: 1 addition & 0 deletions RAG/src/chain_server/requirements.txt
@@ -10,6 +10,7 @@ llama-index-llms-langchain==0.1.3
llama-index-embeddings-langchain==0.1.2
llama-index-vector-stores-milvus==0.1.6
llama-index-vector-stores-postgres==0.1.5
+langchain-openai>=0.0.2
pymilvus==2.4.0
dataclass-wizard==0.22.3
opencv-python==4.8.0.74