Skip to content

SD2-555-video-qa-feature-with-google-gemini-and-twelvlabs #269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 92 additions & 1 deletion edenai_apis/apis/google/google_video_api.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
from pathlib import Path
from time import time
from typing import List
from typing import List, Dict, Any
import requests
import json
import time

from google.cloud import videointelligence

from edenai_apis.apis.google.google_helpers import (
GoogleVideoFeatures,
google_video_get_job,
score_to_content,
calculate_usage_tokens,
)
from edenai_apis.features.video import (
ContentNSFW,
ExplicitContentDetectionAsyncDataClass,
QuestionAnswerDataClass,
)
from edenai_apis.features.video.face_detection_async.face_detection_async_dataclass import (
FaceAttributes,
@@ -67,6 +72,7 @@
AsyncLaunchJobResponseType,
AsyncPendingResponseType,
AsyncResponseType,
ResponseType,
)


@@ -90,6 +96,41 @@ def google_upload_video(

return gcs_uri

def _check_file_status(self, file_uri: str, api_key: str) -> Dict[str, Any]:
    """Fetch the current metadata of a previously uploaded file.

    Args:
        file_uri: URI of the file resource to query.
        api_key: API key sent as the ``key`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ProviderException: If the response status is not 200, or if the
            body cannot be decoded as JSON.
    """
    # Let requests build the query string: it encodes the key and still
    # produces a valid URL when ``file_uri`` already carries a query part.
    response = requests.get(file_uri, params={"key": api_key})
    if response.status_code != 200:
        raise ProviderException(message=response.text, code=response.status_code)
    try:
        return response.json()
    except ValueError as exc:
        # ValueError is the common base of json.JSONDecodeError and of the
        # simplejson error that requests raises when simplejson is installed.
        raise ProviderException(
            "An error occurred while parsing the response."
        ) from exc

def _upload_and_process_file(self, file: str, api_key: str) -> Dict[str, Any]:
    """Upload a local video and wait until the provider finished processing it.

    Args:
        file: Path of the local file to upload; opened in binary mode.
        api_key: API key sent as the ``key`` query parameter.

    Returns:
        The file metadata dict taken from the ``"file"`` entry of the upload
        response, refreshed by polling while its state is ``"PROCESSING"``.

    Raises:
        ProviderException: If the upload response status is not 200, if the
            body is not JSON or has no ``"file"`` entry, or if the file ends
            up in the ``"FAILED"`` state.
    """
    # Imported locally because the module binds the name ``time`` twice
    # (``from time import time`` and ``import time``); this keeps the
    # polling delay independent of which binding is in effect.
    from time import sleep

    upload_url = f"https://generativelanguage.googleapis.com/upload/v1beta/files?key={api_key}"

    with open(file, "rb") as video_file:
        response = requests.post(upload_url, files={"file": video_file})

    if response.status_code != 200:
        raise ProviderException(message=response.text, code=response.status_code)
    try:
        file_data = response.json()["file"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError: body is not JSON; KeyError/TypeError: JSON without the
        # expected "file" entry.
        raise ProviderException(
            "An error occurred while parsing the response."
        ) from exc

    # Poll every 5 seconds until the provider leaves the PROCESSING state.
    while file_data.get("state") == "PROCESSING":
        sleep(5)
        file_data = self._check_file_status(file_data["uri"], api_key)

    # A failed file cannot be referenced in a generation request, so report
    # it here instead of handing unusable metadata back to the caller.
    if file_data.get("state") == "FAILED":
        raise ProviderException("The provider failed to process the uploaded file.")

    return file_data

# Launch label detection job
def video__label_detection_async__launch_job(
self, file: str, file_url: str = ""
@@ -693,3 +734,53 @@ def video__shot_change_detection_async__get_job_result(
return AsyncPendingResponseType[ShotChangeDetectionAsyncDataClass](
status="pending", provider_job_id=provider_job_id
)

def video__question_answer(
    self,
    text: str,
    file: str,
    file_url: str = "",
    temperature: float = 0,
    model: str = None,
) -> ResponseType[QuestionAnswerDataClass]:
    """Ask a generative model a question about a local video file.

    The video is uploaded first, then referenced together with ``text`` in a
    single ``generateContent`` request.

    Args:
        text: Question or instruction sent as the text part of the request.
        file: Path of the local video file to upload.
        file_url: Not used by this implementation.
        temperature: Value forwarded as ``generationConfig.temperature``.
        model: Name of the model inserted into the request URL. Required.

    Returns:
        A ``ResponseType`` holding the raw provider response and a
        ``QuestionAnswerDataClass`` whose ``answer`` is the text of the first
        part of the first candidate.

    Raises:
        ProviderException: If no model is given, if the upload fails, if the
            response is not JSON, if the status is not 200, or if the
            response contains no answer text.
    """
    # Fail before uploading anything: a missing model would otherwise be
    # formatted into the URL as the literal string "None".
    if not model:
        raise ProviderException(
            "A model name is required to answer a question about a video."
        )

    api_key = self.api_settings.get("genai_api_key")
    file_data = self._upload_and_process_file(file, api_key)
    base_url = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
    url = base_url.format(model=model, api_key=api_key)
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": text},
                    {
                        "file_data": {
                            "mime_type": file_data["mimeType"],
                            "file_uri": file_data["uri"],
                        }
                    },
                ]
            }
        ],
        "generationConfig": {"candidateCount": 1, "temperature": temperature},
    }
    response = requests.post(url, json=payload)
    try:
        original_response = response.json()
    except ValueError as exc:
        # ValueError also covers the simplejson-based error requests may raise.
        raise ProviderException(
            "An error occurred while parsing the response."
        ) from exc

    if response.status_code != 200:
        try:
            error_message = original_response["error"]["message"]
        except (KeyError, TypeError):
            # Error body without the usual structure: report the raw text.
            error_message = response.text
        raise ProviderException(
            message=error_message,
            code=response.status_code,
        )

    try:
        generated_text = original_response["candidates"][0]["content"]["parts"][0][
            "text"
        ]
    except (KeyError, IndexError, TypeError) as exc:
        # A 200 response is not guaranteed to carry a candidate with text.
        raise ProviderException(
            "The provider response does not contain an answer."
        ) from exc

    # NOTE(review): presumably adds usage information to original_response
    # in place; confirm against the helper's definition.
    calculate_usage_tokens(original_response=original_response)
    return ResponseType[QuestionAnswerDataClass](
        original_response=original_response,
        standardized_response=QuestionAnswerDataClass(answer=generated_text),
    )
16 changes: 16 additions & 0 deletions edenai_apis/apis/google/info.json
Original file line number Diff line number Diff line change
@@ -1319,6 +1319,22 @@
]
},
"version": "Video intelligence v1"
},
"question_answer": {
"constraints": {
"file_types": [
"video/mp4",
"video/mpeg",
"video/mov",
"video/avi",
"video/x-flv",
"video/mpg",
"video/webm",
"video/wmv",
"video/3gpp"
]
},
"version": "v1Beta"
}
},
"image": {
49 changes: 49 additions & 0 deletions edenai_apis/apis/google/outputs/video/question_answer_output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"original_response": {
"candidates": [
{
"content": {
"parts": [
{
"text": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
}
],
"role": "model"
},
"finishReason": "STOP",
"index": 0,
"safetyRatings": [
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE"
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE"
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE"
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "NEGLIGIBLE"
}
]
}
],
"usageMetadata": {
"promptTokenCount": 3838,
"candidatesTokenCount": 46,
"totalTokenCount": 3884
},
"usage": {
"prompt_tokens": 3838,
"completion_tokens": 46,
"total_tokens": 3884
}
},
"standardized_response": {
"answer": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
}
}
2 changes: 2 additions & 0 deletions edenai_apis/features/video/__init__.py
Original file line number Diff line number Diff line change
@@ -51,3 +51,5 @@
VideoTextFrames,
text_detection_async_arguments,
)

from .question_answer import QuestionAnswerDataClass, question_answer_arguments
2 changes: 2 additions & 0 deletions edenai_apis/features/video/question_answer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .question_answer_args import question_answer_arguments
from .question_answer_dataclass import QuestionAnswerDataClass
32 changes: 32 additions & 0 deletions edenai_apis/features/video/question_answer/question_answer_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import mimetypes
import os
from typing import Dict

from pydub.utils import mediainfo

from edenai_apis.utils.files import FileInfo, FileWrapper

# Resolve the sample video inside the "data" directory that sits two levels
# above this module's file.
feature_path = os.path.dirname(os.path.dirname(__file__))

data_path = os.path.join(feature_path, "data")

video_path = f"{data_path}/shot.mp4"

mime_type = mimetypes.guess_type(video_path)[0]
# Inspect the media file once and reuse the result for both lookups below
# instead of calling mediainfo() twice on the same path.
_media_info = mediainfo(video_path)
file_info = FileInfo(
    os.stat(video_path).st_size,
    mime_type,
    [extension[1:] for extension in mimetypes.guess_all_extensions(mime_type)],
    _media_info.get("sample_rate", "44100"),
    _media_info.get("channels", "1"),
)
file_wrapper = FileWrapper(video_path, "", file_info)


def question_answer_arguments(provider_name: str) -> Dict:
    """Return the sample arguments for the video question-answer feature.

    ``provider_name`` is accepted but not read: the same arguments are
    returned whatever provider is requested.
    """
    sample_settings = {"google": "gemini-1.5-flash"}
    return dict(
        file=file_wrapper,
        text="describe this video",
        temperature=0.0,
        settings=sample_settings,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from pydantic import BaseModel


# Model holding the answer of the video question-answer feature.
# Documented with comments rather than a class docstring so that the
# model's generated schema description is left untouched.
class QuestionAnswerDataClass(BaseModel):
    # Text of the answer.
    answer: str
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"answer": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
}
15 changes: 14 additions & 1 deletion edenai_apis/features/video/video_interface.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from io import BufferedReader
from typing import Optional

from edenai_apis.features.video import (
ExplicitContentDetectionAsyncDataClass,
@@ -9,6 +10,7 @@
ObjectTrackingAsyncDataClass,
PersonTrackingAsyncDataClass,
TextDetectionAsyncDataClass,
QuestionAnswerDataClass,
)
from edenai_apis.utils.types import AsyncBaseResponseType, AsyncLaunchJobResponseType

@@ -181,4 +183,15 @@ def video__shot_change_detection_async__launch_job(
def video__shot_change_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType:
raise NotImplementedError
raise NotImplementedError

@abstractmethod
def video__question_answer(
    self,
    text: str,
    file: str,
    file_url: str = "",
    temperature: float = 0.0,
    model: Optional[str] = None,
) -> QuestionAnswerDataClass:
    """Answer a question about a video.

    Abstract declaration: this body only raises ``NotImplementedError`` and
    the behaviour is supplied by the implementing provider class.

    Args:
        text: Question or instruction about the video.
        file: Video file to analyse.
        file_url: Optional URL of the video; defaults to an empty string.
        temperature: Generation temperature; defaults to ``0.0``.
        model: Optional name of the model to use.

    Raises:
        NotImplementedError: Always, unless overridden.
    """
    raise NotImplementedError