edenai · juandavidcruzgomez · Sep 23, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/edenai_apis/apis/google/google_video_api.py b/edenai_apis/apis/google/google_video_api.py
@@ -1,17 +1,22 @@
 from pathlib import Path
 from time import time
-from typing import List
+from typing import List, Dict, Any
+import requests
+import json
+import time
 
 from google.cloud import videointelligence
 
 from edenai_apis.apis.google.google_helpers import (
     GoogleVideoFeatures,
     google_video_get_job,
     score_to_content,
+    calculate_usage_tokens,
 )
 from edenai_apis.features.video import (
     ContentNSFW,
     ExplicitContentDetectionAsyncDataClass,
+    QuestionAnswerDataClass,
 )
 from edenai_apis.features.video.face_detection_async.face_detection_async_dataclass import (
     FaceAttributes,
@@ -67,6 +72,7 @@
     AsyncLaunchJobResponseType,
     AsyncPendingResponseType,
     AsyncResponseType,
+    ResponseType,
 )
 
 
@@ -90,6 +96,41 @@ def google_upload_video(
 
         return gcs_uri
 
+    def _check_file_status(self, file_uri: str, api_key: str) -> Dict[str, Any]:
+        """Fetch the current description of an uploaded file.
+
+        Issues a GET request on ``file_uri`` with the API key passed as the
+        ``key`` query parameter and returns the decoded JSON body.
+
+        Args:
+            file_uri: URI of the uploaded file to query.
+            api_key: API key sent as the ``key`` query parameter.
+
+        Returns:
+            The JSON body of the response, decoded into a dictionary.
+
+        Raises:
+            ProviderException: If the response status is not 200 or the body
+                cannot be decoded as JSON.
+        """
+        # Let requests build the query string, and bound the call so a stalled
+        # connection cannot block the caller's polling loop forever.
+        response = requests.get(file_uri, params={"key": api_key}, timeout=30)
+        if response.status_code != 200:
+            raise ProviderException(message=response.text, code=response.status_code)
+        try:
+            return response.json()
+        except ValueError as exc:
+            # ValueError is the common base of every JSON decoding error that
+            # Response.json() can raise, including json.JSONDecodeError.
+            raise ProviderException(
+                "An error occurred while parsing the response."
+            ) from exc
+
+    def _upload_and_process_file(
+        self,
+        file: str,
+        api_key: str,
+        poll_interval: float = 5,
+        max_wait: float = 600,
+    ) -> Dict[str, Any]:
+        """Upload a local file and wait until it is no longer being processed.
+
+        The file is posted as multipart form data to the upload endpoint, then
+        its status is polled through ``_check_file_status`` for as long as the
+        reported state is ``"PROCESSING"``.
+
+        Args:
+            file: Path of the local file to upload (opened in binary mode).
+            api_key: API key sent as the ``key`` query parameter.
+            poll_interval: Seconds to sleep between two status checks.
+            max_wait: Maximum number of seconds to keep polling before giving
+                up. NOTE(review): 600 is an arbitrary safety bound; tune it to
+                the longest processing time that must be supported.
+
+        Returns:
+            The file description returned by the provider once processing is
+            over (it contains at least the ``uri`` and ``mimeType`` keys read
+            by the caller).
+
+        Raises:
+            ProviderException: If the upload is rejected, the response cannot
+                be parsed, processing does not finish within ``max_wait``
+                seconds, or the provider reports the ``"FAILED"`` state.
+        """
+        # Imported locally on purpose: the module binds the name ``time`` twice
+        # (``from time import time`` and ``import time``), so this method does
+        # not rely on whichever binding survives at module level.
+        from time import monotonic, sleep
+
+        upload_url = f"https://generativelanguage.googleapis.com/upload/v1beta/files?key={api_key}"
+
+        # Keep the ``file`` argument intact instead of rebinding it to the
+        # multipart payload.
+        with open(file, "rb") as video_file:
+            response = requests.post(upload_url, files={"file": video_file})
+
+        if response.status_code != 200:
+            raise ProviderException(message=response.text, code=response.status_code)
+        try:
+            file_data = response.json()["file"]
+        except (ValueError, KeyError) as exc:
+            # ValueError: body is not JSON; KeyError: JSON without a "file" entry.
+            raise ProviderException(
+                "An error occurred while parsing the response."
+            ) from exc
+
+        deadline = monotonic() + max_wait
+        while file_data.get("state") == "PROCESSING":
+            if monotonic() >= deadline:
+                raise ProviderException(
+                    "Timed out while waiting for the uploaded file to be processed."
+                )
+            sleep(poll_interval)
+            file_data = self._check_file_status(file_data["uri"], api_key)
+
+        # A failed file cannot be referenced in a generation request, so stop
+        # here rather than letting the next call fail less clearly.
+        if file_data.get("state") == "FAILED":
+            raise ProviderException(
+                "The provider failed to process the uploaded file."
+            )
+
+        return file_data
+
     # Launch label detection job
     def video__label_detection_async__launch_job(
         self, file: str, file_url: str = ""
@@ -693,3 +734,53 @@ def video__shot_change_detection_async__get_job_result(
         return AsyncPendingResponseType[ShotChangeDetectionAsyncDataClass](
             status="pending", provider_job_id=provider_job_id
         )
+
+    def video__question_answer(
+        self,
+        text: str,
+        file: str,
+        file_url: str = "",
+        temperature: float = 0,
+        model: str = None,
+    ) -> ResponseType[QuestionAnswerDataClass]:
+        """Ask a question about a video through the ``generateContent`` endpoint.
+
+        The video is first uploaded with ``_upload_and_process_file``; the
+        question and a reference to the uploaded file are then sent together
+        in a single request.
+
+        Args:
+            text: Question or instruction sent alongside the video.
+            file: Path of the local video file to upload.
+            file_url: Accepted for interface compatibility; not used here.
+            temperature: Sampling temperature forwarded in ``generationConfig``.
+            model: Name of the model to call. When empty or None, falls back
+                to ``"gemini-1.5-flash"``.
+
+        Returns:
+            A ``ResponseType`` holding the raw provider response and the
+            standardized answer.
+
+        Raises:
+            ProviderException: If the upload fails, the response cannot be
+                parsed, the provider returns a non-200 status, or the response
+                does not contain any generated text.
+        """
+        api_key = self.api_settings.get("genai_api_key")
+        # ``model`` defaults to None, which would otherwise be formatted into
+        # the URL as the literal string "None".
+        # NOTE(review): the fallback is the model used by the feature's sample
+        # arguments; confirm it is the desired default.
+        model = model or "gemini-1.5-flash"
+        file_data = self._upload_and_process_file(file, api_key)
+        base_url = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
+        url = base_url.format(model=model, api_key=api_key)
+        payload = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": text},
+                        {
+                            "file_data": {
+                                "mime_type": file_data["mimeType"],
+                                "file_uri": file_data["uri"],
+                            }
+                        },
+                    ]
+                }
+            ],
+            "generationConfig": {"candidateCount": 1, "temperature": temperature},
+        }
+        response = requests.post(url, json=payload)
+        try:
+            original_response = response.json()
+        except ValueError as exc:
+            # ValueError is the common base of the JSON decoding errors that
+            # Response.json() can raise, including json.JSONDecodeError.
+            raise ProviderException(
+                message="An error occurred while parsing the response.",
+                code=response.status_code,
+            ) from exc
+
+        if response.status_code != 200:
+            # Error bodies are not guaranteed to follow the expected shape;
+            # fall back to the raw text instead of failing with a KeyError.
+            try:
+                error_message = original_response["error"]["message"]
+            except (KeyError, TypeError):
+                error_message = response.text
+            raise ProviderException(
+                message=error_message,
+                code=response.status_code,
+            )
+
+        # A 200 response may carry no usable candidate; report that as a
+        # provider error instead of an unhandled lookup error.
+        try:
+            generated_text = original_response["candidates"][0]["content"]["parts"][
+                0
+            ]["text"]
+        except (KeyError, IndexError, TypeError) as exc:
+            raise ProviderException(
+                "The provider did not return any answer for this request."
+            ) from exc
+
+        calculate_usage_tokens(original_response=original_response)
+        return ResponseType[QuestionAnswerDataClass](
+            original_response=original_response,
+            standardized_response=QuestionAnswerDataClass(answer=generated_text),
+        )
diff --git a/edenai_apis/apis/google/info.json b/edenai_apis/apis/google/info.json
@@ -1319,6 +1319,22 @@
         ]
       },
       "version": "Video intelligence v1"
+    },
+    "question_answer": {
+      "constraints": {
+        "file_types": [
+          "video/mp4",
+          "video/mpeg",
+          "video/mov",
+          "video/avi",
+          "video/x-flv",
+          "video/mpg",
+          "video/webm",
+          "video/wmv",
+          "video/3gpp"
+        ]
+      },
+      "version": "v1Beta"
     }
   },
   "image": {

diff --git a/edenai_apis/apis/google/outputs/video/question_answer_output.json b/edenai_apis/apis/google/outputs/video/question_answer_output.json
@@ -0,0 +1,49 @@
+{
+  "original_response": {
+    "candidates": [
+      {
+        "content": {
+          "parts": [
+            {
+              "text": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
+            }
+          ],
+          "role": "model"
+        },
+        "finishReason": "STOP",
+        "index": 0,
+        "safetyRatings": [
+          {
+            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+            "probability": "NEGLIGIBLE"
+          },
+          {
+            "category": "HARM_CATEGORY_HATE_SPEECH",
+            "probability": "NEGLIGIBLE"
+          },
+          {
+            "category": "HARM_CATEGORY_HARASSMENT",
+            "probability": "NEGLIGIBLE"
+          },
+          {
+            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+            "probability": "NEGLIGIBLE"
+          }
+        ]
+      }
+    ],
+    "usageMetadata": {
+      "promptTokenCount": 3838,
+      "candidatesTokenCount": 46,
+      "totalTokenCount": 3884
+    },
+    "usage": {
+      "prompt_tokens": 3838,
+      "completion_tokens": 46,
+      "total_tokens": 3884
+    }
+  },
+  "standardized_response": {
+    "answer": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
+  }
+}
diff --git a/edenai_apis/features/video/__init__.py b/edenai_apis/features/video/__init__.py
@@ -51,3 +51,5 @@
     VideoTextFrames,
     text_detection_async_arguments,
 )
+
+from .question_answer import QuestionAnswerDataClass, question_answer_arguments
diff --git a/edenai_apis/features/video/question_answer/__init__.py b/edenai_apis/features/video/question_answer/__init__.py
@@ -0,0 +1,2 @@
+from .question_answer_args import question_answer_arguments
+from .question_answer_dataclass import QuestionAnswerDataClass
diff --git a/edenai_apis/features/video/question_answer/question_answer_args.py b/edenai_apis/features/video/question_answer/question_answer_args.py
@@ -0,0 +1,32 @@
+import mimetypes
+import os
+from typing import Dict
+
+from pydub.utils import mediainfo
+
+from edenai_apis.utils.files import FileInfo, FileWrapper
+
+# Locate the sample video shipped in the feature's "data" directory, two
+# levels above this module.
+feature_path = os.path.dirname(os.path.dirname(__file__))
+
+data_path = os.path.join(feature_path, "data")
+
+video_path = f"{data_path}/shot.mp4"
+
+mime_type = mimetypes.guess_type(video_path)[0]
+# Probe the sample video once and reuse the result for both audio fields,
+# instead of calling mediainfo() twice on the same file.
+_media_info = mediainfo(video_path)
+file_info = FileInfo(
+    os.stat(video_path).st_size,
+    mime_type,
+    [extension[1:] for extension in mimetypes.guess_all_extensions(mime_type)],
+    _media_info.get("sample_rate", "44100"),
+    _media_info.get("channels", "1"),
+)
+# Module-level wrapper reused by question_answer_arguments().
+file_wrapper = FileWrapper(video_path, "", file_info)
+
+
+def question_answer_arguments(provider_name: str) -> Dict:
+    """Build the sample arguments for the video question-answer feature.
+
+    ``provider_name`` is accepted but not used: the same arguments are
+    returned whatever the provider.
+    """
+    sample_arguments = dict(
+        file=file_wrapper,
+        text="describe this video",
+        temperature=0.0,
+        settings={"google": "gemini-1.5-flash"},
+    )
+    return sample_arguments
diff --git a/edenai_apis/features/video/question_answer/question_answer_dataclass.py b/edenai_apis/features/video/question_answer/question_answer_dataclass.py
@@ -0,0 +1,5 @@
+from pydantic import BaseModel
+
+
+class QuestionAnswerDataClass(BaseModel):
+    """Standardized result of the video question-answer feature."""
+
+    # Text of the answer produced for the question asked about the video.
+    answer: str
diff --git a/edenai_apis/features/video/question_answer/question_answer_response.json b/edenai_apis/features/video/question_answer/question_answer_response.json
@@ -0,0 +1,3 @@
+{
+    "answer": "The video is about how to create AI hyperlapse videos. The video shows a man talking about AI hyperlapse videos and how they are becoming popular on YouTube and Instagram. He then shows how to create these videos using a phone. "
+}
diff --git a/edenai_apis/features/video/video_interface.py b/edenai_apis/features/video/video_interface.py
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
 from io import BufferedReader
+from typing import Optional
 
 from edenai_apis.features.video import (
     ExplicitContentDetectionAsyncDataClass,
@@ -9,6 +10,7 @@
     ObjectTrackingAsyncDataClass,
     PersonTrackingAsyncDataClass,
     TextDetectionAsyncDataClass,
+    QuestionAnswerDataClass,
 )
 from edenai_apis.utils.types import AsyncBaseResponseType, AsyncLaunchJobResponseType
 
@@ -181,4 +183,15 @@ def video__shot_change_detection_async__launch_job(
     def video__shot_change_detection_async__get_job_result(
         self, provider_job_id: str
     ) -> AsyncBaseResponseType:
-        raise NotImplementedError
+        raise NotImplementedError
+
+    @abstractmethod
+    def video__question_answer(
+        self,
+        text: str,
+        file: str,
+        file_url: str = "",
+        temperature: float = 0.0,
+        model: Optional[str] = None,
+    ) -> QuestionAnswerDataClass:
+        """Answer a text question about a video.
+
+        Abstract declaration; this base version only raises.
+
+        Args:
+            text: Question or instruction about the video.
+            file: Video file to analyse (presumably a local path; confirm
+                against callers).
+            file_url: Optional URL of the video; defaults to an empty string.
+            temperature: Sampling temperature; defaults to 0.0.
+            model: Optional name of the provider model; defaults to None.
+
+        Raises:
+            NotImplementedError: Always, in this base declaration.
+
+        NOTE(review): annotated as returning ``QuestionAnswerDataClass``;
+        confirm whether implementations are expected to return it wrapped in
+        a response type.
+        """
+        raise NotImplementedError
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .question_answer_args import question_answer_arguments
		from .question_answer_dataclass import QuestionAnswerDataClass