Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jacques/method list datasets #1189

Merged
merged 6 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions sdks/python/src/opik/api_objects/dataset/rest_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from typing import List
from opik.rest_api import OpikApi
from opik import exceptions
from . import dataset
from .. import experiment
from ...rest_api.core.api_error import ApiError


def get_datasets(
    rest_client: OpikApi, max_results: int = 1000, sync_items: bool = True
) -> List[dataset.Dataset]:
    """
    Fetch datasets page by page and wrap each one in a ``dataset.Dataset``.

    Args:
        rest_client: REST client used to query the backend.
        max_results: Upper bound on the number of datasets returned.
        sync_items: When True, ``__internal_api__sync_hashes__()`` is called on
            every dataset before it is added to the result.

    Returns:
        At most ``max_results`` datasets, in the order the backend returned them.
    """
    page_size = 100
    datasets: List[dataset.Dataset] = []

    page = 1
    while len(datasets) < max_results:
        page_datasets = rest_client.datasets.find_datasets(
            page=page,
            size=page_size,
        )

        # A page whose content is missing (None) or empty means there is
        # nothing left to read; calling len() on None would raise TypeError.
        if not page_datasets.content:
            break

        # Only take as many entries as are still needed to reach max_results.
        for dataset_fern in page_datasets.content[: max_results - len(datasets)]:
            dataset_ = dataset.Dataset(
                name=dataset_fern.name,
                description=dataset_fern.description,
                rest_client=rest_client,
            )

            if sync_items:
                dataset_.__internal_api__sync_hashes__()

            datasets.append(dataset_)

        page += 1

    return datasets


def get_dataset_id(rest_client: OpikApi, dataset_name: str) -> str:
    """
    Look up a dataset by name and return its id.

    Args:
        rest_client: REST client used to query the backend.
        dataset_name: Name of the dataset to resolve.

    Returns:
        The ``id`` attribute of the dataset returned by the backend.

    Raises:
        exceptions.DatasetNotFound: The backend answered with status code 404.
        ApiError: Any other API error is re-raised unchanged.
    """
    try:
        found = rest_client.datasets.get_dataset_by_identifier(
            dataset_name=dataset_name
        )
        return found.id
    except ApiError as api_error:
        # Only a 404 is translated into the domain-specific exception.
        if api_error.status_code != 404:
            raise
        raise exceptions.DatasetNotFound(
            f"Dataset with the name {dataset_name} not found."
        ) from api_error


def get_dataset_experiments(
    rest_client: OpikApi, dataset_id: str, max_results: int = 1000
) -> List[experiment.Experiment]:
    """
    Fetch the experiments of one dataset page by page.

    Args:
        rest_client: REST client used to query the backend.
        dataset_id: Id of the dataset whose experiments are requested.
        max_results: Upper bound on the number of experiments returned.

    Returns:
        At most ``max_results`` ``experiment.Experiment`` objects, in the
        order the backend returned them.
    """
    page_size = 100
    experiments: List[experiment.Experiment] = []

    page = 1
    while len(experiments) < max_results:
        page_experiments = rest_client.experiments.find_experiments(
            page=page,
            size=page_size,
            dataset_id=dataset_id,
        )

        # A page whose content is missing (None) or empty means there is
        # nothing left to read; calling len() on None would raise TypeError.
        if not page_experiments.content:
            break

        # Only take as many entries as are still needed to reach max_results.
        for experiment_ in page_experiments.content[: max_results - len(experiments)]:
            experiments.append(
                experiment.Experiment(
                    id=experiment_.id,
                    name=experiment_.name,
                    dataset_name=experiment_.dataset_name,
                    rest_client=rest_client,
                    # TODO: add prompt if exists
                )
            )

        page += 1

    return experiments
22 changes: 13 additions & 9 deletions sdks/python/src/opik/api_objects/experiment/experiment_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,19 @@ def from_rest_experiment_item_compare(
cls,
value: experiment_item_compare.ExperimentItemCompare,
) -> "ExperimentItemContent":
feedback_scores: List[FeedbackScoreDict] = [
{
"category_name": rest_feedback_score.category_name,
"name": rest_feedback_score.name,
"reason": rest_feedback_score.reason,
"value": rest_feedback_score.value,
}
for rest_feedback_score in value.feedback_scores
]
if value.feedback_scores is None:
feedback_scores: List[FeedbackScoreDict] = []
jverre marked this conversation as resolved.
Show resolved Hide resolved
else:
feedback_scores = [
{
"category_name": rest_feedback_score.category_name,
"name": rest_feedback_score.name,
"reason": rest_feedback_score.reason,
"value": rest_feedback_score.value,
}
for rest_feedback_score in value.feedback_scores
]

return ExperimentItemContent(
id=value.id,
trace_id=value.trace_id,
Expand Down
23 changes: 1 addition & 22 deletions sdks/python/src/opik/api_objects/experiment/helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import logging
from opik import jsonable_encoder
from typing import Any, Dict, List, Mapping, Optional, Tuple

from opik import exceptions, jsonable_encoder
from opik.rest_api import OpikApi
from opik.rest_api.types import experiment_public
from .. import prompt

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -49,24 +46,6 @@ def build_metadata_and_prompt_versions(
return metadata, prompt_versions


def get_experiment_data_by_name(
    rest_client: OpikApi, name: str
) -> experiment_public.ExperimentPublic:
    """
    Return the first experiment whose name is exactly ``name``.

    Pages through the ``find_experiments(name=...)`` results until an exact
    name match is found.

    Args:
        rest_client: REST client used to query the backend.
        name: Experiment name to match exactly.

    Returns:
        The matching experiment as returned by the REST API.

    Raises:
        exceptions.ExperimentNotFound: A page came back without content before
            any exact match was found.
    """
    page = 0

    while True:
        page += 1
        # The page number must be sent with the request; otherwise the same
        # page is fetched on every iteration and the loop never terminates
        # when that page contains no exact match.
        experiment_page_public = rest_client.experiments.find_experiments(
            page=page, name=name
        )
        # Missing (None) or empty content both mean the results are exhausted.
        if not experiment_page_public.content:
            raise exceptions.ExperimentNotFound(
                f"Experiment with the name {name} not found."
            )

        for experiment in experiment_page_public.content:
            if experiment.name == name:
                return experiment


def handle_prompt_args(
prompt: Optional[prompt.Prompt] = None,
prompts: Optional[List[prompt.Prompt]] = None,
Expand Down
21 changes: 21 additions & 0 deletions sdks/python/src/opik/api_objects/experiment/rest_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from opik.rest_api import OpikApi
from opik.rest_api.types import experiment_public
from opik import exceptions


def get_experiment_data_by_name(
    rest_client: OpikApi, name: str
) -> experiment_public.ExperimentPublic:
    """
    Return the first experiment whose name is exactly ``name``.

    Pages through the ``find_experiments(name=...)`` results until an exact
    name match is found.

    Args:
        rest_client: REST client used to query the backend.
        name: Experiment name to match exactly.

    Returns:
        The matching experiment as returned by the REST API.

    Raises:
        exceptions.ExperimentNotFound: A page came back without content before
            any exact match was found.
    """
    page = 0

    while True:
        page += 1
        # The page number must be sent with the request; otherwise the same
        # page is fetched on every iteration and the loop never terminates
        # when that page contains no exact match.
        experiment_page_public = rest_client.experiments.find_experiments(
            page=page, name=name
        )
        # Missing (None) or empty content both mean the results are exhausted.
        if not experiment_page_public.content:
            raise exceptions.ExperimentNotFound(
                f"Experiment with the name {name} not found."
            )

        for experiment in experiment_page_public.content:
            if experiment.name == name:
                return experiment
50 changes: 49 additions & 1 deletion sdks/python/src/opik/api_objects/opik_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
validation_helpers,
)
from .experiment import helpers as experiment_helpers
from .experiment import rest_operations as experiment_rest_operations
from .dataset import rest_operations as dataset_rest_operations
from ..message_processing import streamer_constructors, messages
from ..message_processing.batching import sequence_splitter

Expand Down Expand Up @@ -437,6 +439,52 @@ def get_dataset(self, name: str) -> dataset.Dataset:

return dataset_

def get_datasets(
    self,
    max_results: int = 100,
    sync_items: bool = True,
) -> List[dataset.Dataset]:
    """
    Fetch datasets, stopping once `max_results` of them have been collected.

    Args:
        max_results: Upper bound on the number of datasets in the returned list.
        sync_items: Whether to sync the hashes of the dataset items. The hashes are used to deduplicate items when fetching the dataset, but syncing them can be an expensive operation.

    Returns:
        List[dataset.Dataset]: The retrieved dataset objects.
    """
    return dataset_rest_operations.get_datasets(
        rest_client=self._rest_client,
        max_results=max_results,
        sync_items=sync_items,
    )

def get_dataset_experiments(
    self,
    dataset_name: str,
    max_results: int = 100,
) -> List[experiment.Experiment]:
    """
    Returns the experiments of the given dataset, up to the specified limit.

    Args:
        dataset_name: The name of the dataset
        max_results: The maximum number of experiments to return.

    Returns:
        List[experiment.Experiment]: A list of experiment objects.

    Raises:
        exceptions.DatasetNotFound: Raised by the dataset id lookup when the
            backend answers 404 for `dataset_name`.
    """
    # Experiments are queried by dataset id, so the name is resolved first.
    dataset_id = dataset_rest_operations.get_dataset_id(
        self._rest_client, dataset_name
    )

    experiments = dataset_rest_operations.get_dataset_experiments(
        self._rest_client, dataset_id, max_results
    )

    return experiments

def delete_dataset(self, name: str) -> None:
"""
Delete dataset by name
Expand Down Expand Up @@ -552,7 +600,7 @@ def get_experiment_by_name(self, name: str) -> experiment.Experiment:
Returns:
experiment.Experiment: the API object for an existing experiment.
"""
experiment_public = experiment_helpers.get_experiment_data_by_name(
experiment_public = experiment_rest_operations.get_experiment_data_by_name(
rest_client=self._rest_client, name=name
)

Expand Down
4 changes: 4 additions & 0 deletions sdks/python/src/opik/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ def __str__(self) -> str:

class ExperimentNotFound(OpikException):
    """Raised when a lookup finds no experiment with the requested name."""


class DatasetNotFound(OpikException):
    """Raised when a dataset lookup by name gets a 404 from the backend."""
35 changes: 35 additions & 0 deletions sdks/python/tests/e2e/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,38 @@ def test_experiment__get_experiment_by_name__experiment_not_found__ExperimentNot
):
with pytest.raises(exceptions.ExperimentNotFound):
opik_client.get_experiment_by_id("not-existing-name")


def test_experiment__get_experiment_items__no_feedback_scores(
    opik_client: opik.Opik, dataset_name: str, experiment_name: str
):
    """Items of an experiment evaluated with no scoring metrics expose `feedback_scores == []`."""
    dataset_items = [
        {
            "input": {"question": "What is the of capital of France?"},
            "expected_model_output": {"output": "Paris"},
        },
    ]

    created_dataset = opik_client.create_dataset(dataset_name)
    created_dataset.insert(dataset_items)

    def task(item: Dict[str, Any]) -> Dict[str, Any]:
        # The answer is constant; only the absence of feedback scores matters here.
        return {"output": "Paris"}

    # An empty scoring_metrics list means no feedback scores are produced.
    opik.evaluate(
        dataset=created_dataset,
        task=task,
        scoring_metrics=[],
        experiment_name=experiment_name,
    )

    opik.flush_tracker()

    fetched_experiment = opik_client.get_experiment_by_name(experiment_name)
    experiment_items = fetched_experiment.get_items()

    assert len(experiment_items) == 1
    assert experiment_items[0].feedback_scores == []