Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[azure][trace] Check trace Cosmos status and log warnings if not ready #3200

Merged
merged 29 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
18efc0c
add new cosmos db setup api
zhengfeiwang May 9, 2024
2b3eb8d
switch validate func
zhengfeiwang May 9, 2024
9b92982
fix: add missing header
zhengfeiwang May 9, 2024
5957e16
refine process
zhengfeiwang May 9, 2024
156a312
remove a test
zhengfeiwang May 9, 2024
d83fbca
update CHANGELOG
zhengfeiwang May 9, 2024
e1c2251
add get cosmos metadata
zhengfeiwang May 10, 2024
0732280
update the method to check if cosmos is available
zhengfeiwang May 10, 2024
5e1977e
Merge branch 'main' into zhengfei/feature/honor-service-cosmos-status
zhengfeiwang May 10, 2024
ab794b6
Merge branch 'main' into zhengfei/feature/honor-service-cosmos-status
zhengfeiwang May 13, 2024
bace48f
add util
zhengfeiwang May 13, 2024
e44c7cb
check disable status first
zhengfeiwang May 13, 2024
e29761b
add deprecate comment
zhengfeiwang May 13, 2024
71d89c1
move to azure.entities
zhengfeiwang May 13, 2024
87574d4
print warning
zhengfeiwang May 13, 2024
d0b9f16
fix import
zhengfeiwang May 13, 2024
1b0ffa1
honor status when load to cloud
zhengfeiwang May 13, 2024
f25d3a2
sanitize trace session metadata response
zhengfeiwang May 13, 2024
a05925e
update recording for test
zhengfeiwang May 13, 2024
db44f8b
update recording
zhengfeiwang May 13, 2024
7b66a31
update recording for perf test
zhengfeiwang May 13, 2024
32ef77c
update recording
zhengfeiwang May 13, 2024
e9a6838
update recording
zhengfeiwang May 13, 2024
8429add
update recording
zhengfeiwang May 13, 2024
66041a7
update recording
zhengfeiwang May 13, 2024
1397612
fix: mock get cosmos
zhengfeiwang May 13, 2024
25b50af
update recording
zhengfeiwang May 13, 2024
65395e3
update CHANGELOG
zhengfeiwang May 13, 2024
aab5104
refine condition
zhengfeiwang May 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/promptflow-azure/promptflow/azure/_constants/_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,22 @@
COSMOS_DB_SETUP_POLL_INTERVAL_SECOND = 30
COSMOS_DB_SETUP_POLL_PRINT_INTERVAL_SECOND = 30
COSMOS_DB_SETUP_RESOURCE_TYPE = "HOBO"


class CosmosConfiguration:
    """String constants naming the configuration of the trace Cosmos DB.

    These are plain ``str`` class attributes (not an ``Enum``), so they compare
    equal to raw strings. Presumably they mirror the values the service
    returns -- confirm against the REST contract.
    """

    NONE = "None"
    READ_DISABLED = "ReadDisabled"
    WRITE_DISABLED = "WriteDisabled"
    DISABLED = "Disabled"
    DIAGNOSTIC_DISABLED = "DiagnosticDisabled"
    DATA_CLEANED = "DataCleaned"
    ACCOUNT_DELETED = "AccountDeleted"


class CosmosStatus:
    """String constants naming the lifecycle status of the trace Cosmos DB.

    These are plain ``str`` class attributes (not an ``Enum``), so they compare
    equal to raw strings. Presumably they mirror the values the service
    returns -- confirm against the REST contract.
    """

    NOT_EXISTS = "NotExists"
    INITIALIZING = "Initializing"
    INITIALIZED = "Initialized"
    DELETING = "Deleting"
    DELETED = "Deleted"
    NOT_AVAILABLE = "NotAvailable"
3 changes: 3 additions & 0 deletions src/promptflow-azure/promptflow/azure/_contracts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
33 changes: 33 additions & 0 deletions src/promptflow-azure/promptflow/azure/_contracts/_trace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

from dataclasses import dataclass

from promptflow.azure._constants._trace import CosmosConfiguration, CosmosStatus
from promptflow.azure._restclient.flow.models import TraceCosmosMetaDto


@dataclass
class CosmosMetadata:
    """Client-side representation of the trace Cosmos DB metadata.

    Instances are built from a ``TraceCosmosMetaDto`` through
    ``_from_rest_object``; ``is_disabled`` and ``is_ready`` interpret the
    ``configuration`` and ``status`` fields.
    """

    entity_id: str
    # compared against CosmosConfiguration values in is_disabled()
    configuration: str
    # compared against CosmosStatus values in is_ready()
    status: str
    database_name: str
    resource_type: str

    @staticmethod
    def _from_rest_object(obj: TraceCosmosMetaDto) -> "CosmosMetadata":
        """Build a ``CosmosMetadata`` by copying fields from the REST DTO.

        :param obj: REST object providing ``entity_id``,
            ``trace_cosmos_configuration``, ``trace_cosmos_status``,
            ``database_name`` and ``resource_type``.
        :return: A new ``CosmosMetadata`` holding those values unchanged.
        """
        return CosmosMetadata(
            entity_id=obj.entity_id,
            configuration=obj.trace_cosmos_configuration,
            status=obj.trace_cosmos_status,
            database_name=obj.database_name,
            resource_type=obj.resource_type,
        )

    def is_disabled(self) -> bool:
        """Return True when ``configuration`` equals ``CosmosConfiguration.DISABLED``."""
        return self.configuration == CosmosConfiguration.DISABLED

    def is_ready(self) -> bool:
        """Return True when not disabled and ``status`` is ``CosmosStatus.INITIALIZED``."""
        return not self.is_disabled() and self.status == CosmosStatus.INITIALIZED
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
COSMOS_DB_SETUP_POLL_PRINT_INTERVAL_SECOND,
COSMOS_DB_SETUP_POLL_TIMEOUT_SECOND,
)
from promptflow.azure._constants._trace import (
COSMOS_DB_SETUP_POLL_INTERVAL_SECOND,
COSMOS_DB_SETUP_POLL_PRINT_INTERVAL_SECOND,
COSMOS_DB_SETUP_POLL_TIMEOUT_SECOND,
)
from promptflow.azure._restclient.flow import AzureMachineLearningDesignerServiceClient
from promptflow.azure._utils.general import get_authorization, get_arm_token, get_aml_token
from promptflow.exceptions import UserErrorException, PromptflowException, SystemErrorException
Expand Down Expand Up @@ -763,6 +768,22 @@ def init_workspace_cosmos(
**kwargs,
)

def get_workspace_cosmos_metadata(
    self,
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    **kwargs,
):
    """Get Cosmos DB metadata.

    Forwards the workspace triad, the headers from ``self._get_headers()`` and
    any extra keyword arguments to
    ``self.caller.trace_sessions.get_trace_session_metadata_async``.

    :param subscription_id: Subscription ID passed through to the service call.
    :param resource_group_name: Resource group name passed through to the service call.
    :param workspace_name: Workspace name passed through to the service call.
    :param kwargs: Additional keyword arguments forwarded unchanged.
    :return: Whatever ``get_trace_session_metadata_async`` returns.
    """
    # NOTE(review): the sibling ``setup_workspace_cosmos`` is decorated with
    # ``@_request_wrapper()`` while this method is not -- confirm whether the
    # omission is intentional.
    return self.caller.trace_sessions.get_trace_session_metadata_async(
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
        headers=self._get_headers(),
        **kwargs,
    )

@_request_wrapper()
def setup_workspace_cosmos(self, subscription_id, resource_group_name, workspace_name, body, **kwargs):
"""Setup Cosmos DB for workspace/project."""
Expand Down
18 changes: 11 additions & 7 deletions src/promptflow-azure/promptflow/azure/_utils/_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import AzureCliCredential

from promptflow._constants import AzureWorkspaceKind, CosmosDBContainerName
from promptflow._constants import AzureWorkspaceKind
from promptflow._sdk._utilities.general_utils import extract_workspace_triad_from_trace_provider
from promptflow._utils.logger_utils import get_cli_sdk_logger
from promptflow.azure import PFClient
from promptflow.azure._constants._trace import COSMOS_DB_SETUP_RESOURCE_TYPE
from promptflow.azure._restclient.flow_service_caller import FlowRequestException
from promptflow.exceptions import ErrorTarget, UserErrorException

_logger = get_cli_sdk_logger()
Expand Down Expand Up @@ -61,13 +60,16 @@ def validate_trace_destination(value: str) -> None:
_logger.debug("Resource type is valid.")

# the workspace Cosmos DB is initialized
# try to retrieve the token from PFS; if failed, call PFS init API and start polling
# if not, call PFS setup API and start polling
_logger.debug("Validating workspace Cosmos DB is initialized...")
pf_client = PFClient(ml_client=ml_client)
try:
pf_client._traces._get_cosmos_db_token(container_name=CosmosDBContainerName.SPAN)
_logger.debug("The workspace Cosmos DB is already initialized.")
except FlowRequestException:
cosmos_metadata = pf_client._traces._get_cosmos_metadata()
# raise error if the Cosmos DB is disabled
if cosmos_metadata.is_disabled():
error_message = "The workspace Cosmos DB is disabled, please enable it first."
_logger.error(error_message)
raise _create_trace_destination_value_user_error(error_message)
if not cosmos_metadata.is_ready():
# print here to make users aware of this operation, as it is fairly time-consuming
init_cosmos_msg = (
"The workspace Cosmos DB is not initialized yet, "
Expand All @@ -76,6 +78,8 @@ def validate_trace_destination(value: str) -> None:
print(init_cosmos_msg)
_logger.debug(init_cosmos_msg)
pf_client._traces._setup_cosmos_db(resource_type=COSMOS_DB_SETUP_RESOURCE_TYPE)
zhengfeiwang marked this conversation as resolved.
Show resolved Hide resolved
else:
_logger.debug("The workspace Cosmos DB is available.")
_logger.debug("The workspace Cosmos DB is initialized.")

_logger.debug("pf.config.trace.destination is valid.")
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationScope, _ScopeDependentOperations

from promptflow._sdk._telemetry import ActivityType, WorkspaceTelemetryMixin, monitor_operation
from promptflow.azure._contracts._trace import CosmosMetadata
from promptflow.azure._restclient.flow.models import TraceDbSetupRequest
from promptflow.azure._restclient.flow_service_caller import FlowServiceCaller

Expand All @@ -32,6 +33,7 @@ def __init__(

@monitor_operation(activity_name="pfazure.traces._init_cosmos_db", activity_type=ActivityType.INTERNALCALL)
def _init_cosmos_db(self) -> Optional[Dict]:
# this API is deprecated and will be removed in the future
resp = self._service_caller.init_workspace_cosmos(
subscription_id=self._operation_scope.subscription_id,
resource_group_name=self._operation_scope.resource_group_name,
Expand Down Expand Up @@ -63,3 +65,11 @@ def _setup_cosmos_db(self, resource_type: str) -> None:
body=body,
)
return

def _get_cosmos_metadata(self) -> CosmosMetadata:
    """Fetch the Cosmos DB metadata for the current operation scope.

    Calls ``get_workspace_cosmos_metadata`` on the service caller with the
    subscription, resource group and workspace from ``self._operation_scope``,
    then converts the REST object with ``CosmosMetadata._from_rest_object``.

    :return: The converted ``CosmosMetadata``.
    """
    rest_obj = self._service_caller.get_workspace_cosmos_metadata(
        subscription_id=self._operation_scope.subscription_id,
        resource_group_name=self._operation_scope.resource_group_name,
        workspace_name=self._operation_scope.workspace_name,
    )
    return CosmosMetadata._from_rest_object(rest_obj)