From c00da8e0f876a234b75b13aa2fb0994d91d36224 Mon Sep 17 00:00:00 2001
From: Patrick Avery
Date: Tue, 30 Jan 2024 12:55:14 -0600
Subject: [PATCH] Catch and fix Google Healthcare API errors

This change adds support for Google Healthcare API DICOMweb servers, such as the NCI's [Imaging Data Commons](https://datacommons.cancer.gov/repository/imaging-data-commons).

The problem: Google Healthcare API raises an error if `AvailableTransferSyntaxUID` is requested as a field, or if `SOPClassUID` is used as a search filter.

The `SOPClassUID` should definitely be allowed as an instance-level search filter, as documented in [Table 10.6.1-5. Required Matching Attributes](https://dicom.nema.org/medical/dicom/current/output/chtml/part18/sect_10.6.html). However, this has apparently been a long-standing problem for nearly four years (see [here](https://github.com/GoogleCloudPlatform/healthcare-dicom-dicomweb-adapter/pull/30#discussion_r312954232)), so it may not be fixed anytime soon. And even if it is fixed, the Imaging Data Commons may not update their software anytime soon. It would be highly advantageous to support such a large DICOMweb repository by working around the issue.

The fix in this PR is as follows:

1. The two `search_for_instances()` calls are still performed exactly as before, as long as there are no HTTP errors.
2. If there is an HTTP error with a 400 status code, and its message matches one of the known errors from Google Healthcare API, then the `search_for_instances()` arguments are patched to work with Google Healthcare API, as follows:
   a) `AvailableTransferSyntaxUID` is simply removed from the requested fields, if present.
   b) `SOPClassUID` is filtered manually, if present (meaning it is not supplied in the `search_filters`, but only instances with a matching `SOPClassUID` are returned).

These changes should not affect any situation other than an error returned by a Google Healthcare API server. In that case, the arguments are patched and the request is made again, after which it works properly.
The following example works after this fix:

```python
from wsidicom import WsiDicom, WsiDicomWebClient

url = 'https://proxy.imaging.datacommons.cancer.gov/current/viewer-only-no-downloads-see-tinyurl-dot-com-slash-3j3d9jyp/dicomWeb'
study_uid = '2.25.227261840503961430496812955999336758586'
series_uid = '1.3.6.1.4.1.5962.99.1.1334438926.1589741711.1637717011470.2.0'

client = WsiDicomWebClient.create_client(url)
slide = WsiDicom.open_web(client, study_uid, series_uid)
```

Fixes: #141

Signed-off-by: Patrick Avery
--- wsidicom/web/wsidicom_web_client.py | 59 ++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/wsidicom/web/wsidicom_web_client.py b/wsidicom/web/wsidicom_web_client.py index f7696bfb..85336e18 100644 --- a/wsidicom/web/wsidicom_web_client.py +++ b/wsidicom/web/wsidicom_web_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy from http import HTTPStatus import logging from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union @@ -253,7 +254,7 @@ def _get_intances( return ( self._get_uids_from_response(instance, series_uid) for series_uid in series_uids - for instance in self._client.search_for_instances( + for instance in self._search_for_instances( study_uid, series_uid, search_filters={SOP_CLASS_UID: sop_class_uid}, @@ -261,7 +262,7 @@ def _get_intances( return ( self._get_uids_from_response(instance) - for instance in self._client.search_for_instances( + for instance in self._search_for_instances( study_uid, fields=["AvailableTransferSyntaxUID"], search_filters={ @@ -271,6 +272,60 @@ def _get_intances( ) ) + def _search_for_instances(self, *args, **kwargs): + # Try performing a regular search_for_instances(). If there is an error, + # check if it is a Google Healthcare API error that we can fix. If so, + # fix it and make the request again. 
+ try: + yield from self._client.search_for_instances(*args, **kwargs) + except HTTPError as e: + if e.response.status_code != 400: + # Not a Google Healthcare API error. Propagate the exception + raise + + # Check if it was a google healthcare API error + google_healthcare_api_errors = ( + 'unknown/unsupported QIDO attribute: AvailableTransferSyntaxUID', + # Sometimes, this says "SOPClassUID is not a supported instance or study...", + # and sometimes, it says "SOPClassUID is not a supported instance or series..." + # Just catch the first part with "instance" + 'SOPClassUID is not a supported instance', + ) + if not any(x in e.response.text for x in google_healthcare_api_errors): + # Not a Google Healthcare API error. Propagate the exception + raise + + # It was a Google Healthcare API error. + # Fix the request and perform it again. + + # Perform a deepcopy so that the caller's arguments are not modified. + # We assume that `fields` and `search_filters` are kwargs, not args. + kwargs = copy.deepcopy(kwargs) + + # Remove the AvailableTransferSyntaxUID, if present, as google + # healthcare API does not support this. + if 'AvailableTransferSyntaxUID' in kwargs.get('fields', []): + kwargs['fields'].remove('AvailableTransferSyntaxUID') + + # Perform manual filtering for SOP_CLASS_UID, if present. + # Google Healthcare API doesn't support this as a search filter + # (even though it definitely should). + if SOP_CLASS_UID not in kwargs.get('search_filters', {}): + # We only needed to remove the AvailableTransferSyntaxUID. + # Try the search again. 
+ yield from self._client.search_for_instances(*args, **kwargs) + return + + # Perform the manual filtering for SOP_CLASS_UID + sop_class_uid = kwargs['search_filters'].pop(SOP_CLASS_UID) + if SOP_CLASS_UID not in kwargs.get('fields', []): + # Make sure we get the SOP_CLASS_UID so we can manually filter + kwargs.setdefault('fields', []).append(SOP_CLASS_UID) + + for result in self._client.search_for_instances(*args, **kwargs): + if result[SOP_CLASS_UID]['Value'][0] == sop_class_uid: + yield result + @staticmethod def _get_uids_from_response( response: Dict[str, Dict[Any, Any]], series_uid: Optional[UID] = None