Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for unicode labels with new RichLabelAnnotator class #1116

Merged
merged 12 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 37 additions & 0 deletions docs/detection/annotators.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,37 @@ status: new

</div>

=== "RichLabel"

```python
import supervision as sv

image = ...
detections = sv.Detections(...)
LinasKo marked this conversation as resolved.
Show resolved Hide resolved

labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]

rich_label_annotator = sv.RichLabelAnnotator(
font_path=".../font.ttf",
LinasKo marked this conversation as resolved.
Show resolved Hide resolved
text_position=sv.Position.CENTER
)
annotated_frame = label_annotator.annotate(
scene=image.copy(),
detections=detections,
labels=labels
)
```

<div class="result" markdown>

![label-annotator-example](https://media.roboflow.com/supervision-annotator-examples/label-annotator-example-purple.png){ align=center width="800" }

</div>

=== "Crop"

```python
Expand Down Expand Up @@ -492,6 +523,12 @@ status: new

:::supervision.annotators.core.LabelAnnotator

<div class="md-typeset">
<h2><a href="#supervision.annotators.core.RichLabelAnnotator">RichLabelAnnotator</a></h2>
</div>

:::supervision.annotators.core.RichLabelAnnotator

<div class="md-typeset">
<h2><a href="#supervision.annotators.core.BlurAnnotator">BlurAnnotator</a></h2>
</div>
Expand Down
1 change: 1 addition & 0 deletions supervision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
PercentageBarAnnotator,
PixelateAnnotator,
PolygonAnnotator,
RichLabelAnnotator,
RoundBoxAnnotator,
TraceAnnotator,
TriangleAnnotator,
Expand Down
230 changes: 172 additions & 58 deletions supervision/annotators/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

from supervision.annotators.base import BaseAnnotator, ImageType
from supervision.annotators.utils import ColorLookup, Trace, resolve_color
from supervision.annotators.utils import (
ColorLookup,
Trace,
resolve_color,
resolve_text_background_xyxy,
)
from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
from supervision.detection.core import Detections
from supervision.detection.utils import clip_boxes, mask_to_polygons
Expand Down Expand Up @@ -936,59 +942,6 @@ def __init__(
self.text_anchor: Position = text_position
self.color_lookup: ColorLookup = color_lookup

@staticmethod
def resolve_text_background_xyxy(
center_coordinates: Tuple[int, int],
text_wh: Tuple[int, int],
position: Position,
) -> Tuple[int, int, int, int]:
center_x, center_y = center_coordinates
text_w, text_h = text_wh

if position == Position.TOP_LEFT:
return center_x, center_y - text_h, center_x + text_w, center_y
elif position == Position.TOP_RIGHT:
return center_x - text_w, center_y - text_h, center_x, center_y
elif position == Position.TOP_CENTER:
return (
center_x - text_w // 2,
center_y - text_h,
center_x + text_w // 2,
center_y,
)
elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
return (
center_x - text_w // 2,
center_y - text_h // 2,
center_x + text_w // 2,
center_y + text_h // 2,
)
elif position == Position.BOTTOM_LEFT:
return center_x, center_y, center_x + text_w, center_y + text_h
elif position == Position.BOTTOM_RIGHT:
return center_x - text_w, center_y, center_x, center_y + text_h
elif position == Position.BOTTOM_CENTER:
return (
center_x - text_w // 2,
center_y,
center_x + text_w // 2,
center_y + text_h,
)
elif position == Position.CENTER_LEFT:
return (
center_x - text_w,
center_y - text_h // 2,
center_x,
center_y + text_h // 2,
)
elif position == Position.CENTER_RIGHT:
return (
center_x,
center_y - text_h // 2,
center_x + text_w,
center_y + text_h // 2,
)

@convert_for_annotation_method
def annotate(
self,
Expand Down Expand Up @@ -1056,9 +1009,11 @@ def annotate(
color=self.color,
detections=detections,
detection_idx=detection_idx,
color_lookup=self.color_lookup
if custom_color_lookup is None
else custom_color_lookup,
color_lookup=(
self.color_lookup
if custom_color_lookup is None
else custom_color_lookup
),
)

if labels is not None:
Expand All @@ -1078,7 +1033,7 @@ def annotate(
)[0]
text_w_padded = text_w + 2 * self.text_padding
text_h_padded = text_h + 2 * self.text_padding
text_background_xyxy = self.resolve_text_background_xyxy(
text_background_xyxy = resolve_text_background_xyxy(
center_coordinates=tuple(center_coordinates),
text_wh=(text_w_padded, text_h_padded),
position=self.text_anchor,
Expand Down Expand Up @@ -1148,6 +1103,165 @@ def draw_rounded_rectangle(
return scene


class RichLabelAnnotator:
"""
A class for annotating labels on an image using provided detections,
with support for Unicode characters by using a custom font.
"""

def __init__(
self,
color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
text_color: Color = Color.WHITE,
font_path: str = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be Optional[str]

font_size: int = 10,
text_padding: int = 10,
text_position: Position = Position.TOP_LEFT,
color_lookup: ColorLookup = ColorLookup.CLASS,
border_radius: int = 0,
):
"""
Args:
color (Union[Color, ColorPalette]): The color or color palette to use for
annotating the text background.
text_color (Color): The color to use for the text.
font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
rendering text. If `None`, the default PIL font will be used.
font_size (int): Font size for the text.
text_padding (int): Padding around the text within its background box.
text_position (Position): Position of the text relative to the detection.
Possible values are defined in the `Position` enum.
color_lookup (ColorLookup): Strategy for mapping colors to annotations.
Options are `INDEX`, `CLASS`, `TRACK`.
border_radius (int): The radius to apply round edges. If the selected
value is higher than the lower dimension, width or height, is clipped.
"""
self.color = color
self.text_color = text_color
self.text_padding = text_padding
self.text_anchor = text_position
self.color_lookup = color_lookup
self.border_radius = border_radius
if font_path is not None:
try:
self.font = ImageFont.truetype(font_path, font_size)
except OSError:
print(f"Font path '{font_path}' not found. Using PIL's default font.")
self.font = ImageFont.load_default(size=font_size)
else:
self.font = ImageFont.load_default(size=font_size)
LinasKo marked this conversation as resolved.
Show resolved Hide resolved

LinasKo marked this conversation as resolved.
Show resolved Hide resolved
def annotate(
self,
scene: ImageType,
detections: Detections,
labels: List[str] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be Optional[...]

And yes - I'm aware LabelAnnotator is similarly incorrect.

custom_color_lookup: Optional[np.ndarray] = None,
) -> ImageType:
"""
Annotates the given scene with labels based on the provided
detections, with support for Unicode characters.

Args:
scene (ImageType): The image where labels will be drawn.
`ImageType` is a flexible type, accepting either `numpy.ndarray`
or `PIL.Image.Image`.
detections (Detections): Object detections to annotate.
labels (List[str]): Optional. Custom labels for each detection.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be (Optional[List[str]])

custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
Allows to override the default color mapping strategy.

Returns:
The annotated image, matching the type of `scene` (`numpy.ndarray`
or `PIL.Image.Image`)

Example:
```python
import supervision as sv

image = ...
detections = sv.Detections(...)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here - we need a specific model that provides class_name

I see LabelAnnotator is the same. I'll update both later on.


labels = [
f"{class_name} {confidence:.2f}"
for class_name, confidence
in zip(detections['class_name'], detections.confidence)
]

rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf")
annotated_frame = label_annotator.annotate(
scene=image.copy(),
detections=detections,
labels=labels
)
```

"""
if isinstance(scene, np.ndarray):
scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(scene)
anchors_coordinates = detections.get_anchors_coordinates(
anchor=self.text_anchor
).astype(int)
if labels is not None and len(labels) != len(detections):
raise ValueError(
f"The number of labels provided ({len(labels)}) does not match the "
f"number of detections ({len(detections)}). Each detection should have "
f"a corresponding label. This discrepancy can occur if the labels and "
f"detections are not aligned or if an incorrect number of labels has "
f"been provided. Please ensure that the labels array has the same "
f"length as the Detections object."
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's nice when libraries tell what exactly is wrong :)

Still, we can make it more concise. We can remove this:

 This discrepancy can occur if the labels and "
f"detections are not aligned or if an incorrect number of labels has "
f"been provided. Please ensure that the labels array has the same "
f"length as the Detections object.

for detection_idx, center_coordinates in enumerate(anchors_coordinates):
color = resolve_color(
color=self.color,
detections=detections,
detection_idx=detection_idx,
color_lookup=(
self.color_lookup
if custom_color_lookup is None
else custom_color_lookup
),
)
if labels is not None:
text = labels[detection_idx]
elif detections[CLASS_NAME_DATA_FIELD] is not None:
text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
elif detections.class_id is not None:
text = str(detections.class_id[detection_idx])
else:
text = str(detection_idx)

left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)
text_width = right - left
text_height = bottom - top
text_w_padded = text_width + 2 * self.text_padding
text_h_padded = text_height + 2 * self.text_padding
text_background_xyxy = resolve_text_background_xyxy(
center_coordinates=tuple(center_coordinates),
text_wh=(text_w_padded, text_h_padded),
position=self.text_anchor,
)

text_x = text_background_xyxy[0] + self.text_padding - left
text_y = text_background_xyxy[1] + self.text_padding - top

draw.rounded_rectangle(
text_background_xyxy,
radius=self.border_radius,
fill=color.as_rgb(),
outline=None,
)
draw.text(
xy=(text_x, text_y),
text=text,
font=self.font,
fill=self.text_color.as_rgb(),
)

return scene
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now always a PIL image. Let's check if the user passed a np.ndarray originally and make sure we return the image in the same format.



class BlurAnnotator(BaseAnnotator):
"""
A class for blurring regions in an image using provided detections.
Expand Down
55 changes: 54 additions & 1 deletion supervision/annotators/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Optional, Union
from typing import Optional, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -63,6 +63,59 @@ def resolve_color_idx(
return detections.tracker_id[detection_idx]


def resolve_text_background_xyxy(
center_coordinates: Tuple[int, int],
text_wh: Tuple[int, int],
position: Position,
) -> Tuple[int, int, int, int]:
center_x, center_y = center_coordinates
text_w, text_h = text_wh

if position == Position.TOP_LEFT:
return center_x, center_y - text_h, center_x + text_w, center_y
elif position == Position.TOP_RIGHT:
return center_x - text_w, center_y - text_h, center_x, center_y
elif position == Position.TOP_CENTER:
return (
center_x - text_w // 2,
center_y - text_h,
center_x + text_w // 2,
center_y,
)
elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
return (
center_x - text_w // 2,
center_y - text_h // 2,
center_x + text_w // 2,
center_y + text_h // 2,
)
elif position == Position.BOTTOM_LEFT:
return center_x, center_y, center_x + text_w, center_y + text_h
elif position == Position.BOTTOM_RIGHT:
return center_x - text_w, center_y, center_x, center_y + text_h
elif position == Position.BOTTOM_CENTER:
return (
center_x - text_w // 2,
center_y,
center_x + text_w // 2,
center_y + text_h,
)
elif position == Position.CENTER_LEFT:
return (
center_x - text_w,
center_y - text_h // 2,
center_x,
center_y + text_h // 2,
)
elif position == Position.CENTER_RIGHT:
return (
center_x,
center_y - text_h // 2,
center_x + text_w,
center_y + text_h // 2,
)


def get_color_by_index(color: Union[Color, ColorPalette], idx: int) -> Color:
if isinstance(color, ColorPalette):
return color.by_idx(idx)
Expand Down