cvat-ai · omerferhatt · Jan 18, 2024 · Jan 18, 2024 · Apr 30, 2024 · Apr 30, 2024
@@ -146,6 +146,7 @@ interface State {
  activeLabelID: number | null;
  activeTracker: MLModel | null;
  convertMasksToPolygons: boolean;
+ selectedObjectType: ObjectType;
  trackedShapes: TrackedShape[];
  fetching: boolean;
  pointsReceived: boolean;
@@ -235,6 +236,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  super(props);
  this.state = {
  convertMasksToPolygons: false,
+ selectedObjectType: ObjectType.SHAPE,
  activeInteractor: props.interactors.length ? props.interactors[0] : null,
  activeTracker: props.trackers.length ? props.trackers[0] : null,
  activeLabelID: props.labels.length ? props.labels[0].id as number : null,
@@ -593,7 +595,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  const portals = !activeTracker ?
  [] :
  states
- .filter((objectState) => objectState.objectType === 'track' && objectState.shapeType === 'rectangle')
+ .filter((objectState) => objectState.objectType === 'track' && (objectState.shapeType === 'rectangle' || objectState.shapeType === 'polygon'))
  .map((objectState: any): React.ReactPortal | null => {
  const { clientID } = objectState;
  const selectorID = `#cvat-objects-sidebar-state-item-${clientID}`;
@@ -822,7 +824,10 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  job: jobInstance.id,
  }) as TrackerResults;
 
- response.shapes = response.shapes.map(trackedRectangleMapper);
+ // A result with 4 values is treated as a rectangle and goes through the rectangle mapper; only the first shape is checked
+ if (response.shapes[0].length === 4) {
+ response.shapes = response.shapes.map(trackedRectangleMapper);
+ }
  for (let i = 0; i < trackableObjects.clientIDs.length; i++) {
  const clientID = trackableObjects.clientIDs[i];
  const shape = response.shapes[i];
@@ -859,15 +864,15 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  }
 
  private async constructFromPoints(): Promise<void> {
- const { convertMasksToPolygons } = this.state;
+ const { convertMasksToPolygons, selectedObjectType } = this.state;
  const {
  frame, labels, curZOrder, activeLabelID, createAnnotations,
  } = this.props;
 
  if (convertMasksToPolygons) {
  const object = new core.classes.ObjectState({
  frame,
- objectType: ObjectType.SHAPE,
+ objectType: selectedObjectType,
  source: core.enums.Source.SEMI_AUTO,
  label: labels.find((label) => label.id === activeLabelID as number) as Label,
  shapeType: ShapeType.POLYGON,
@@ -958,6 +963,29 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  );
  }
 
+ /**
+  * Renders a dropdown labelled "Object Type".
+  * Every ObjectType value except TAG is offered; the chosen value is stored in
+  * `state.selectedObjectType`.
+  */
+ private renderObjectTypeBlock(): JSX.Element {
+     const { selectedObjectType } = this.state;
+     // filter() rather than splice(indexOf(TAG), 1): if TAG were ever missing,
+     // indexOf would return -1 and splice(-1, 1) would silently drop the last option.
+     const objectTypes = Object.values(ObjectType).filter((type) => type !== ObjectType.TAG);
+     return (
+         <Row className='cvat-interactors-setups-container'>
+             <Select
+                 value={selectedObjectType}
+                 onChange={(value: ObjectType) => {
+                     this.setState({ selectedObjectType: value });
+                 }}
+             >
+                 {objectTypes.map((type) => (
+                     <Select.Option key={type} value={type}>
+                         {type}
+                     </Select.Option>
+                 ))}
+             </Select>
+             <Text>Object Type</Text>
+         </Row>
+     );
+ }
+
  private renderLabelBlock(): JSX.Element {
  const { labels } = this.props;
  const { activeLabelID } = this.state;
@@ -1346,6 +1374,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  }
 
  private renderPopoverContent(): JSX.Element {
+ const { convertMasksToPolygons } = this.state;
  return (
  <div className='cvat-tools-control-popover-content'>
  <Row justify='start'>
@@ -1358,6 +1387,7 @@ export class ToolsControlComponent extends React.PureComponent<Props, State> {
  <Tabs type='card' tabBarGutter={8}>
  <Tabs.TabPane key='interactors' tab='Interactors'>
  {this.renderMasksConvertingBlock()}
+ {convertMasksToPolygons ? this.renderObjectTypeBlock() : null}
  {this.renderLabelBlock()}
  {this.renderInteractorBlock()}
  </Tabs.TabPane>

diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/function-gpu.yaml b/serverless/pytorch/omerferhatt/xmem/nuclio/function-gpu.yaml
@@ -0,0 +1,67 @@
+metadata:
+ name: pth-omerferhatt-xmem
+ namespace: cvat
+ annotations:
+ name: XMem
+ type: tracker
+ spec:
+ framework: pytorch
+
+spec:
+ description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model
+ runtime: 'python:3.9'
+ handler: main:handler
+ eventTimeout: 30s
+
+ env:
+ - name: PYTHONPATH
+ value: /opt/nuclio/xmem
+
+ build:
+ image: cvat.pth.omerferhatt.xmem:latest-gpu
+ baseImage: nvidia/cuda:12.1.0-runtime-ubuntu22.04
+
+ directives:
+ preCopy:
+ - kind: RUN
+ value: |-
+ apt update \
+ && apt install -y --no-install-recommends \
+ wget \
+ git \
+ ca-certificates \
+ python-is-python3 \
+ python3 \
+ python3-pip \
+ && rm -rf /var/lib/apt/lists/*
+ - kind: WORKDIR
+ value: /opt/nuclio
+ - kind: RUN
+ value: git clone --branch main https://github.com/omerferhatt/XMem xmem
+ - kind: RUN
+ value: pip install opencv-python-headless jsonpickle
+ - kind: RUN
+ value: |-
+ pip install torch torchvision
+ - kind: RUN
+ value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth'
+
+ triggers:
+ myHttpTrigger:
+ maxWorkers: 1
+ kind: 'http'
+ workerAvailabilityTimeoutMilliseconds: 10000
+ attributes:
+ # Value derived from the estimated request size when tracking 100 objects at the same time on a 4K image
+ maxRequestBodySize: 1073741824 # 1GB
+
+ resources:
+ limits:
+ nvidia.com/gpu: 1
+
+ platform:
+ attributes:
+ restartPolicy:
+ name: always
+ maximumRetryCount: 3
+ mountMode: volume
diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/function.yaml b/serverless/pytorch/omerferhatt/xmem/nuclio/function.yaml
@@ -0,0 +1,63 @@
+metadata:
+ name: pth-omerferhatt-xmem
+ namespace: cvat
+ annotations:
+ name: XMem
+ type: tracker
+ spec:
+ framework: pytorch
+
+spec:
+ description: Long-Term Object Segmentation with an Atkinson-Shiffrin Memory Model
+ runtime: 'python:3.9'
+ handler: main:handler
+ eventTimeout: 30s
+
+ env:
+ - name: PYTHONPATH
+ value: /opt/nuclio/xmem
+
+ build:
+ image: cvat.pth.omerferhatt.xmem
+ baseImage: ubuntu:22.04
+
+ directives:
+ preCopy:
+ - kind: RUN
+ value: |-
+ apt update \
+ && apt install -y --no-install-recommends \
+ wget \
+ git \
+ ca-certificates \
+ python-is-python3 \
+ python3 \
+ python3-pip \
+ && rm -rf /var/lib/apt/lists/*
+ - kind: WORKDIR
+ value: /opt/nuclio
+ - kind: RUN
+ value: git clone --branch main https://github.com/omerferhatt/XMem xmem
+ - kind: RUN
+ value: pip install opencv-python-headless jsonpickle
+ - kind: RUN
+ value: |-
+ pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+ - kind: RUN
+ value: wget 'https://www.dropbox.com/scl/fi/5m1l747p15qzgq023e0q9/xmem.pth?rlkey=ss2kjaq4qlvvk5juucyvtmrh8&dl=0' -O '/xmem.pth'
+
+ triggers:
+ myHttpTrigger:
+ maxWorkers: 1
+ kind: 'http'
+ workerAvailabilityTimeoutMilliseconds: 10000
+ attributes:
+ # Value derived from the estimated request size when tracking 100 objects at the same time on a 4K image
+ maxRequestBodySize: 1073741824 # 1GB
+
+ platform:
+ attributes:
+ restartPolicy:
+ name: always
+ maximumRetryCount: 3
+ mountMode: volume
diff --git a/serverless/pytorch/omerferhatt/xmem/nuclio/main.py b/serverless/pytorch/omerferhatt/xmem/nuclio/main.py
@@ -0,0 +1,42 @@
+import base64
+import io
+import json
+
+import numpy as np
+from model_handler import ModelHandler
+from PIL import Image
+
+
+def init_context(context):
+ context.logger.info("Init context... 0%")
+ model = ModelHandler()
+ context.user_data.model = model
+ context.logger.info("Init context...100%")
+
+
+def handler(context, event):
+ context.logger.info("Run XMem model")
+ data = event.body
+ buf = io.BytesIO(base64.b64decode(data["image"]))
+ shapes = data.get("shapes")
+ states = data.get("states")
+
+ image = Image.open(buf).convert("RGB")
+ image = np.array(image)[:, :, ::-1].copy()
+ results = {"shapes": [], "states": []}
+ for i, shape in enumerate(shapes):
+ context.logger.info(f"Inference [{i}] started")
+
+ shape, state = context.user_data.model.infer(
+ image, shape, states[i] if i < len(states) else None
+ )
+ results["shapes"].append(shape)
+ results["states"].append(state)
+ context.logger.info(f"Inference [{i}] finised")
+
+ return context.Response(
+ body=json.dumps(results),
+ headers={},
+ content_type="application/json",
+ status_code=200,
+ )