Skip to content

Segmenter

Bases: BaseComponent

Runs instance segmentation on image tiles.

Modes
  • With box prompts: Requires infer_coco_path with detection boxes
  • Without box prompts: Runs segmentation on full tiles
Requirements
  • tiles_path: Directory containing tiles
  • infer_coco_path + infer_gdf: If model requires box prompts
Produces
  • infer_gdf: GeoDataFrame with segmented polygons
  • Columns: segmenter_score (+ preserves detector columns if present)
Source code in canopyrs/engine/components/segmenter.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
class SegmenterComponent(BaseComponent):
    """
    Runs instance segmentation on image tiles.

    Modes:
        - With box prompts: Requires infer_coco_path with detection boxes
        - Without box prompts: Runs segmentation on full tiles

    Requirements:
        - tiles_path: Directory containing tiles
        - infer_coco_path + infer_gdf: If model requires box prompts

    Produces:
        - infer_gdf: GeoDataFrame with segmented polygons
        - Columns: segmenter_score (+ preserves detector columns if present)
    """

    name = 'segmenter'

    # Class-level defaults; __init__ copies them per instance so that
    # model-specific additions never mutate these shared containers.
    BASE_REQUIRES_STATE = {StateKey.TILES_PATH}
    BASE_REQUIRES_COLUMNS: Set[str] = set()

    BASE_PRODUCES_STATE = {StateKey.INFER_GDF, StateKey.INFER_COCO_PATH}
    BASE_PRODUCES_COLUMNS = {Col.GEOMETRY, Col.OBJECT_ID, Col.TILE_PATH, Col.SEGMENTER_SCORE}

    # Messages keyed by state key / column.
    # NOTE(review): presumably surfaced by requirement validation when a key
    # is missing -- confirm against BaseComponent / validate_requirements.
    BASE_STATE_HINTS = {
        StateKey.TILES_PATH: "Segmenter needs tiles to process. Add a tilerizer before segmenter.",
        StateKey.INFER_COCO_PATH: "This segmenter model requires box prompts from a COCO file.",
        StateKey.INFER_GDF: "This segmenter model requires a GeoDataFrame with detection boxes.",
    }

    BASE_COLUMN_HINTS = {
        Col.OBJECT_ID: "Segmenter needs object IDs to associate masks with detections.",
    }

    def __init__(
        self,
        config: SegmenterConfig,
        parent_output_path: str = None,
        component_id: int = None
    ):
        """
        Resolve the model class and build this instance's requirement sets.

        Args:
            config: Segmenter configuration; ``config.model`` must be a key of
                SEGMENTER_REGISTRY.
            parent_output_path: Forwarded unchanged to ``BaseComponent.__init__``.
            component_id: Forwarded unchanged to ``BaseComponent.__init__``.

        Raises:
            ValueError: If ``config.model`` is not in SEGMENTER_REGISTRY.
        """
        super().__init__(config, parent_output_path, component_id)

        # Get model class (without instantiating) to check requirements
        if config.model not in SEGMENTER_REGISTRY:
            raise ValueError(f'Invalid segmenter model: {config.model}')
        self._model_class = SEGMENTER_REGISTRY.get(config.model)

        # Set base requirements (fresh copies of the class-level defaults)
        self.requires_state = set(self.BASE_REQUIRES_STATE)
        self.requires_columns = set(self.BASE_REQUIRES_COLUMNS)
        self.produces_state = set(self.BASE_PRODUCES_STATE)
        self.produces_columns = set(self.BASE_PRODUCES_COLUMNS)

        # Set hints
        self.state_hints = dict(self.BASE_STATE_HINTS)
        self.column_hints = dict(self.BASE_COLUMN_HINTS)

        # Add model-specific requirements: box-prompted models additionally
        # need a COCO file, and get a hint naming the configured model.
        if self._model_class.REQUIRES_BOX_PROMPT:
            self.requires_state.add(StateKey.INFER_COCO_PATH)
            self.state_hints[StateKey.INFER_COCO_PATH] = (
                f"The '{config.model}' segmenter requires box prompts. "
                f"Add a detector before segmenter."
            )

    @classmethod
    def run_standalone(
        cls,
        config: SegmenterConfig,
        tiles_path: str,
        output_path: str,
        infer_coco_path: str = None,
    ) -> 'DataState':
        """
        Run segmenter standalone on pre-tiled imagery.

        Args:
            config: Segmenter configuration
            tiles_path: Path to directory containing tiles
            output_path: Where to save outputs
            infer_coco_path: Path to COCO file with detection boxes
                             (required if the model needs box prompts, like SAM)

        Returns:
            DataState with segmentation results (access .infer_gdf for the GeoDataFrame)

        Example:
            result = SegmenterComponent.run_standalone(
                config=SegmenterConfig(model='sam2', ...),
                tiles_path='./tiles',
                output_path='./output',
                # infer_coco_path='...'  # add when the model requires box prompts
            )
            print(result.infer_gdf)
        """
        # Imported inside the method rather than at module level.
        # NOTE(review): presumably to avoid a circular import with the
        # pipeline module -- confirm.
        from canopyrs.engine.pipeline import run_component
        return run_component(
            component=cls(config),
            output_path=output_path,
            tiles_path=tiles_path,
            infer_coco_path=infer_coco_path,
        )

    @validate_requirements
    def __call__(self, data_state: DataState) -> ComponentResult:
        """
        Run instance segmentation on tiles.

        Returns flattened GDF with new geometries (masks).
        Pipeline handles merging with existing GDF (to preserve detector columns).

        Args:
            data_state: Current pipeline state. ``tiles_path`` is always read;
                ``infer_coco_path`` is read only when the model requires box
                prompts.

        Returns:
            ComponentResult wrapping one row per predicted mask, with the tile
            path, polygon, segmenter score and object id. When no masks are
            produced, the GeoDataFrame is empty but still carries the produced
            columns.
        """

        segmenter = self._model_class(self.config)

        # Create appropriate dataset
        data_paths = [data_state.tiles_path]

        if segmenter.REQUIRES_BOX_PROMPT:
            # The dataset is pointed at the folder holding the COCO file,
            # in addition to the tiles directory.
            data_paths.append(Path(data_state.infer_coco_path).parent)
            dataset = DetectionLabeledRasterCocoDataset(
                fold=INFER_AOI_NAME,
                root_path=data_paths,
                box_padding_percentage=self.config.box_padding_percentage,
                transform=None,
                other_attributes_names_to_pass=[Col.OBJECT_ID]
            )
        else:
            dataset = UnlabeledRasterDataset(
                fold=None,
                root_path=data_paths,
                transform=None
            )

        # Run inference
        tiles_paths, tiles_masks_objects_ids, tiles_masks_polygons, tiles_masks_scores = \
            segmenter.infer_on_dataset(dataset)

        # Flatten the per-tile outputs into one row per mask. Scores and ids
        # are looked up by index (not zipped) so that a length mismatch with
        # the polygons still raises instead of silently dropping masks.
        rows = []
        unique_id = 0
        for i, tile_path in enumerate(tiles_paths):
            for j, polygon in enumerate(tiles_masks_polygons[i]):
                row = {
                    Col.TILE_PATH: tile_path,
                    Col.GEOMETRY: polygon,
                    Col.SEGMENTER_SCORE: tiles_masks_scores[i][j],
                }
                # Include object_id if available (from detector), or assign new unique id otherwise
                if tiles_masks_objects_ids is not None:
                    row[Col.OBJECT_ID] = tiles_masks_objects_ids[i][j]
                else:
                    row[Col.OBJECT_ID] = unique_id
                    unique_id += 1
                rows.append(row)

        # Create GDF with new geometries (masks)
        if rows:
            gdf = gpd.GeoDataFrame(rows, geometry=Col.GEOMETRY, crs=None)
        else:
            # produces_columns is a set, which the pandas 2.x DataFrame
            # constructor rejects for `columns=`; pass an ordered list and
            # set the geometry column explicitly, as in the non-empty branch.
            gdf = gpd.GeoDataFrame(
                columns=sorted(self.produces_columns, key=str),
                geometry=Col.GEOMETRY,
                crs=None,
            )

        print(f"SegmenterComponent: Generated {len(gdf)} masks.")

        return ComponentResult(
            gdf=gdf,
            produced_columns=self.produces_columns,
            # Without box prompts there are no upstream detections, so every
            # mask is a new object.
            objects_are_new=not self._model_class.REQUIRES_BOX_PROMPT,
            save_gpkg=True,
            gpkg_name_suffix="notaggregated",
            save_coco=True,
            coco_scores_column=Col.SEGMENTER_SCORE,
            coco_categories_column=None,
        )

__call__(data_state)

Run instance segmentation on tiles.

Returns flattened GDF with new geometries (masks). Pipeline handles merging with existing GDF (to preserve detector columns).

Source code in canopyrs/engine/components/segmenter.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
@validate_requirements
def __call__(self, data_state: DataState) -> ComponentResult:
    """
    Run instance segmentation on tiles.

    Returns flattened GDF with new geometries (masks).
    Pipeline handles merging with existing GDF (to preserve detector columns).

    Args:
        data_state: Current pipeline state. ``tiles_path`` is always read;
            ``infer_coco_path`` is read only when the model requires box
            prompts.

    Returns:
        ComponentResult wrapping one row per predicted mask, with the tile
        path, polygon, segmenter score and object id. When no masks are
        produced, the GeoDataFrame is empty but still carries the produced
        columns.
    """

    segmenter = self._model_class(self.config)

    # Create appropriate dataset
    data_paths = [data_state.tiles_path]

    if segmenter.REQUIRES_BOX_PROMPT:
        # The dataset is pointed at the folder holding the COCO file,
        # in addition to the tiles directory.
        data_paths.append(Path(data_state.infer_coco_path).parent)
        dataset = DetectionLabeledRasterCocoDataset(
            fold=INFER_AOI_NAME,
            root_path=data_paths,
            box_padding_percentage=self.config.box_padding_percentage,
            transform=None,
            other_attributes_names_to_pass=[Col.OBJECT_ID]
        )
    else:
        dataset = UnlabeledRasterDataset(
            fold=None,
            root_path=data_paths,
            transform=None
        )

    # Run inference
    tiles_paths, tiles_masks_objects_ids, tiles_masks_polygons, tiles_masks_scores = \
        segmenter.infer_on_dataset(dataset)

    # Flatten the per-tile outputs into one row per mask. Scores and ids are
    # looked up by index (not zipped) so that a length mismatch with the
    # polygons still raises instead of silently dropping masks.
    rows = []
    unique_id = 0
    for i, tile_path in enumerate(tiles_paths):
        for j, polygon in enumerate(tiles_masks_polygons[i]):
            row = {
                Col.TILE_PATH: tile_path,
                Col.GEOMETRY: polygon,
                Col.SEGMENTER_SCORE: tiles_masks_scores[i][j],
            }
            # Include object_id if available (from detector), or assign new unique id otherwise
            if tiles_masks_objects_ids is not None:
                row[Col.OBJECT_ID] = tiles_masks_objects_ids[i][j]
            else:
                row[Col.OBJECT_ID] = unique_id
                unique_id += 1
            rows.append(row)

    # Create GDF with new geometries (masks)
    if rows:
        gdf = gpd.GeoDataFrame(rows, geometry=Col.GEOMETRY, crs=None)
    else:
        # produces_columns is a set, which the pandas 2.x DataFrame
        # constructor rejects for `columns=`; pass an ordered list and set
        # the geometry column explicitly, as in the non-empty branch.
        gdf = gpd.GeoDataFrame(
            columns=sorted(self.produces_columns, key=str),
            geometry=Col.GEOMETRY,
            crs=None,
        )

    print(f"SegmenterComponent: Generated {len(gdf)} masks.")

    return ComponentResult(
        gdf=gdf,
        produced_columns=self.produces_columns,
        # Without box prompts there are no upstream detections, so every
        # mask is a new object.
        objects_are_new=not self._model_class.REQUIRES_BOX_PROMPT,
        save_gpkg=True,
        gpkg_name_suffix="notaggregated",
        save_coco=True,
        coco_scores_column=Col.SEGMENTER_SCORE,
        coco_categories_column=None,
    )

run_standalone(config, tiles_path, output_path, infer_coco_path=None) classmethod

Run segmenter standalone on pre-tiled imagery.

Parameters:

Name Type Description Default
config SegmenterConfig

Segmenter configuration

required
tiles_path str

Path to directory containing tiles

required
output_path str

Where to save outputs

required
infer_coco_path str

Path to COCO file with detection boxes (required if the model needs box prompts, like SAM)

None

Returns:

Type Description
DataState

DataState with segmentation results (access .infer_gdf for the GeoDataFrame)

Example

result = SegmenterComponent.run_standalone(
    config=SegmenterConfig(model='sam2', ...),
    tiles_path='./tiles',
    output_path='./output',
)
print(result.infer_gdf)

Source code in canopyrs/engine/components/segmenter.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@classmethod
def run_standalone(
    cls,
    config: SegmenterConfig,
    tiles_path: str,
    output_path: str,
    infer_coco_path: str = None,
) -> 'DataState':
    """
    Segment an existing directory of tiles without building a full pipeline.

    A component is constructed from ``config`` and handed to
    ``run_component`` together with the given paths.

    Args:
        config: Segmenter configuration
        tiles_path: Path to directory containing tiles
        output_path: Where to save outputs
        infer_coco_path: Path to COCO file with detection boxes
                         (required if the model needs box prompts, like SAM)

    Returns:
        DataState with segmentation results (access .infer_gdf for the GeoDataFrame)

    Example:
        result = SegmenterComponent.run_standalone(
            config=SegmenterConfig(model='sam2', ...),
            tiles_path='./tiles',
            output_path='./output',
        )
        print(result.infer_gdf)
    """
    # Resolved at call time rather than at module import.
    from canopyrs.engine.pipeline import run_component

    standalone_component = cls(config)
    final_state = run_component(
        component=standalone_component,
        output_path=output_path,
        tiles_path=tiles_path,
        infer_coco_path=infer_coco_path,
    )
    return final_state