Workspace

Manage a Roboflow workspace.

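A `Workspace` is normally obtained through the top-level `Roboflow` client rather than constructed directly. A minimal sketch, assuming a valid API key:

from roboflow import Roboflow

rf = Roboflow(api_key="YOUR_API_KEY")  # authenticate with your private API key
workspace = rf.workspace()             # load the default workspace for the key

The `workspace` object above is reused in the usage sketches throughout this page.
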
Source code in roboflow/core/workspace.py
class Workspace:
    """
    Manage a Roboflow workspace.
    """

    def __init__(self, info, api_key, default_workspace, model_format):
        if api_key in DEMO_KEYS:
            self.__api_key = api_key
            self.model_format = model_format
            self.project_list = []
        else:
            workspace_info = info["workspace"]
            self.name = workspace_info["name"]
            self.project_list = workspace_info["projects"]
            if "members" in workspace_info.keys():
                self.members = workspace_info["members"]
            self.url = workspace_info["url"]
            self.model_format = model_format

            self.__api_key = api_key

    def list_projects(self):
        """
        Print all projects in the workspace to the console.
        """
        print(self.project_list)

    def projects(self):
        """
        Retrieve all projects in the workspace.

        Returns:
            List of project ids (str), one per project in the workspace.
        """
        projects_array = []
        for a_project in self.project_list:
            proj = Project(self.__api_key, a_project, self.model_format)
            projects_array.append(proj.id)

        return projects_array

    def project(self, project_name):
        """
        Retrieve a Project() object that represents a project in the workspace.

        This object can be used to retrieve the model through which to run inference.

        Args:
            project_name (str): name of the project

        Returns:
            Project Object
        """
        sys.stdout.write("\r" + "loading Roboflow project...")
        sys.stdout.write("\n")
        sys.stdout.flush()

        if self.__api_key in DEMO_KEYS:
            return Project(self.__api_key, {}, self.model_format)

        project_name = project_name.replace(self.url + "/", "")

        if "/" in project_name:
            raise RuntimeError(
                "The {} project is not available in this ({}) workspace".format(
                    project_name, self.url
                )
            )

        dataset_info = requests.get(
            API_URL + "/" + self.url + "/" + project_name + "?api_key=" + self.__api_key
        )

        # Throw error if dataset isn't valid/user doesn't have permissions to access the dataset
        if dataset_info.status_code != 200:
            raise RuntimeError(dataset_info.text)

        dataset_info = dataset_info.json()["project"]

        return Project(self.__api_key, dataset_info, self.model_format)

    def create_project(self, project_name, project_type, project_license, annotation):
        """
        Create a project in a Roboflow workspace.

        Args:
            project_name (str): name of the project
            project_type (str): type of the project
            project_license (str): license of the project (set to `private` for private projects, only available for paid customers)
            annotation (str): annotation of the project

        Returns:
            Project Object
        """
        data = {
            "name": project_name,
            "type": project_type,
            "license": project_license,
            "annotation": annotation,
        }

        r = requests.post(
            API_URL + "/" + self.url + "/projects?api_key=" + self.__api_key, json=data
        )

        r.raise_for_status()

        if "error" in r.json().keys():
            raise RuntimeError(r.json()["error"])

        return self.project(r.json()["id"].split("/")[-1])

    def clip_compare(
        self, dir: str = "", image_ext: str = ".png", target_image: str = ""
    ) -> list:
        """
        Compare all images in a directory to a target image using CLIP.

        Args:
            dir (str): path to a directory of images to compare
            image_ext (str): file extension of the images to compare (e.g. `.png`)
            target_image (str): path to the target image to compare each directory image against

        Returns:
            list: one {image_name: similarity_score} dict per image, sorted by descending similarity to the target
        """

        # list to store comparison results in
        comparisons = []
        # grab all images in the given directory with the expected extension
        for image in glob.glob(f"./{dir}/*{image_ext}"):
            # score the image against the target with CLIP
            similarity = clip_encode(image, target_image)
            # map image name to similarity score
            comparisons.append({image: similarity})
        # sort once, from most to least similar
        comparisons.sort(key=lambda item: -list(item.values())[0])
        return comparisons

    def two_stage(
        self,
        image: str = "",
        first_stage_model_name: str = "",
        first_stage_model_version: int = 0,
        second_stage_model_name: str = "",
        second_stage_model_version: int = 0,
    ) -> list:
        """
        For each bounding box found by a first stage detection model, run a second stage classification model on the cropped region.

        Args:
            image (str): path of the image to be processed
            first_stage_model_name (str): name of the first stage detection model
            first_stage_model_version (int): version number for the first stage model
            second_stage_model_name (str): name of the second stage classification model
            second_stage_model_version (int): version number for the second stage model

        Returns:
            list: the second stage prediction for each detected region
        """
        results = []

        # create PIL image for cropping
        pil_image = Image.open(image).convert("RGB")

        # grab first and second stage model from project
        stage_one_project = self.project(first_stage_model_name)
        stage_one_model = stage_one_project.version(first_stage_model_version).model
        stage_two_project = self.project(second_stage_model_name)
        stage_two_model = stage_two_project.version(second_stage_model_version).model

        # perform first inference
        predictions = stage_one_model.predict(image)

        if (
            stage_one_project.type == "object-detection"
            and stage_two_project.type == "classification"
        ):
            # interact with each detected object from stage one inference results
            for boundingbox in predictions:
                # pull the bounding box coordinates from the prediction json
                # note: infer returns the box center as (x, y) plus width and height,
                # but PIL crop expects the top-left and bottom-right corners
                box = (
                    boundingbox["x"] - boundingbox["width"] / 2,
                    boundingbox["y"] - boundingbox["height"] / 2,
                    boundingbox["x"] + boundingbox["width"] / 2,
                    boundingbox["y"] + boundingbox["height"] / 2,
                )

                # create a new cropped image using the first stage prediction coordinates (for each box!)
                croppedImg = pil_image.crop(box)
                croppedImg.save("./temp.png")

                # capture results of second stage inference from cropped image
                results.append(stage_two_model.predict("./temp.png")[0])

            # delete the written image artifact
            try:
                os.remove("./temp.png")
            except FileNotFoundError:
                print("no detections")

        else:
            print(
                "please use an object detection model for the first stage--can only perform two stage with bounding box results",
                "please use a classification model for the second stage",
            )

        return results

    def two_stage_ocr(
        self,
        image: str = "",
        first_stage_model_name: str = "",
        first_stage_model_version: int = 0,
    ) -> list:
        """
        For each prediction from a first stage object detection model, perform OCR as the second stage.

        Args:
            image (str): path of the image to be processed
            first_stage_model_name (str): name of the first stage detection model
            first_stage_model_version (int): version number for the first stage model

        Returns:
            list: the OCR result for each detected region
        """
        results = []

        # create PIL image for cropping
        pil_image = Image.open(image).convert("RGB")

        # grab first and second stage model from project
        stage_one_project = self.project(first_stage_model_name)
        stage_one_model = stage_one_project.version(first_stage_model_version).model

        # perform first inference
        predictions = stage_one_model.predict(image)

        # interact with each detected object from stage one inference results
        if stage_one_project.type == "object-detection":
            for boundingbox in predictions:
                # pull the bounding box coordinates from the prediction json
                # note: infer returns the box center as (x, y) plus width and height,
                # but PIL crop expects the top-left and bottom-right corners
                box = (
                    boundingbox["x"] - boundingbox["width"] / 2,
                    boundingbox["y"] - boundingbox["height"] / 2,
                    boundingbox["x"] + boundingbox["width"] / 2,
                    boundingbox["y"] + boundingbox["height"] / 2,
                )

                # create a new cropped image using the first stage prediction coordinates (for each box!)
                croppedImg = pil_image.crop(box)

                # capture OCR results from cropped image
                results.append(ocr_infer(croppedImg)["results"])
        else:
            print(
                "please use an object detection model--can only perform two stage with bounding box results"
            )

        return results

    def upload_dataset(
        self,
        dataset_path: str,
        project_name: str,
        num_workers: int = 10,
        dataset_format: str = "yolov8",
        project_license: str = "MIT",
        project_type: str = "object-detection",
    ):
        """
        Upload a dataset to Roboflow.

        Args:
            dataset_path (str): path to the dataset
            project_name (str): name of the project
            num_workers (int): number of workers to use for parallel uploads
            dataset_format (str): format of the dataset (`voc`, `yolov8`, `yolov5`)
            project_license (str): license of the project (set to `private` for private projects, only available for paid customers)
            project_type (str): type of the project (only `object-detection` is supported)
        """
        if project_type != "object-detection":
            raise RuntimeError("upload_dataset is only supported for object-detection projects")

        if dataset_format not in ["voc", "yolov8", "yolov5"]:
            raise RuntimeError(
                "dataset_format not supported - please use voc, yolov8, or yolov5. You can always convert your dataset in the Roboflow UI."
            )

        # check type stuff and convert
        if dataset_format == "yolov8" or dataset_format == "yolov5":
            # convert to voc
            for split in ["train", "valid", "test"]:
                dataset = sv.DetectionDataset.from_yolo(
                    images_directory_path=dataset_path + "/" + split + "/images",
                    annotations_directory_path=dataset_path + "/" + split + "/labels",
                    data_yaml_path=dataset_path + "/data.yaml",
                )

                dataset.as_pascal_voc(
                    images_directory_path=dataset_path + "_voc" + "/" + split,
                    annotations_directory_path=dataset_path + "_voc" + "/" + split,
                )

            dataset_path = dataset_path + "_voc"

        if project_name in [p["name"] for p in self.project_list]:
            dataset_upload_project = self.project(project_name)
            print(f"Uploading to existing project {dataset_upload_project.id}")
        else:
            dataset_upload_project = self.create_project(
                project_name,
                project_license=project_license,
                annotation=project_name,
                project_type=project_type,
            )
            print(f"Created project {dataset_upload_project.id}")

        def upload_file(img_file: str, split: str):
            """
            Upload an image or annotation to a project.

            Args:
                img_file (str): path to the image
                split (str): split to which the image should be added (train, valid, test)
            """
            label_file = img_file.replace(".jpg", ".xml")
            dataset_upload_project.upload(
                image_path=img_file, annotation_path=label_file, split=split
            )

        def parallel_upload(file_list, split):
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=num_workers
            ) as executor:
                list(
                    tqdm(
                        executor.map(upload_file, file_list, [split] * len(file_list)),
                        total=len(file_list),
                        file=sys.stdout,
                    )
                )

        write_line("uploading training set...")
        file_list = glob.glob(dataset_path + "/train/*.jpg")
        parallel_upload(file_list, "train")

        write_line("uploading validation set...")
        file_list = glob.glob(dataset_path + "/valid/*.jpg")
        parallel_upload(file_list, "valid")

        write_line("uploading test set...")
        file_list = glob.glob(dataset_path + "/test/*.jpg")
        parallel_upload(file_list, "test")

    def active_learning(
        self,
        raw_data_location: str = "",
        raw_data_extension: str = "",
        inference_endpoint: list = [],
        upload_destination: str = "",
        conditionals: dict = {},
        use_localhost: bool = False,
    ) -> list:
        """
        Perform inference on each image in a directory and upload images that meet the given conditions.

        Args:
            raw_data_location (str): folder of frames to be processed
            raw_data_extension (str): file extension of the frames to be processed
            inference_endpoint (list): project name and version number of the inference model, e.g. ["project-name", 1]
            upload_destination (str): name of the project to upload qualifying images to
            conditionals (dict): dictionary of upload conditions
            use_localhost (bool): whether to use a local inference server instead of the remote endpoint
        """
        prediction_results = []

        # ensure that all fields of conditionals have a key:value pair
        conditional_defaults = {
            "target_classes": [],
            "confidence_interval": [30, 99],
            "required_class_variance_count": 1,
            "required_objects_count": 1,
            "required_class_count": 0,
            "minimum_size_requirement": float("-inf"),
            "maximum_size_requirement": float("inf"),
        }
        for key, default in conditional_defaults.items():
            conditionals.setdefault(key, default)

        # check if inference_model references endpoint or local
        local = "http://localhost:9001/" if use_localhost else None

        inference_model = (
            self.project(inference_endpoint[0])
            .version(version_number=inference_endpoint[1], local=local)
            .model
        )
        upload_project = self.project(upload_destination)

        print("inference reference point: ", inference_model)
        print("upload destination: ", upload_project)

        # check if the raw data is an in-memory cv2 frame (ndarray) rather than a directory path
        if isinstance(raw_data_location, ndarray):
            globbed_files = [raw_data_location]
        else:
            globbed_files = glob.glob(raw_data_location + "/*" + raw_data_extension)

        image1 = globbed_files[0]
        similarity_timeout_counter = 0

        for index, image in enumerate(globbed_files):
            try:
                print(f"*** Processing image [{index + 1}/{len(globbed_files)}] - {image} ***")
            except Exception:
                pass

            if "similarity_confidence_threshold" in conditionals.keys():
                image2 = image
                # measure the similarity of two images using CLIP (hits an endpoint hosted by Roboflow)
                similarity = clip_encode(image1, image2, CLIP_FEATURIZE_URL)
                similarity_timeout_counter += 1

                if (
                    similarity <= conditionals["similarity_confidence_threshold"]
                    or similarity_timeout_counter
                    == conditionals["similarity_timeout_limit"]
                ):
                    image1 = image
                    similarity_timeout_counter = 0
                else:
                    print(image2 + " --> too similar to --> " + image1)
                    continue  # skip this image if too similar or counter hits limit

            predictions = inference_model.predict(image).json()["predictions"]
            # collect all predictions to return to user at end
            prediction_results.append({"image": image, "predictions": predictions})

            # compare object and class count of predictions if enabled; continue if not enough occurrences
            if not count_comparisons(
                predictions,
                conditionals["required_objects_count"],
                conditionals["required_class_count"],
                conditionals["target_classes"],
            ):
                print(" [X] image failed count cases")
                continue

            # iterate through all predictions
            for prediction in predictions:

                # check if box size of detection fits requirements
                if not check_box_size(
                    prediction,
                    conditionals["minimum_size_requirement"],
                    conditionals["maximum_size_requirement"],
                ):
                    print(" [X] prediction failed box size cases")
                    continue

                # compare confidence of detected object to confidence thresholds
                # confidence comes in as a .XXX instead of XXX%
                if (
                    prediction["confidence"] * 100
                    >= conditionals["confidence_interval"][0]
                    and prediction["confidence"] * 100
                    <= conditionals["confidence_interval"][1]
                ):
                    # filter out non-target_class uploads if enabled
                    if (
                        len(conditionals["target_classes"]) > 0
                        and prediction["class"] not in conditionals["target_classes"]
                    ):
                        print(" [X] prediction failed target_classes")
                        continue

                    # upload on success!
                    print(" >> image uploaded!")
                    upload_project.upload(image, num_retry_uploads=3)
                    break

        # return predictions with filenames if globbed images from dir, otherwise return latest prediction result
        return (
            prediction_results
            if type(raw_data_location) is not ndarray
            else prediction_results[-1]["predictions"]
        )

    def __str__(self):
        projects = self.projects()
        json_value = {"name": self.name, "url": self.url, "projects": projects}

        return json.dumps(json_value, indent=2)

active_learning(raw_data_location='', raw_data_extension='', inference_endpoint=[], upload_destination='', conditionals={}, use_localhost=False)

Perform inference on each image in a directory and upload images that meet the given conditions.

Parameters:

    raw_data_location (str, default ''): folder of frames to be processed
    raw_data_extension (str, default ''): file extension of the frames to be processed
    inference_endpoint (list, default []): project name and version number of the inference model, e.g. ["project-name", 1]
    upload_destination (str, default ''): name of the project to upload qualifying images to
    conditionals (dict, default {}): dictionary of upload conditions
    use_localhost (bool, default False): whether to use a local inference server instead of the remote endpoint

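A usage sketch; the project names, version number, directory, and conditions below are placeholders:

results = workspace.active_learning(
    raw_data_location="./frames",             # hypothetical folder of images
    raw_data_extension=".jpg",
    inference_endpoint=["my-detector", 1],    # hypothetical [project_name, version]
    upload_destination="my-dataset-project",  # hypothetical upload project
    conditionals={
        "required_objects_count": 1,      # require at least one detection
        "confidence_interval": [40, 90],  # keep mid-confidence detections only
    },
)
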

clip_compare(dir='', image_ext='.png', target_image='')

Compare all images in a directory to a target image using CLIP

Parameters:

    dir (str, default ''): path to a directory of images to compare
    image_ext (str, default '.png'): file extension of the images to compare (e.g. `.png`)
    target_image (str, default ''): path to the target image to compare each directory image against

Returns:

    list: one {image_name: similarity_score} dict per image, sorted by descending similarity to the target

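A usage sketch; the directory and target image path are placeholders:

scores = workspace.clip_compare(
    dir="candidates",                # hypothetical directory of .png images
    image_ext=".png",
    target_image="./reference.png",  # hypothetical target image
)
print(scores[0])  # most similar image first
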

create_project(project_name, project_type, project_license, annotation)

Create a project in a Roboflow workspace.

Parameters:

    project_name (str, required): name of the project
    project_type (str, required): type of the project
    project_license (str, required): license of the project (set to `private` for private projects, only available for paid customers)
    annotation (str, required): annotation group of the project

Returns:

    Project: a Project object for the newly created project

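A usage sketch; the project name and annotation group are placeholders:

project = workspace.create_project(
    project_name="my-new-project",   # hypothetical project name
    project_type="object-detection",
    project_license="MIT",           # or "private" on paid plans
    annotation="objects",            # hypothetical annotation group
)
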

list_projects()

Print all projects in the workspace to the console.

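For example:

workspace.list_projects()  # prints the raw project metadata for the workspace
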

project(project_name)

Retrieve a Project() object that represents a project in the workspace.

This object can be used to retrieve the model through which to run inference.

Parameters:

    project_name (str, required): name of the project

Returns:

    Project: a Project object

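A usage sketch; the project id is a placeholder:

project = workspace.project("my-detector")  # hypothetical project id
model = project.version(1).model            # load a trained version for inference
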

projects()

Retrieve all projects in the workspace.

Returns:

    list: project ids (str), one per project in the workspace

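For example:

for project_id in workspace.projects():
    print(project_id)
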

two_stage(image='', first_stage_model_name='', first_stage_model_version=0, second_stage_model_name='', second_stage_model_version=0)

For each bounding box found by a first stage detection model, run a second stage classification model on the cropped region.

Parameters:

    image (str, default ''): path of the image to be processed
    first_stage_model_name (str, default ''): name of the first stage detection model
    first_stage_model_version (int, default 0): version number for the first stage model
    second_stage_model_name (str, default ''): name of the second stage classification model
    second_stage_model_version (int, default 0): version number for the second stage model

Returns:

    list: the second stage prediction for each detected region

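A usage sketch with placeholder names (a detection project first, then a classification project):

results = workspace.two_stage(
    image="./street.jpg",                          # hypothetical input image
    first_stage_model_name="vehicle-detector",     # hypothetical detection project
    first_stage_model_version=1,
    second_stage_model_name="vehicle-classifier",  # hypothetical classification project
    second_stage_model_version=1,
)
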

two_stage_ocr(image='', first_stage_model_name='', first_stage_model_version=0)

For each prediction from a first stage object detection model, perform OCR as the second stage.

Parameters:

    image (str, default ''): path of the image to be processed
    first_stage_model_name (str, default ''): name of the first stage detection model
    first_stage_model_version (int, default 0): version number for the first stage model

Returns:

    list: the OCR result for each detected region

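A usage sketch with placeholder names (e.g. detecting license plates, then reading their text):

texts = workspace.two_stage_ocr(
    image="./parking-lot.jpg",                # hypothetical input image
    first_stage_model_name="license-plates",  # hypothetical detection project
    first_stage_model_version=1,
)
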

upload_dataset(dataset_path, project_name, num_workers=10, dataset_format='yolov8', project_license='MIT', project_type='object-detection')

Upload a dataset to Roboflow.

Parameters:

    dataset_path (str, required): path to the dataset
    project_name (str, required): name of the project
    num_workers (int, default 10): number of workers to use for parallel uploads
    dataset_format (str, default 'yolov8'): format of the dataset (`voc`, `yolov8`, `yolov5`)
    project_license (str, default 'MIT'): license of the project (set to `private` for private projects, only available for paid customers)
    project_type (str, default 'object-detection'): type of the project (only `object-detection` is supported)
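
A usage sketch; the dataset path and project name are placeholders, and the dataset is assumed to be in YOLOv8 layout (train/valid/test folders plus a data.yaml):

workspace.upload_dataset(
    dataset_path="./my-dataset",    # hypothetical local dataset root
    project_name="my-new-project",  # created automatically if it does not exist
    num_workers=10,
    dataset_format="yolov8",
)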