Roboflow Data Loader

Bases: DataLoader

Source code in evaluations/dataloaders/roboflow.py
class RoboflowDataLoader(DataLoader):
    def __init__(
        self,
        workspace_url: str,
        project_url: str,
        project_version: int,
        image_files: str,
        model_type: str = "object-detection",
        dataset: str = "test",
    ):
        """
        Load a dataset from Roboflow. Saves the result to ./dataset/

        Args:
            workspace_url (str): The Roboflow workspace URL
            project_url (str): The Roboflow project URL
            project_version (int): The Roboflow project version
            model_type (str): The model type. Either "object-detection" or "classification"

        Returns:
            None
        """
        self.workspace_url = workspace_url
        self.project_url = project_url
        self.project_version = project_version
        self.model_type = model_type
        self.data = {}
        self.image_files = image_files
        self.dataset = dataset

        self.model = None
        self.dataset_version = None

    def download_dataset(self) -> tuple:
        """
        Download a dataset from Roboflow. Saves the result to the directory
        specified by image_files.

        Returns:
            tuple: The class names, the image data dictionary, and the Roboflow model
        """

        roboflow.login()
        rf = roboflow.Roboflow()

        self.data = {}

        project = rf.workspace(self.workspace_url).project(self.project_url)

        self.dataset_version = project.version(self.project_version)
        self.dataset_content = self.dataset_version
        self.model = project.version(self.project_version).model

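        # map the requested model type to a Roboflow export format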
        if self.model_type == "classification":
            data_format = "folder"
        elif self.model_type == "multiclass":
            data_format = "multiclass"
        elif self.model_type == "object-detection":
            data_format = "yolov5"
        elif self.model_type == "segmentation":
            data_format = "yolov5"
        else:
            raise ValueError("Model type not supported")

        root_path = self.image_files

        # download if needed
        if not os.path.exists(root_path):
            self.dataset_version.download(data_format, root_path)

        if data_format == "yolov5":
            yaml_data = os.path.join(root_path, "data.yaml")
            if os.path.exists(yaml_data):
                # load class names map
                with open(yaml_data, "r") as file:
                    dataset_yaml = yaml.safe_load(file)
                    self.class_names = [
                        i.replace("-", " ") for i in dataset_yaml["names"]
                    ]
        elif data_format == "multiclass":
            with open(os.path.join(root_path, "valid/", "_classes.csv")) as f:
                reader = csv.reader(f)
                results = list(reader)

                class_names = results[0]

                # first item will be "filename", so we need to remove it
                self.class_names = [c.strip() for c in class_names][1:]

                self.class_names.append("background")

                for row in results[1:]:
                    self.data[os.path.join(root_path, "valid/", row[0])] = {
                        "filename": os.path.join(root_path, "valid/", row[0]),
                        "predictions": [],
                        "ground_truth": [
                            self.class_names[c - 1].strip()
                            for c in range(1, len(row))
                            if row[c].strip() == "1"
                        ],
                    }

                return self.class_names, self.data, self.model
        else:
            # class names are the folder names in valid/
            self.class_names = [
                name
                for name in os.listdir(os.path.join(root_path, "valid"))
                if os.path.isdir(os.path.join(root_path, "valid", name))
            ]

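        # index every image in the requested split and record its ground truth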
        for root, dirs, files in os.walk(
            self.image_files.rstrip("/") + f"/{self.dataset}/"
        ):
            for file in files:
                if file.endswith(".jpg"):
                    if self.model_type in ("object-detection", "segmentation"):
                        ground_truth, masks = get_ground_truth_for_image(
                            os.path.join(root, file)
                        )
                        if masks != []:
                            ground_truth = masks
                    else:
                        # folder name
                        ground_truth = [os.path.basename(root)]

                    self.data[os.path.join(root, file)] = {
                        "filename": os.path.join(root, file),
                        "predictions": [],
                        "ground_truth": ground_truth,
                    }

        self.class_names.append("background")

        return self.class_names, self.data, self.model
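
For reference, here is a minimal usage sketch. The workspace name, project name, and output directory are hypothetical placeholders; it also assumes the package is importable as evaluations.dataloaders.roboflow and that a Roboflow API key is available (roboflow.login() will prompt for one otherwise).

from evaluations.dataloaders.roboflow import RoboflowDataLoader

# hypothetical workspace/project identifiers -- substitute your own
loader = RoboflowDataLoader(
    workspace_url="my-workspace",
    project_url="my-project",
    project_version=1,
    image_files="./dataset",
    model_type="object-detection",
    dataset="test",
)

# downloads the dataset (if not already on disk) and builds the image index
class_names, data, model = loader.download_dataset()

print(class_names)
print(len(data), "images indexed")

The loader only downloads when the image_files directory does not already exist, so repeated runs reuse the local copy.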

__init__(workspace_url, project_url, project_version, image_files, model_type='object-detection', dataset='test')

Configure a dataset to load from Roboflow. The dataset is downloaded to the directory given by image_files when download_dataset() is called.

Parameters:

Name             Type  Description                                                                                    Default
workspace_url    str   The Roboflow workspace URL                                                                     required
project_url      str   The Roboflow project URL                                                                       required
project_version  int   The Roboflow project version                                                                   required
image_files      str   The local directory in which the dataset is (or will be) stored                               required
model_type       str   The model type. One of "object-detection", "segmentation", "classification", or "multiclass"  'object-detection'
dataset          str   The dataset split to load                                                                      'test'

Returns:

Type  Description
None  None


download_dataset()

Download a dataset from Roboflow. Saves the result to the directory specified by image_files.

Returns:

Type   Description
tuple  The class names, the image data dictionary, and the Roboflow model
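
Each entry in the returned data dictionary is keyed by image path and contains the filename, an empty predictions list for an evaluator to fill, and the ground truth for that image. Continuing the sketch above (run_inference is a hypothetical placeholder, not part of this module):

class_names, data, model = loader.download_dataset()

for image_path, record in data.items():
    # for detection/segmentation datasets, ground_truth holds boxes or masks;
    # for classification datasets, it holds class names
    print(image_path, record["ground_truth"])

    # predictions start empty; populate them with your own model output, e.g.
    # record["predictions"] = run_inference(model, image_path)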
