Skip to content

Commit

Permalink
Merge pull request #152 from roboflow/dataset-upload
Browse files Browse the repository at this point in the history
Upload Dataset
  • Loading branch information
Jacobsolawetz authored Jun 21, 2023
2 parents 8694dfe + 66c4d58 commit 35f38a4
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 2 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ python-dateutil
python-dotenv
requests
six
supervision
urllib3>=1.26.6
wget
tqdm>=4.41.0
Expand Down
2 changes: 1 addition & 1 deletion roboflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from roboflow.core.workspace import Workspace
from roboflow.util.general import write_line

__version__ = "1.0.9"
__version__ = "1.1.0"


def check_key(api_key, model, notebook, num_retries=0):
Expand Down
2 changes: 1 addition & 1 deletion roboflow/core/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def __annotation_upload(

# check if annotation file exists
elif os.path.exists(annotation_path):
print("-> found given annotation file")
# print("-> found given annotation file")
annotation_string = open(annotation_path, "r").read()

# if not annotation file, check if user wants to upload regular as classification annotation
Expand Down
79 changes: 79 additions & 0 deletions roboflow/core/workspace.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import concurrent.futures
import glob
import json
import os
import random
import re
import sys

import requests
import supervision as sv
from numpy import ndarray
from PIL import Image
from tqdm import tqdm

from roboflow.config import API_URL, CLIP_FEATURIZE_URL, DEMO_KEYS
from roboflow.core.project import Project
Expand All @@ -15,6 +20,7 @@
count_comparisons,
)
from roboflow.util.clip_compare_utils import clip_encode
from roboflow.util.general import write_line
from roboflow.util.two_stage_utils import ocr_infer


Expand Down Expand Up @@ -242,6 +248,79 @@ def two_stage_ocr(

return results

def upload_dataset(
    self,
    dataset_path,
    project_name,
    num_workers=10,
    dataset_format="yolov8",
    project_license="MIT",
    project_type="object-detection",
):
    """
    Upload a local object-detection dataset to a project in this workspace.

    YOLO-format datasets ("yolov8" / "yolov5") are first converted to
    Pascal VOC with ``supervision``; the converted copy is written next to
    the source directory as ``<dataset_path>_voc`` and uploaded from there.
    The ``train``, ``valid`` and ``test`` splits are then uploaded one after
    another, each split fanning out over a thread pool.

    Args:
        dataset_path: root directory of the dataset. For YOLO formats it
            must contain ``data.yaml`` and ``<split>/images`` +
            ``<split>/labels``; for "voc" it must contain ``<split>/*.jpg``
            with a same-named ``.xml`` annotation beside each image.
        project_name: project to upload into. It is looked up with
            ``self.project`` when found in ``self.project_list`` and created
            with ``self.create_project`` otherwise.
        num_workers: maximum number of concurrent upload threads.
        dataset_format: one of "voc", "yolov8", "yolov5".
        project_license: license passed on when a new project is created.
        project_type: must be "object-detection".

    Raises:
        ValueError: if ``project_type`` or ``dataset_format`` is unsupported.
    """
    # A bare string cannot be raised (Python turns that into an unrelated
    # TypeError), so raise a real exception that carries the message.
    if project_type != "object-detection":
        raise ValueError(
            "upload_dataset only supported for object-detection projects"
        )

    if dataset_format not in ["voc", "yolov8", "yolov5"]:
        raise ValueError(
            "dataset_format not supported - please use voc, yolov8, yolov5. "
            "PS, you can always convert your dataset in the Roboflow UI"
        )

    splits = ["train", "valid", "test"]

    if dataset_format == "yolov8" or dataset_format == "yolov5":
        # Normalise first so a trailing slash ("data/") yields the sibling
        # directory "data_voc" rather than "data/_voc" inside the dataset.
        voc_path = os.path.normpath(dataset_path) + "_voc"

        for split in splits:
            dataset = sv.DetectionDataset.from_yolo(
                images_directory_path=os.path.join(dataset_path, split, "images"),
                annotations_directory_path=os.path.join(
                    dataset_path, split, "labels"
                ),
                data_yaml_path=os.path.join(dataset_path, "data.yaml"),
            )

            # Images and XML annotations share one directory per split.
            split_dir = os.path.join(voc_path, split)
            dataset.as_pascal_voc(
                images_directory_path=split_dir,
                annotations_directory_path=split_dir,
            )

        dataset_path = voc_path

    # NOTE(review): this membership test assumes project_list holds values
    # directly comparable to project_name - confirm against the class.
    if project_name in self.project_list:
        dataset_upload_project = self.project(project_name)
    else:
        dataset_upload_project = self.create_project(
            project_name,
            project_license=project_license,
            annotation=project_name,
            project_type=project_type,
        )

    def upload_file(img_file, split):
        # Swap only the file extension; str.replace would also rewrite any
        # ".jpg" that happens to appear in a directory name.
        label_file = os.path.splitext(img_file)[0] + ".xml"
        dataset_upload_project.upload(
            image_path=img_file, annotation_path=label_file, split=split
        )

    def parallel_upload(file_list, split):
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # list() drains the lazy map so tqdm advances per finished
            # upload and any worker exception is re-raised here.
            list(
                tqdm(
                    executor.map(upload_file, file_list, [split] * len(file_list)),
                    total=len(file_list),
                )
            )

    progress_messages = {
        "train": "uploading training set...",
        "valid": "uploading validation set...",
        "test": "uploading test set...",
    }
    for split in splits:
        write_line(progress_messages[split])
        file_list = glob.glob(os.path.join(dataset_path, split, "*.jpg"))
        parallel_upload(file_list, split)

def active_learning(
self,
raw_data_location: str = "",
Expand Down

0 comments on commit 35f38a4

Please sign in to comment.