diff --git a/requirements.txt b/requirements.txt
index a2c507f8..0f53a6ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ python-dateutil
 python-dotenv
 requests
 six
+supervision
 urllib3>=1.26.6
 wget
 tqdm>=4.41.0
diff --git a/roboflow/__init__.py b/roboflow/__init__.py
index 8aee5613..e72e4b95 100644
--- a/roboflow/__init__.py
+++ b/roboflow/__init__.py
@@ -12,7 +12,7 @@
 from roboflow.core.workspace import Workspace
 from roboflow.util.general import write_line
 
-__version__ = "1.0.9"
+__version__ = "1.1.0"
 
 
 def check_key(api_key, model, notebook, num_retries=0):
diff --git a/roboflow/core/project.py b/roboflow/core/project.py
index 4ac41f32..c0b45618 100644
--- a/roboflow/core/project.py
+++ b/roboflow/core/project.py
@@ -361,7 +361,7 @@ def __annotation_upload(
 
         # check if annotation file exists
         elif os.path.exists(annotation_path):
-            print("-> found given annotation file")
+            # print("-> found given annotation file")
             annotation_string = open(annotation_path, "r").read()
 
         # if not annotation file, check if user wants to upload regular as classification annotation
diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
index 777b5106..2a80c232 100644
--- a/roboflow/core/workspace.py
+++ b/roboflow/core/workspace.py
@@ -1,11 +1,16 @@
+import concurrent.futures
 import glob
 import json
 import os
+import random
+import re
 import sys
 
 import requests
+import supervision as sv
 from numpy import ndarray
 from PIL import Image
+from tqdm import tqdm
 
 from roboflow.config import API_URL, CLIP_FEATURIZE_URL, DEMO_KEYS
 from roboflow.core.project import Project
@@ -15,6 +20,7 @@
     count_comparisons,
 )
 from roboflow.util.clip_compare_utils import clip_encode
+from roboflow.util.general import write_line
 from roboflow.util.two_stage_utils import ocr_infer
 
 
@@ -242,6 +248,79 @@ def two_stage_ocr(
 
         return results
 
+    def upload_dataset(
+        self,
+        dataset_path,
+        project_name,
+        num_workers=10,
+        dataset_format="yolov8",
+        project_license="MIT",
+        project_type="object-detection",
+    ):
+        if project_type != "object-detection":
+            raise RuntimeError("upload_dataset is only supported for object-detection projects")
+
+        if dataset_format not in ["voc", "yolov8", "yolov5"]:
+            raise RuntimeError(
+                "dataset_format not supported - please use voc, yolov8, or yolov5. You can always convert your dataset in the Roboflow UI."
+            )
+
+        # if the dataset is in a YOLO format, convert it to VOC before uploading
+        if dataset_format == "yolov8" or dataset_format == "yolov5":
+            # convert each split to voc
+            for split in ["train", "valid", "test"]:
+                dataset = sv.DetectionDataset.from_yolo(
+                    images_directory_path=dataset_path + "/" + split + "/images",
+                    annotations_directory_path=dataset_path + "/" + split + "/labels",
+                    data_yaml_path=dataset_path + "/data.yaml",
+                )
+
+                dataset.as_pascal_voc(
+                    images_directory_path=dataset_path + "_voc" + "/" + split,
+                    annotations_directory_path=dataset_path + "_voc" + "/" + split,
+                )
+
+            dataset_path = dataset_path + "_voc"
+
+        if project_name in self.project_list:
+            dataset_upload_project = self.project(project_name)
+        else:
+            dataset_upload_project = self.create_project(
+                project_name,
+                project_license=project_license,
+                annotation=project_name,
+                project_type=project_type,
+            )
+
+        def upload_file(img_file, split):
+            label_file = img_file.replace(".jpg", ".xml")
+            dataset_upload_project.upload(
+                image_path=img_file, annotation_path=label_file, split=split
+            )
+
+        def parallel_upload(file_list, split):
+            with concurrent.futures.ThreadPoolExecutor(
+                max_workers=num_workers
+            ) as executor:
+                list(
+                    tqdm(
+                        executor.map(upload_file, file_list, [split] * len(file_list)),
+                        total=len(file_list),
+                    )
+                )
+
+        write_line("uploading training set...")
+        file_list = glob.glob(dataset_path + "/train/*.jpg")
+        parallel_upload(file_list, "train")
+
+        write_line("uploading validation set...")
+        file_list = glob.glob(dataset_path + "/valid/*.jpg")
+        parallel_upload(file_list, "valid")
+
+        write_line("uploading test set...")
+        file_list = glob.glob(dataset_path + "/test/*.jpg")
+        parallel_upload(file_list, "test")
+
     def active_learning(
         self,
         raw_data_location: str = "",
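Below is a minimal usage sketch of the new Workspace.upload_dataset method introduced in this diff. The API key, workspace name, project name, and dataset path are placeholders, not values taken from the change itself:

    import roboflow

    # authenticate and select a workspace (placeholder credentials)
    rf = roboflow.Roboflow(api_key="YOUR_API_KEY")
    workspace = rf.workspace("your-workspace")

    # converts the local YOLOv8-format dataset to Pascal VOC on disk, creates
    # (or reuses) the target project, then uploads the train/valid/test splits
    # with 10 parallel workers
    workspace.upload_dataset(
        dataset_path="./my-yolov8-dataset",
        project_name="my-new-project",
        num_workers=10,
        dataset_format="yolov8",
    )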