diff --git a/roboflow/__init__.py b/roboflow/__init__.py index ad2972ba..bf1fca79 100644 --- a/roboflow/__init__.py +++ b/roboflow/__init__.py @@ -15,7 +15,7 @@ from roboflow.models import CLIPModel, GazeModel # noqa: F401 from roboflow.util.general import write_line -__version__ = "1.1.44" +__version__ = "1.1.45" def check_key(api_key, model, notebook, num_retries=0): diff --git a/roboflow/roboflowpy.py b/roboflow/roboflowpy.py index 025b4391..86e00832 100755 --- a/roboflow/roboflowpy.py +++ b/roboflow/roboflowpy.py @@ -47,7 +47,8 @@ def download(args): def import_dataset(args): - rf = roboflow.Roboflow() + api_key = load_roboflow_api_key(args.workspace) + rf = roboflow.Roboflow(api_key) workspace = rf.workspace(args.workspace) workspace.upload_dataset( dataset_path=args.folder, diff --git a/roboflow/util/folderparser.py b/roboflow/util/folderparser.py index 50ec9e16..bf469e84 100644 --- a/roboflow/util/folderparser.py +++ b/roboflow/util/folderparser.py @@ -8,7 +8,7 @@ from .image_utils import load_labelmap IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"} -ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv"} +ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv", ".jsonl"} LABELMAPS_EXTENSIONS = {".labels", ".yaml", ".yml"} @@ -107,13 +107,14 @@ def _map_annotations_to_images_1tomany(images, annotationFiles): dirname = image["dirname"] annotationsInSameDir = annotationsByDirname.get(dirname, []) if annotationsInSameDir: - if len(annotationsInSameDir) > 1: - print(f"warning: found multiple annotation files on dir {dirname}") - annotationFile = annotationsInSameDir[0] - format = annotationFile["parsedType"] - image["annotationfile"] = _filterIndividualAnnotations( - image, annotationFile, format, imgRefMap, annotationMap - ) + for annotationFile in annotationsInSameDir: + format = annotationFile["parsedType"] + filtered_annotations = _filterIndividualAnnotations( + image, annotationFile, format, imgRefMap, annotationMap + ) + if filtered_annotations: + 
image["annotationfile"] = filtered_annotations + break def _build_image_and_annotation_maps(annotationFiles): @@ -182,11 +183,16 @@ def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotatio return _annotation else: return None + elif format == "jsonl": + jsonlLines = [json.dumps(line) for line in parsed if line["image"] == image["name"]] + if jsonlLines: + _annotation = {"name": "annotation.jsonl", "rawText": "\n".join(jsonlLines)} + return _annotation return None def _loadAnnotations(folder, annotations): - valid_extensions = {".json", ".csv"} + valid_extensions = {".json", ".csv", ".jsonl"} annotations = [a for a in annotations if a["extension"] in valid_extensions] for ann in annotations: extension = ann["extension"] @@ -197,12 +203,29 @@ def _loadAnnotations(folder, annotations): if parsedType: ann["parsed"] = parsed ann["parsedType"] = parsedType + elif extension == ".jsonl": + ann["parsed"] = _read_jsonl(f"{folder}{ann['file']}") + ann["parsedType"] = "jsonl" elif extension == ".csv": ann["parsedType"] = "csv" ann["parsed"] = _parseAnnotationCSV(f"{folder}{ann['file']}") return annotations +def _read_jsonl(path): + data = [] + with open(path, encoding="utf-8") as file: + for linenum, line in enumerate(file, 1): + if not line.strip(): + continue + try: + json_object = json.loads(line.strip()) + data.append(json_object) + except json.JSONDecodeError: + print(f"Warning: Skipping invalid JSON line in {path}:{linenum}") + return data + + def _parseAnnotationCSV(filename): # TODO: use a proper CSV library? 
with open(filename) as f: diff --git a/tests/datasets/paligemma/README.dataset.txt b/tests/datasets/paligemma/README.dataset.txt new file mode 100644 index 00000000..cb36ebf0 --- /dev/null +++ b/tests/datasets/paligemma/README.dataset.txt @@ -0,0 +1,5 @@ +# ChartQA > 2024-08-28 7:21pm +https://universe.roboflow.com/roboflow-jvuqo/chartqa-c9zny + +Provided by a Roboflow user +License: CC BY 4.0 diff --git a/tests/datasets/paligemma/dataset/5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg b/tests/datasets/paligemma/dataset/5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg new file mode 100644 index 00000000..bfb91196 Binary files /dev/null and b/tests/datasets/paligemma/dataset/5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg differ diff --git a/tests/datasets/paligemma/dataset/5e2369e237c0c612d09181b63fb20480_png.rf.5be427175f28f7042e34636bd0dd89cc.jpg b/tests/datasets/paligemma/dataset/5e2369e237c0c612d09181b63fb20480_png.rf.5be427175f28f7042e34636bd0dd89cc.jpg new file mode 100644 index 00000000..739a8029 Binary files /dev/null and b/tests/datasets/paligemma/dataset/5e2369e237c0c612d09181b63fb20480_png.rf.5be427175f28f7042e34636bd0dd89cc.jpg differ diff --git a/tests/datasets/paligemma/dataset/63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg b/tests/datasets/paligemma/dataset/63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg new file mode 100644 index 00000000..2556733a Binary files /dev/null and b/tests/datasets/paligemma/dataset/63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg differ diff --git a/tests/datasets/paligemma/dataset/900e8ea2a3c336686c23978e800af239_png.rf.011b80b16a622820ca92b91543f5a44d.jpg b/tests/datasets/paligemma/dataset/900e8ea2a3c336686c23978e800af239_png.rf.011b80b16a622820ca92b91543f5a44d.jpg new file mode 100644 index 00000000..c37579e0 Binary files /dev/null and 
b/tests/datasets/paligemma/dataset/900e8ea2a3c336686c23978e800af239_png.rf.011b80b16a622820ca92b91543f5a44d.jpg differ diff --git a/tests/datasets/paligemma/dataset/_annotations.test.jsonl b/tests/datasets/paligemma/dataset/_annotations.test.jsonl new file mode 100644 index 00000000..6a4f765a --- /dev/null +++ b/tests/datasets/paligemma/dataset/_annotations.test.jsonl @@ -0,0 +1,9 @@ +{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"} +{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"} +{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"} +{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"} +{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2013?","suffix":"Retail"} +{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2014?","suffix":"Electronics"} +{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"} +{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"} +{"image":"eaab023f1ce380c4c9163415facc3c0d_png.rf.01c5a1f19653c056bbb3b0c8fc2d752d.jpg","prefix":"What's the percentage value of leftmost 
bar?","suffix":"24"} diff --git a/tests/datasets/paligemma/dataset/_annotations.train.jsonl b/tests/datasets/paligemma/dataset/_annotations.train.jsonl new file mode 100644 index 00000000..ecc9b17c --- /dev/null +++ b/tests/datasets/paligemma/dataset/_annotations.train.jsonl @@ -0,0 +1,4 @@ +{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"} +{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"} +{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"} +{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"} diff --git a/tests/datasets/paligemma/dataset/_annotations.valid.jsonl b/tests/datasets/paligemma/dataset/_annotations.valid.jsonl new file mode 100644 index 00000000..33f50e18 --- /dev/null +++ b/tests/datasets/paligemma/dataset/_annotations.valid.jsonl @@ -0,0 +1,3 @@ +{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"} +{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"} +{"image":"aca6fd05e9b2830518288ba082aa6f76_png.rf.001543e209328197472f6587dfa8a6d6.jpg","prefix":"What was the unemployment rate in Chile 
in 2020?","suffix":"11.51"} diff --git a/tests/datasets/paligemma/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg b/tests/datasets/paligemma/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg new file mode 100644 index 00000000..5df4e022 Binary files /dev/null and b/tests/datasets/paligemma/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg differ diff --git a/tests/datasets/paligemma/dataset/de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg b/tests/datasets/paligemma/dataset/de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg new file mode 100644 index 00000000..0c68ef67 Binary files /dev/null and b/tests/datasets/paligemma/dataset/de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg differ diff --git a/tests/datasets/paligemma/dataset/fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg b/tests/datasets/paligemma/dataset/fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg new file mode 100644 index 00000000..7c80696e Binary files /dev/null and b/tests/datasets/paligemma/dataset/fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg differ diff --git a/tests/manual/debugme.py b/tests/manual/debugme.py index 3fb4617a..762dc225 100644 --- a/tests/manual/debugme.py +++ b/tests/manual/debugme.py @@ -41,6 +41,7 @@ # f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs # f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split() # noqa: E501 // docs # f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split() # noqa: E501 // docs - f"deployment list".split() # noqa: E501 // docs + # f"deployment list".split() # noqa: E501 // docs + f"import -w 
tonyprivate -p meh-plvrv {thisdir}/../datasets/paligemma/".split() # noqa: E501 // docs ) args.func(args) diff --git a/tests/manual/uselocal b/tests/manual/uselocal index 644f5f6c..8c8bf9ca 100644 --- a/tests/manual/uselocal +++ b/tests/manual/uselocal @@ -1,5 +1,8 @@ #!/bin/env bash -cp data/.config-staging data/.config +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cp "$SCRIPT_DIR/data/.config-staging" "$SCRIPT_DIR/data/.config" export API_URL=https://localhost.roboflow.one export APP_URL=https://localhost.roboflow.one +export DEDICATED_DEPLOYMENT_URL=https://staging.roboflow.cloud +export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config" # need to set it in /etc/hosts to the IP of host.docker.internal! diff --git a/tests/manual/useprod b/tests/manual/useprod index d83e3213..82bdf25a 100644 --- a/tests/manual/useprod +++ b/tests/manual/useprod @@ -1,7 +1,9 @@ #!/bin/env bash -cp data/.config-prod data/.config +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cp "$SCRIPT_DIR/data/.config-prod" "$SCRIPT_DIR/data/.config" export API_URL=https://api.roboflow.com export APP_URL=https://app.roboflow.com export OBJECT_DETECTION_URL=https://detect.roboflow.one export DEDICATED_DEPLOYMENT_URL=https://roboflow.cloud +export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config" diff --git a/tests/manual/usestaging b/tests/manual/usestaging index 1cffeb1f..aa3970f9 100644 --- a/tests/manual/usestaging +++ b/tests/manual/usestaging @@ -1,7 +1,9 @@ #!/bin/env bash -cp data/.config-staging data/.config +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cp "$SCRIPT_DIR/data/.config-staging" "$SCRIPT_DIR/data/.config" export API_URL=https://api.roboflow.one export APP_URL=https://app.roboflow.one export OBJECT_DETECTION_URL=https://lambda-object-detection.staging.roboflow.com export DEDICATED_DEPLOYMENT_URL=https://staging.roboflow.cloud +export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config" diff --git a/tests/util/test_folderparser.py 
b/tests/util/test_folderparser.py index f3ce2580..d4d497cb 100644 --- a/tests/util/test_folderparser.py +++ b/tests/util/test_folderparser.py @@ -52,6 +52,20 @@ def test_parse_mosquitos_csv(self): expected += "train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815\n" assert testImage["annotationfile"]["rawText"] == expected + def test_paligemma_format(self): + folder = f"{thisdir}/../datasets/paligemma" + parsed = folderparser.parsefolder(folder) + testImagePath = "/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg" + testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0] + assert testImage["annotationfile"]["name"] == "annotation.jsonl" + expected = ( + '{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",' + ' "prefix": "Which sector had the highest ROI in 2013?", "suffix": "Retail"}\n' + '{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",' + ' "prefix": "Which sector had the highest ROI in 2014?", "suffix": "Electronics"}' + ) + assert testImage["annotationfile"]["rawText"] == expected + def _assertJsonMatchesFile(actual, filename): with open(filename) as file: