Skip to content

Commit

Permalink
added splits to new cards and updated split_annotators to update further cards to be added
Browse files Browse the repository at this point in the history

Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Feb 6, 2025
1 parent c747353 commit aa4eb0d
Show file tree
Hide file tree
Showing 37 changed files with 226 additions and 71 deletions.
7 changes: 6 additions & 1 deletion prepare/cards/mlsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@

for lang in langs:
card = TaskCard(
loader=LoadHF(path="mlsum", name=lang),
loader=LoadHF(
path="mlsum",
name=lang,
splits=["train", "validation", "test"],
streaming=True,
),
preprocess_steps=[
Rename(field_to_field={"text": "document"}),
Wrap(field="summary", inside="list", to_field="summaries"),
Expand Down
2 changes: 1 addition & 1 deletion prepare/cards/numeric_nlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="kasnerz/numericnlg"),
loader=LoadHF(path="kasnerz/numericnlg", splits=["train", "validation", "test"]),
preprocess_steps=[
Set(
fields={
Expand Down
2 changes: 2 additions & 0 deletions prepare/cards/ragbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
loader=LoadHF(
path="rungalileo/ragbench",
name=subset,
splits=["train", "test", "validation"],
),
preprocess_steps=[
Copy(field="documents", to_field="contexts"),
Expand All @@ -51,6 +52,7 @@
loader=LoadHF(
path="rungalileo/ragbench",
name=subset,
splits=["train", "test", "validation"],
),
preprocess_steps=[
Set({"context_type": "documents"}),
Expand Down
2 changes: 1 addition & 1 deletion prepare/cards/trec.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
classes = [expand_label_text[label] for label in classlabels.names]

card = TaskCard(
loader=LoadHF(path=dataset_name),
loader=LoadHF(path=dataset_name, splits=["train", "test"]),
preprocess_steps=[
Shuffle(page_size=sys.maxsize),
SplitRandomMix(
Expand Down
8 changes: 7 additions & 1 deletion src/unitxt/catalog/cards/mlsum/de.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
"loader": {
"__type__": "load_hf",
"path": "mlsum",
"name": "de"
"name": "de",
"splits": [
"train",
"validation",
"test"
],
"streaming": true
},
"preprocess_steps": [
{
Expand Down
8 changes: 7 additions & 1 deletion src/unitxt/catalog/cards/mlsum/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
"loader": {
"__type__": "load_hf",
"path": "mlsum",
"name": "es"
"name": "es",
"splits": [
"train",
"validation",
"test"
],
"streaming": true
},
"preprocess_steps": [
{
Expand Down
8 changes: 7 additions & 1 deletion src/unitxt/catalog/cards/mlsum/fr.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
"loader": {
"__type__": "load_hf",
"path": "mlsum",
"name": "fr"
"name": "fr",
"splits": [
"train",
"validation",
"test"
],
"streaming": true
},
"preprocess_steps": [
{
Expand Down
8 changes: 7 additions & 1 deletion src/unitxt/catalog/cards/mlsum/ru.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
"loader": {
"__type__": "load_hf",
"path": "mlsum",
"name": "ru"
"name": "ru",
"splits": [
"train",
"validation",
"test"
],
"streaming": true
},
"preprocess_steps": [
{
Expand Down
8 changes: 7 additions & 1 deletion src/unitxt/catalog/cards/mlsum/tu.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
"loader": {
"__type__": "load_hf",
"path": "mlsum",
"name": "tu"
"name": "tu",
"splits": [
"train",
"validation",
"test"
],
"streaming": true
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/numeric_nlg.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
"__type__": "task_card",
"loader": {
"__type__": "load_hf",
"path": "kasnerz/numericnlg"
"path": "kasnerz/numericnlg",
"splits": [
"train",
"validation",
"test"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "covidqa"
"name": "covidqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "cuad"
"name": "cuad",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "delucionqa"
"name": "delucionqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "emanual"
"name": "emanual",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "expertqa"
"name": "expertqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "finqa"
"name": "finqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "hagrid"
"name": "hagrid",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "hotpotqa"
"name": "hotpotqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "msmarco"
"name": "msmarco",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "pubmedqa"
"name": "pubmedqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "tatqa"
"name": "tatqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "techqa"
"name": "techqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/covidqa.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "covidqa"
"name": "covidqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/cuad.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "cuad"
"name": "cuad",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/delucionqa.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "delucionqa"
"name": "delucionqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/emanual.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "emanual"
"name": "emanual",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/expertqa.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "expertqa"
"name": "expertqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
7 changes: 6 additions & 1 deletion src/unitxt/catalog/cards/ragbench/finqa.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
"loader": {
"__type__": "load_hf",
"path": "rungalileo/ragbench",
"name": "finqa"
"name": "finqa",
"splits": [
"train",
"test",
"validation"
]
},
"preprocess_steps": [
{
Expand Down
Loading

0 comments on commit aa4eb0d

Please sign in to comment.