Skip to content

Commit

Permalink
Merge pull request #13 from FAIRChemistry/union-types
Browse files Browse the repository at this point in the history
Union type Support
  • Loading branch information
JR-1991 authored Dec 8, 2024
2 parents 79f82e1 + ab697c7 commit a26b966
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 24 deletions.
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Local pre-commit hooks that run the project's Rust formatter and linter
# through the system-installed cargo toolchain.
repos:
  - repo: local
    hooks:
      # Fails the commit when the staged Rust files are not rustfmt-clean.
      - id: rustfmt
        name: rustfmt
        entry: cargo fmt -- --check
        language: system
        types: [rust]

      # Lints all targets and features, treating every warning as an error.
      - id: clippy
        name: clippy
        entry: cargo clippy --all-targets --all-features -- -D warnings
        language: system
        types: [rust]
        # pre-commit appends the staged file names to `entry` by default.
        # Everything after `--` is forwarded to the compiler, so those names
        # would be treated as additional inputs and break the lint run.
        pass_filenames: false
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,14 @@ This project uses GitHub Actions for continuous integration. The tests can be ru
cargo test
cargo clippy
```

### Using pre-commit hooks

This project uses [pre-commit](https://pre-commit.com/) to run `rustfmt` and `clippy` on every commit. To install the pre-commit hooks, run the following commands:

```bash
pip install pre-commit
pre-commit install
```

Once the pre-commit hooks are installed, they will run on every commit. This ensures that the code is formatted and linted correctly, so the clippy CI check will not fail on warnings.
160 changes: 137 additions & 23 deletions src/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use std::{error::Error, fmt, str::FromStr};
use pyo3::{pyclass, pymethods};

/// Represents an attribute with various properties and options.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)]
#[cfg_attr(feature = "python", pyclass(get_all))]
pub struct Attribute {
/// The name of the attribute.
Expand Down Expand Up @@ -94,14 +94,16 @@ impl Attribute {
///
/// * `option` - The option to add.
pub fn add_option(&mut self, option: AttrOption) -> Result<(), Box<dyn Error>> {
match option.key.to_lowercase().as_str() {
"type" => self.set_dtype(option.value),
"term" => self.term = Some(option.value),
"description" => self.docstring = option.value,
"xml" => self.set_xml(XMLType::from_str(&option.value).expect("Invalid XML type")),
"default" => self.default = Some(DataType::from_str(&option.value)?),
"multiple" => self.is_array = option.value.to_lowercase() == "true",
_ => self.options.push(option),
match OptionKey::from_str(option.key.as_str()) {
OptionKey::Type => self.set_dtype(option.value)?,
OptionKey::Term => self.term = Some(option.value),
OptionKey::Description => self.docstring = option.value,
OptionKey::Default => self.default = Some(DataType::from_str(&option.value)?),
OptionKey::Multiple => self.is_array = option.value.to_lowercase() == "true",
OptionKey::Other => self.options.push(option),
OptionKey::Xml => {
self.set_xml(XMLType::from_str(&option.value).expect("Invalid XML type"))
}
}

Ok(())
Expand All @@ -112,22 +114,91 @@ impl Attribute {
/// # Arguments
///
/// * `dtype` - The data type to set.
fn set_dtype(&mut self, dtype: String) {
let mut dtype = dtype;
// Handle special case for identifiers
if dtype.to_lowercase().starts_with("identifier") {
self.is_id = true;
// Regex replace identifier or Identifier with string
let pattern = regex::Regex::new(r"[I|i]dentifier").unwrap();
dtype = pattern.replace_all(&dtype, "string").to_string();
fn set_dtype(&mut self, dtype: String) -> Result<(), Box<dyn Error>> {
let mut dtypes = self.break_up_dtypes(&dtype);

self.validate_dtypes(&dtypes)?;

for dtype in dtypes.iter_mut() {
*dtype = dtype.trim().to_string();
if self.is_identifier(dtype) {
*dtype = self.process_identifier(dtype);
}

if dtype.ends_with("[]") {
self.is_array = true;
}

self.dtypes.push(dtype.trim_end_matches("[]").to_string());
}

// Handle special case for arrays
if dtype.ends_with("[]") {
self.is_array = true;
Ok(())
}

/// Breaks a comma-delimited type declaration into its individual entries.
///
/// Each entry is returned exactly as it appears between the commas; no
/// whitespace is stripped here.
///
/// # Arguments
///
/// * `dtype` - The raw type declaration, e.g. `"string, integer"`.
///
/// # Returns
///
/// One owned `String` per comma-separated entry, in declaration order.
fn break_up_dtypes(&self, dtype: &str) -> Vec<String> {
    let mut entries = Vec::new();
    for entry in dtype.split(',') {
        entries.push(String::from(entry));
    }
    entries
}

/// Validates a vector of data type strings to ensure consistency in array notation.
///
/// # Arguments
///
/// * `dtypes` - A reference to a vector of strings representing data types.
///
/// # Returns
///
/// A `Result` indicating success or an error if the validation fails.
fn validate_dtypes(&self, dtypes: &[String]) -> Result<(), Box<dyn Error>> {
let has_multiple_dtypes = dtypes.len() > 1;
let contains_array_dtype = dtypes.iter().any(|dtype| dtype.ends_with("[]"));

if has_multiple_dtypes && contains_array_dtype {
return Err(
"If more than one dtype is provided, none can be array valued by []. \
Use the keyword 'Multiple' instead."
.into(),
);
}

self.dtypes.push(dtype.trim_end_matches("[]").to_string());
Ok(())
}

/// Tells whether a data type declaration denotes an identifier.
///
/// The comparison is case-insensitive and only looks at the beginning of the
/// string.
///
/// # Arguments
///
/// * `dtype` - The data type declaration to inspect.
///
/// # Returns
///
/// `true` when the lowercased `dtype` starts with `identifier`, `false`
/// otherwise.
fn is_identifier(&self, dtype: &str) -> bool {
    let lowered = dtype.to_lowercase();
    lowered.starts_with("identifier")
}

/// Marks the attribute as an identifier and rewrites the data type to `string`.
///
/// # Arguments
///
/// * `dtype` - A data type declaration containing the word `identifier`.
///
/// # Returns
///
/// A new string in which every occurrence of `identifier`, in any letter
/// case, has been replaced by `string`.
fn process_identifier(&mut self, dtype: &str) -> String {
    self.is_id = true;
    // Match case-insensitively so that every spelling accepted by the
    // case-insensitive `is_identifier` check (e.g. `IDENTIFIER`) is rewritten.
    // The former `[I|i]dentifier` class only covered two spellings and also
    // matched a literal `|`.
    let pattern =
        regex::Regex::new(r"(?i)identifier").expect("hard-coded pattern is a valid regex");
    pattern.replace_all(dtype, "string").into_owned()
}

/// Converts the attribute to a JSON schema.
Expand Down Expand Up @@ -349,6 +420,47 @@ impl<'de> Deserialize<'de> for DataType {
}
}

/// The keys an attribute option can be declared with.
enum OptionKey {
    /// The option carries the attribute's data type(s).
    Type,
    /// The option carries the term linked to the attribute.
    Term,
    /// The option carries the attribute's description.
    Description,
    /// The option carries the attribute's XML type information.
    Xml,
    /// The option carries the attribute's default value.
    Default,
    /// The option states whether the attribute holds multiple values.
    Multiple,
    /// Any key that is not one of the predefined ones.
    Other,
}

impl OptionKey {
    /// Maps an option key, compared case-insensitively, to its `OptionKey`.
    ///
    /// # Arguments
    ///
    /// * `key` - The key as written in the option.
    ///
    /// # Returns
    ///
    /// The matching variant, or `OptionKey::Other` for unrecognised keys.
    fn from_str(key: &str) -> Self {
        let normalized = key.to_lowercase();

        match normalized.as_str() {
            "type" => Self::Type,
            "term" => Self::Term,
            "description" => Self::Description,
            "xml" => Self::Xml,
            "default" => Self::Default,
            "multiple" => Self::Multiple,
            _ => Self::Other,
        }
    }
}

#[cfg(test)]
mod tests {
use crate::xmltype::XMLType;
Expand Down Expand Up @@ -410,7 +522,8 @@ mod tests {
#[test]
fn test_attribute_set_dtype() {
let mut attr = Attribute::new("name".to_string(), false);
attr.set_dtype("string".to_string());
attr.set_dtype("string".to_string())
.expect("Failed to set dtype");
assert_eq!(attr.dtypes.len(), 1);
assert_eq!(attr.dtypes[0], "string");
assert_eq!(attr.is_array, false);
Expand All @@ -419,7 +532,8 @@ mod tests {
#[test]
fn test_attribute_set_array_dtype() {
let mut attr = Attribute::new("name".to_string(), false);
attr.set_dtype("string[]".to_string());
attr.set_dtype("string[]".to_string())
.expect("Failed to set dtype");
assert_eq!(attr.dtypes.len(), 1);
assert_eq!(attr.dtypes[0], "string");
assert_eq!(attr.is_array, true);
Expand Down
29 changes: 29 additions & 0 deletions tests/data/model_multiple_types.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
id-field: true
repo: "https://www.github.com/my/repo/"
prefix: "tst"
prefixes:
schema: http://schema.org/
nsmap:
tst: http://example.com/test/
---

### Test

- primitive
- Type: string, integer, float, boolean
- complex
- Type: Other, Another
- array
- Type: string, integer, float, boolean
- Multiple: True

### Another

- value
- Type: string

### Other

- value
- Type: string
26 changes: 26 additions & 0 deletions tests/data/model_multiple_types_invalid.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
id-field: true
repo: "https://www.github.com/my/repo/"
prefix: "tst"
prefixes:
schema: http://schema.org/
nsmap:
tst: http://example.com/test/
---

### Test

- primitive
- Type: string[], integer, float, boolean
- complex
- Type: Other, Another[]

### Another

- value
- Type: string

### Other

- value
- Type: string
25 changes: 25 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,4 +348,29 @@ mod tests {
// Act
DataModel::from_markdown(path).expect("Could not parse markdown");
}

// Loads a fixture that combines several data types with the `[]` suffix.
// The test passes only if loading it panics, whether inside the parser or
// through the `expect` below.
#[test]
#[should_panic]
fn test_multiple_types_invalid() {
    DataModel::from_markdown(Path::new("tests/data/model_multiple_types_invalid.md"))
        .expect("Could not parse markdown");
}

// Loads a fixture whose `Test` object declares several data types per
// attribute and checks that every attribute keeps more than one data type and
// that the `array` attribute is flagged as an array.
#[test]
fn test_multiple_types() {
    let path = Path::new("tests/data/model_multiple_types.md");
    let model = DataModel::from_markdown(path).expect("Could not parse markdown");

    // Look the object up explicitly: silently skipping a missing `Test`
    // object would let the test pass without checking anything.
    let test_object = model
        .objects
        .into_iter()
        .find(|object| object.name == "Test")
        .expect("Object 'Test' not found in model");

    let mut saw_array_attribute = false;
    for attribute in test_object.attributes {
        assert!(
            attribute.dtypes.len() > 1,
            "Attribute '{}' should have more than one dtype",
            attribute.name
        );

        if attribute.name == "array" {
            assert!(attribute.is_array);
            saw_array_attribute = true;
        }
    }

    // Same reasoning as above: the `is_array` check must actually have run.
    assert!(
        saw_array_attribute,
        "Attribute 'array' not found on object 'Test'"
    );
}
}

0 comments on commit a26b966

Please sign in to comment.