From afe57bbfdae7adea8cecf805813efa178157686b Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:32:36 +0100 Subject: [PATCH 01/17] update dependencies --- Cargo.lock | 427 ++++++++++++++++++++++++++++++++++++++++++++++------- Cargo.toml | 4 + 2 files changed, 381 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 38ca3db..036a9b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "1.9.1" @@ -167,11 +176,17 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" -version = "1.6.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "cc" @@ -279,6 +294,25 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpufeatures" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.20.8" @@ -314,6 +348,12 @@ dependencies = [ "syn", ] +[[package]] +name = "data-encoding" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" + [[package]] name = "deranged" version = "0.3.11" @@ -336,12 +376,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -428,36 +484,48 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -466,6 +534,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getopts" version = "0.2.21" @@ -475,6 +553,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.28.1" @@ -742,10 +831,11 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -757,9 +847,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "linked-hash-map" @@ -773,6 +863,16 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.21" @@ -792,6 +892,7 @@ dependencies = [ "log", "minijinja", "minijinja-embed", + "openai-api-rs", "pretty_assertions", "pretty_env_logger", "pulldown-cmark", @@ -803,6 +904,7 @@ dependencies = [ "serde_json", "serde_with", "textwrap", + "tokio", "toml 0.8.14", "wasm-bindgen", ] @@ -828,6 +930,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minijinja" version = "2.0.1" @@ -854,13 +966,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -896,16 +1008,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -921,6 +1023,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "openai-api-rs" +version = "5.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0061068e3cd1d5a538a0c61484fb1e0722f5113d107e7e1c652b62a6fcba148" +dependencies = [ + "bytes", + "futures-util", + "reqwest", + "serde", + "serde_json", + "tokio", + "tokio-tungstenite", +] + [[package]] name = "openssl" version = "0.10.64" @@ -955,9 +1072,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.1+3.3.1" +version = "300.4.1+3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" +checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" dependencies = [ "cc", ] @@ -975,6 +1092,29 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.5", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1031,6 +1171,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "predicates" version = "3.1.0" @@ -1171,6 +1320,45 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.5.0", +] + [[package]] name = "regex" version = "1.10.4" @@ -1223,6 +1411,7 @@ dependencies = [ "js-sys", "log", "mime", + "mime_guess", "native-tls", "once_cell", "percent-encoding", @@ -1235,6 +1424,7 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", + "tokio-socks", "tower-service", "url", "wasm-bindgen", @@ -1293,6 +1483,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "security-framework" version = "2.11.0" @@ -1410,6 +1606,26 @@ dependencies = [ "syn", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + [[package]] name = "slab" version = "0.4.9" @@ -1535,6 +1751,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "time" version = "0.3.36" @@ -1583,18 +1819,31 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", + "parking_lot", "pin-project-lite", + "signal-hook-registry", "socket2", - "windows-sys 0.48.0", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1607,6 +1856,32 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-socks" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4770b8024672c1101b3f6733eab95b18007dbe0847a8afe341fcf79e06043f" +dependencies = [ + "either", + "futures-util", + "thiserror", + "tokio", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" +dependencies = [ + "futures-util", + "log", + "native-tls", + "tokio", + "tokio-native-tls", + "tungstenite", +] + [[package]] name = "tokio-util" version = "0.7.11" @@ -1715,6 +1990,31 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "native-tls", + "rand", + "sha1", + "thiserror", + "utf-8", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicase" version = "2.7.0" @@ -1780,6 +2080,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8parse" version = "0.2.1" @@ -1824,9 +2130,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -1835,13 +2141,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -1850,21 +2155,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1872,9 +2178,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", @@ -1885,15 +2191,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -2089,3 +2395,24 @@ name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index b5c75d0..f8b3825 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,10 +31,13 @@ textwrap = "0.16.1" pyo3 = { version = "0.23.2", features = ["extension-module"], optional = true } wasm-bindgen = { version = "0.2.95", optional = true } serde-wasm-bindgen = { version = "0.6.5", optional = true } +tokio = { version = "1.42.0", features = ["rt"] } +openai-api-rs = { version = "5.2.3", optional = true } [features] python = ["pyo3"] wasm = ["wasm-bindgen", "serde-wasm-bindgen"] +openai = ["openai-api-rs"] [build-dependencies] minijinja-embed = "2.0.1" @@ -42,6 +45,7 @@ minijinja-embed = "2.0.1" [[bin]] name = "md-models" path = "src/bin/cli.rs" +required-features = ["openai"] [dev-dependencies] assert_cmd = "2.0.14" From f66b087607626e6afa91d78868a0602cb39afcc1 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:38:11 +0100 Subject: [PATCH 02/17] remove old json schema export --- src/json/parser.rs | 256 -------------------------------- src/primitives.rs | 144 ------------------ src/schema.rs | 353 --------------------------------------------- 3 files changed, 753 deletions(-) delete mode 100644 src/json/parser.rs delete mode 100644 src/primitives.rs delete mode 100644 src/schema.rs diff --git a/src/json/parser.rs b/src/json/parser.rs deleted file mode 100644 index 9836d29..0000000 --- a/src/json/parser.rs +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2024 Jan Range - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - */ - -use crate::{ - attribute::{AttrOption, Attribute}, - datamodel::DataModel, - markdown::frontmatter::FrontMatter, - object::{Enumeration, Object}, -}; -use convert_case::{Case, Casing}; -use reqwest::Url; -use std::{error::Error, path::Path}; - -use super::datatype::DataType; - -static PROP_KEYS: [&str; 10] = [ - "type", "format", "enum", "minimum", "maximum", "minItems", "maxItems", "title", "items", - "$ref", -]; - -/// Parse a JSON schema into an MD-Models data model -pub fn parse_json_schema(path: &Path) -> Result> { - let schema = read_json_schema(path).expect( - "Could not read the JSON schema file. Make sure the file is a valid JSON schema file.", - ); - - // Create a new data model - let name = schema - .get("title") - .expect("Could not find title in the JSON schema") - .as_str() - .expect("Title is not a string") - .to_string(); - let mut model = DataModel::new(Some(name), None); - model.config = Some(FrontMatter::default()); - - // Create the root object - let object = create_object(&schema); - model.objects.push(object); - - // Create the rest of the objects and enums - let definitions = schema.get("definitions").unwrap(); - for (key, value) in definitions.as_object().unwrap() { - let data_type = DataType::from_object(value); - - match data_type { - DataType::Object { properties: _ } => { - let object = create_object(value); - model.objects.push(object); - } - DataType::Enum { values } => { - let enumeration = create_enum(key, &values); - model.enums.push(enumeration); - } - _ => {} - } - } - - Ok(model) -} - -/// Read JSON schema from a file -fn read_json_schema(path: &Path) -> Result { - let content = std::fs::read_to_string(path).expect("Could not read the JSON schema file"); - serde_json::from_str(&content) -} - -fn create_enum(name: &str, values: &[String]) -> Enumeration { - // Create a generic mapping for the enum - let mappings = values - .iter() - .map(|v| (create_enum_alias(v), v.to_string())) - .collect(); - - Enumeration { - name: name.to_string(), - mappings, - docstring: "".to_string(), - } -} - -fn create_enum_alias(name: &str) -> String { - // If it is a URL, get the last part of the URL and part before the .org/.com - let name = if let Ok(url) = Url::parse(name) { - url_to_enum_alias(url) - } else { - remove_special_characters(name) - }; - - name.to_case(Case::Snake).to_uppercase() -} - -fn remove_special_characters(input: &str) -> String { - input.chars().filter(|c| c.is_alphanumeric()).collect() -} - -fn url_to_enum_alias(url: Url) -> String { - // Get the host and path - let host = url.host_str().unwrap_or(""); - let path = url.path(); - - // Remove the 'www.' prefix from the host if present - let host = host.strip_prefix("www.").unwrap_or(host); - - // Replace dot and slash with underscore - let mut result = host.replace('.', "_"); - result.push('_'); - result.push_str(&path.replace('/', "_")); - - // Trim trailing underscore - result.trim_end_matches('_').to_string() -} - -/// Extract properties from a JSON schema -fn create_object(schema: &serde_json::Value) -> Object { - let name = schema - .get("title") - .expect("Could not find title in the JSON schema") - .as_str() - .expect("Title is not a string"); - let properties = schema - .get("properties") - .expect("Could not find properties in the JSON schema") - .as_object() - .expect("Properties is not an object"); - - let mut object = Object::new(name.to_string(), None); - - for (key, value) in properties { - let data_type = DataType::from_object(value); - - let mut attribute = match data_type { - DataType::Object { properties } => process_object(key, &properties), - DataType::Array => process_array(key, value), - DataType::Enum { values: _ } => process_enum(key), - DataType::Reference { reference } => process_reference(key, reference), - _ => process_primitive(key, value), - }; - - // Add all other keys as options - for (key, value) in value.as_object().unwrap() { - if !PROP_KEYS.contains(&key.as_str()) { - attribute - .add_option(AttrOption::new( - key.to_string(), - value.as_str().unwrap().to_string(), - )) - .expect("Failed to add option"); - } - } - - object.attributes.push(attribute); - } - - object -} - -fn process_array(name: &str, value: &serde_json::Value) -> Attribute { - // Prepare attribute - let mut attribute = Attribute::new(name.to_string(), false); - attribute.is_array = true; - - // Get the items - let items = value - .get("items") - .expect("Could not find items in the array"); - - // Check whether the items is a ref or any other type - let data_type = DataType::from_object(items); - - // Set the data type - attribute.dtypes = match data_type { - DataType::Reference { reference } => vec![reference], - _ => vec![data_type.to_string()], - }; - - attribute -} - -fn process_primitive(name: &str, value: &serde_json::Value) -> Attribute { - let mut attribute = Attribute::new(name.to_string(), false); - let data_type = value - .get("type") - .expect("Could not find type in the property") - .as_str() - .expect("Type is not a string") - .to_string(); - - attribute.dtypes = vec![data_type]; - - attribute -} - -fn process_reference(name: &str, reference: String) -> Attribute { - let mut attribute = Attribute::new(name.to_string(), false); - attribute.dtypes = vec![reference]; - attribute -} - -fn process_object(_name: &str, _value: &serde_json::Value) -> Attribute { - panic!("Nested object type is not supported yet"); -} - -fn process_enum(_name: &str) -> Attribute { - panic!("Property enums are currently only allowed as reference"); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_json_schema() { - // Arrange - let path = Path::new("tests/data/expected_json_schema.json"); - - // Act - let model = parse_json_schema(path).unwrap(); - - // Assert - assert_eq!(model.objects.len(), 2); - assert_eq!(model.enums.len(), 1); - - let object = &model.objects[0]; - assert_eq!(object.name, "Test"); - assert_eq!(object.attributes.len(), 4); - - let object = &model.objects[1]; - assert_eq!(object.name, "Test2"); - assert_eq!(object.attributes.len(), 2); - - let enumeration = &model.enums[0]; - assert_eq!(enumeration.name, "Ontology"); - assert_eq!(enumeration.mappings.len(), 3); - } -} diff --git a/src/primitives.rs b/src/primitives.rs deleted file mode 100644 index e5110a5..0000000 --- a/src/primitives.rs +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2024 Jan Range - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - */ - -use std::collections::HashMap; - -/// A struct to manage primitive types and their corresponding JSON mappings. -pub struct PrimitiveTypes { - types: Vec, - json_mappings: HashMap, -} - -impl Default for PrimitiveTypes { - fn default() -> Self { - PrimitiveTypes::new() - } -} - -impl PrimitiveTypes { - /// Creates a new instance of `PrimitiveTypes` with predefined primitive types - /// and their corresponding JSON mappings. - pub fn new() -> Self { - let mut json_mappings = HashMap::new(); - - json_mappings.insert("string".to_string(), "string".to_string()); - json_mappings.insert("float".to_string(), "number".to_string()); - json_mappings.insert("integer".to_string(), "integer".to_string()); - json_mappings.insert("boolean".to_string(), "boolean".to_string()); - json_mappings.insert("bool".to_string(), "boolean".to_string()); - json_mappings.insert("null".to_string(), "null".to_string()); - json_mappings.insert("date".to_string(), "string".to_string()); - json_mappings.insert("number".to_string(), "number".to_string()); - json_mappings.insert("identifier".to_string(), "string".to_string()); - json_mappings.insert("bytes".to_string(), "string".to_string()); - - PrimitiveTypes { - types: vec![ - "string".to_string(), - "float".to_string(), - "integer".to_string(), - "boolean".to_string(), - "bool".to_string(), - "null".to_string(), - "number".to_string(), - "date".to_string(), - "identifier".to_string(), - ], - json_mappings, - } - } - - /// Filters and returns the list of non-primitive types from the given list of data types. - /// - /// # Arguments - /// - /// * `dtypes` - A reference to a vector of data types to be filtered. - /// - /// # Returns - /// - /// A vector containing only the non-primitive types from the input vector. - pub fn filter_non_primitives(&self, dtypes: &Vec) -> Vec { - let mut non_primitive_types: Vec = Vec::new(); - for dtype in dtypes { - if !self.is_primitive(dtype) { - non_primitive_types.push(dtype.to_string()); - } - } - - non_primitive_types - } - - /// Filters and returns the list of primitive types from the given list of data types. - /// - /// # Arguments - /// - /// * `dtypes` - A reference to a vector of data types to be filtered. - /// - /// # Returns - /// - /// A vector containing only the primitive types from the input vector. - pub fn filter_primitive(&self, dtypes: &Vec) -> Vec { - let mut primitive_types: Vec = Vec::new(); - for dtype in dtypes { - if self.is_primitive(dtype) { - primitive_types.push(dtype.to_string()); - } - } - - primitive_types - } - - /// Checks if the given data type is a primitive type. - /// - /// # Arguments - /// - /// * `dtype` - A string slice representing the data type to be checked. - /// - /// # Returns - /// - /// A boolean value indicating whether the data type is a primitive type. - fn is_primitive(&self, dtype: &str) -> bool { - self.types.contains(&dtype.to_string()) - } - - /// Converts a data type to its corresponding JSON representation. - /// - /// # Arguments - /// - /// * `dtype` - A reference to a string representing the data type to be converted. - /// - /// # Returns - /// - /// A string representing the JSON mapping of the data type. - /// - /// # Panics - /// - /// Panics if the data type is not a primitive type. - pub fn dtype_to_json(&self, dtype: &String) -> String { - if !self.json_mappings.contains_key(dtype) { - panic!("The data type {} is not a primitive type", dtype) - } else { - self.json_mappings[dtype].to_string() - } - } -} diff --git a/src/schema.rs b/src/schema.rs deleted file mode 100644 index 2b8eac4..0000000 --- a/src/schema.rs +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Copyright (c) 2024 Jan Range - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - */ - -use crate::attribute; -use crate::attribute::AttrOption; -use crate::datamodel::DataModel; -use crate::object::{self, Enumeration}; -use crate::primitives::PrimitiveTypes; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use std::collections::HashSet; -use std::fmt::Display; -use std::hash::Hash; - -static DEFINITIONS_KEY: &str = "definitions"; -static SCHEMA_VERSION: &str = "http://json-schema.org/draft-07/schema"; - -#[derive(PartialEq, Eq, Debug)] -enum RefType { - Object(String), - Enum(String), -} - -impl Hash for RefType { - fn hash(&self, state: &mut H) { - match self { - RefType::Object(name) => name.hash(state), - RefType::Enum(name) => name.hash(state), - } - } -} - -impl Display for RefType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - RefType::Object(name) => write!(f, "{}", name), - RefType::Enum(name) => write!(f, "{}", name), - } - } -} - -#[derive(Serialize, Deserialize)] -struct JSONSchema { - #[serde(rename = "$schema")] - schema: String, - #[serde(flatten)] - definitions: serde_json::Value, -} - -/// Converts a data model to a JSON schema. -/// -/// # Arguments -/// * `name` - The name of the object to convert. -/// * `model` - The data model containing the objects and enums. -/// -/// # Returns -/// A JSON string representing the schema. -pub fn to_json_schema(name: &String, model: &DataModel) -> String { - let objects = &model.objects; - let obj = objects.iter().find(|o| o.name == *name).unwrap(); - let (mut schema, used_refs) = process_class(obj, model); - - // Get unique used_refs - let used_refs = used_refs.into_iter().collect::>(); - - for reference in used_refs { - match reference { - RefType::Object(name) => { - let sub_obj = objects.iter().find(|o| o.name == name).unwrap(); - let (properties, _) = process_class(sub_obj, model); - schema[DEFINITIONS_KEY][name] = properties; - } - RefType::Enum(name) => { - let sub_enum = model.enums.iter().find(|e| e.name == name).unwrap(); - let properties = process_enum(sub_enum); - schema[DEFINITIONS_KEY][name] = properties; - } - } - } - - let schema = JSONSchema { - schema: SCHEMA_VERSION.to_string(), - definitions: schema, - }; - - serde_json::to_string_pretty(&schema).unwrap() -} - -/// Processes a class object to generate its JSON schema and collect references. -/// -/// # Arguments -/// * `object` - The object to process. -/// * `model` - The data model containing the objects and enums. -/// -/// # Returns -/// A tuple containing the JSON schema and a set of references. -fn process_class( - object: &object::Object, - model: &DataModel, -) -> (serde_json::Value, HashSet) { - // Retrieve all object and enum names - let object_names = model - .objects - .iter() - .map(|o| o.name.clone()) - .collect::>(); - let enum_names = model - .enums - .iter() - .map(|e| e.name.clone()) - .collect::>(); - - // Initialize the schema and references - let mut all_refs = HashSet::new(); - let mut schema = json!({ - "title": object.name, - "type": "object", - "properties": {}, - }); - - if !object.docstring.is_empty() { - schema["description"] = json!(object.docstring); - } - - if object.term.is_some() { - schema["term"] = json!(object.term.as_ref().unwrap()); - } - - for attribute in &object.attributes { - let (primitives, references) = extract_primitives_and_refs(&attribute.dtypes); - - for primitive in primitives { - process_primitive(&mut schema["properties"], attribute, &primitive); - } - - for reference in references { - if enum_names.contains(&reference) { - all_refs.insert(RefType::Enum(reference.clone())); - process_enum_reference( - &attribute.name, - &mut schema["properties"], - reference.as_str(), - ); - } else if object_names.contains(&reference) { - all_refs.insert(RefType::Object(reference.clone())); - process_reference(&mut schema["properties"], attribute, &reference); - } else { - panic!("Reference {} not found in the markdown file", reference); - } - } - } - - (schema, all_refs) -} - -fn process_enum(enumeration: &Enumeration) -> serde_json::Value { - let values = enumeration - .mappings - .values() - .cloned() - .collect::>(); - - json!({ - "title": enumeration.name, - "type": "string", - "enum": values, - }) -} - -/// Extracts primitive types and references from a list of data types. -/// -/// # Arguments -/// * `dtypes` - The list of data types to process. -/// -/// # Returns -/// A tuple containing lists of primitive types and references. -fn extract_primitives_and_refs(dtypes: &Vec) -> (Vec, Vec) { - let primitives = PrimitiveTypes::new(); - let references = primitives.filter_non_primitives(dtypes); - let primitives = primitives.filter_primitive(dtypes); - - (primitives, references) -} - -/// Creates a JSON property with a capitalized title. -/// -/// # Arguments -/// * `name` - The name of the property. -/// -/// # Returns -/// A JSON value representing the property. -fn create_property(name: &String) -> serde_json::Value { - json!({ - "title": name, - }) -} - -/// Processes a primitive attribute and adds it to the properties. -/// -/// # Arguments -/// * `properties` - The properties JSON object. -/// * `attribute` - The attribute to process. -/// * `primitive` - The primitive type of the attribute. -fn process_primitive( - properties: &mut serde_json::Value, - attribute: &attribute::Attribute, - primitive: &String, -) { - let name = &attribute.name; - properties[name] = create_property(name); - - if !attribute.docstring.is_empty() { - properties[name]["description"] = json!(attribute.docstring); - } - - if let Some(ref term) = attribute.term { - properties[name]["term"] = json!(term); - } - - set_primitive_dtype(properties, attribute, primitive); - set_options(&mut properties[name], &attribute.options); -} - -/// Sets the data type of a primitive attribute. -/// -/// # Arguments -/// * `properties` - The properties JSON object. -/// * `attribute` - The attribute to process. -/// * `primitive` - The primitive type of the attribute. -fn set_primitive_dtype( - properties: &mut serde_json::Value, - attribute: &attribute::Attribute, - primitive: &String, -) { - let is_array = attribute.is_array; - let name = &attribute.name; - let primitives = PrimitiveTypes::new(); - let json_dtype = primitives.dtype_to_json(primitive); - - if is_array { - properties[name]["type"] = json!("array"); - properties[name]["items"] = json!({ - "type": json_dtype - }); - - return; - } - - properties[name]["type"] = json!(json_dtype); -} - -/// Sets additional options for a JSON property. -/// -/// # Arguments -/// * `property` - The property JSON object. -/// * `options` - The list of attribute options. -fn set_options(property: &mut serde_json::Value, options: &Vec) { - for option in options { - match is_numeric(&option.value) { - true => { - property[option.key()] = json!(option.value().parse::().unwrap()); - } - false => { - property[option.key()] = json!(option.value()); - } - } - } -} - -/// Checks if a value is numeric or a string. -/// -/// # Arguments -/// * `value` - The value to check. -fn is_numeric(value: &str) -> bool { - value.parse::().is_ok() -} - -/// Processes a reference attribute and adds it to the properties. -/// -/// # Arguments -/// * `properties` - The properties JSON object. -/// * `attribute` - The attribute to process. -/// * `reference` - The reference type of the attribute. -fn process_reference( - properties: &mut serde_json::Value, - attribute: &attribute::Attribute, - reference: &String, -) { - let name = &attribute.name; - if let Some(ref term) = attribute.term { - properties[name]["term"] = json!(term); - } - - set_ref_dtype(properties, attribute, reference); - set_options(&mut properties[name], &attribute.options); -} - -/// Processes an enum reference attribute and adds it to the properties. -/// -/// # Arguments -/// * `name` - The name of the attribute. -/// * `properties` - The properties JSON object. -/// * `enumeration` - The enumeration object. -fn process_enum_reference(name: &String, properties: &mut serde_json::Value, reference: &str) { - properties[name] = create_property(name); - let def_path = format!("#/{}/{}", DEFINITIONS_KEY, reference); - properties[name]["$ref"] = json!(def_path); -} - -/// Sets the data type of a reference attribute. -/// -/// # Arguments -/// * `properties` - The properties JSON object. -/// * `attribute` - The attribute to process. -/// * `reference` - The reference type of the attribute. -fn set_ref_dtype( - properties: &mut serde_json::Value, - attribute: &attribute::Attribute, - reference: &String, -) { - let name = &attribute.name; - let def_path = format!("#/{}/{}", DEFINITIONS_KEY, reference); - if attribute.is_array { - properties[name]["type"] = json!("array"); - properties[name]["items"] = json!({ - "$ref": json!(def_path) - }); - - return; - } - - properties[name]["$ref"] = json!(def_path); -} From 3b7337efe7d91dbf88494d094bd510341f1cb5cd Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:40:33 +0100 Subject: [PATCH 03/17] remove clippy pre-commit --- .pre-commit-config.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dbb24b6..fcf23b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,9 +6,3 @@ repos: entry: cargo fmt -- --check language: system types: [rust] - - - id: clippy - name: clippy - entry: cargo clippy --all-targets --all-features -- -D warnings - language: system - types: [rust] \ No newline at end of file From 3e1ecef8c73d3ffe5db6c4224317c4ac5cafc242 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:41:06 +0100 Subject: [PATCH 04/17] rework json schema export --- src/json/datatype.rs | 140 +------ src/json/export.rs | 534 +++++++++++++++++++++++++++ src/json/schema.rs | 269 ++++++++++++++ src/lib.rs | 13 +- tests/data/expected_json_schema.json | 76 ++-- tests/data/model_json_schema.md | 55 +++ tests/integration_tests.rs | 77 +--- 7 files changed, 940 insertions(+), 224 deletions(-) create mode 100644 src/json/export.rs create mode 100644 src/json/schema.rs create mode 100644 tests/data/model_json_schema.md diff --git a/src/json/datatype.rs b/src/json/datatype.rs index 20f808f..6e69de4 100644 --- a/src/json/datatype.rs +++ b/src/json/datatype.rs @@ -23,16 +23,13 @@ use std::{fmt::Display, str::FromStr}; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum DataType { String, Integer, Number, Boolean, - Object { properties: serde_json::Value }, Array, - Enum { values: Vec }, - Reference { reference: String }, } impl Display for DataType { @@ -43,21 +40,6 @@ impl Display for DataType { DataType::Number => write!(f, "number"), DataType::Boolean => write!(f, "boolean"), DataType::Array => write!(f, "array"), - DataType::Reference { reference } => write!(f, "reference [{}]", reference), - DataType::Object { properties } => { - let properties = properties - .as_object() - .unwrap() - .keys() - .map(|k| k.to_string()) - .collect::>() - .join(", "); - write!(f, "object [{}]", properties) - } - DataType::Enum { values } => { - let values = values.join(", "); - write!(f, "enum [{}]", values) - } } } } @@ -76,44 +58,6 @@ impl FromStr for DataType { } } -impl DataType { - pub fn from_object(value: &serde_json::Value) -> Self { - if let Some(reference) = value.get("$ref") { - DataType::Reference { - reference: reference - .as_str() - .unwrap() - .split('/') - .last() - .unwrap() - .to_string(), - } - } else if let Some(values) = value.get("enum") { - let values = values - .as_array() - .unwrap() - .iter() - .map(|v| v.as_str().unwrap().to_string()) - .collect(); - return DataType::Enum { values }; - } else if let Some(data_type) = value.get("type") { - let data_type = data_type.as_str().unwrap(); - if data_type == "object" { - let properties = value.get("properties").unwrap(); - return DataType::Object { - properties: properties.clone(), - }; - } else { - return data_type.parse().expect( - "Could not parse the data type. Make sure the data type is a valid type.", - ); - } - } else { - panic!("Could not find a data type in the JSON schema"); - } - } -} - #[cfg(test)] mod tests { use super::*; @@ -124,62 +68,6 @@ mod tests { assert_eq!(data_type, DataType::String); } - #[test] - fn test_data_type_from_object() { - let data_type = DataType::from_object(&serde_json::json!({ - "type": "string" - })); - assert_eq!(data_type, DataType::String); - } - - #[test] - fn test_data_type_from_object_with_enum() { - let data_type = DataType::from_object(&serde_json::json!({ - "enum": ["one", "two"] - })); - assert_eq!( - data_type, - DataType::Enum { - values: vec!["one".to_string(), "two".to_string()] - } - ); - } - - #[test] - fn test_data_type_from_object_with_reference() { - let data_type = DataType::from_object(&serde_json::json!({ - "$ref": "#/definitions/Person" - })); - assert_eq!( - data_type, - DataType::Reference { - reference: "Person".to_string() - } - ); - } - - #[test] - fn test_data_type_from_object_with_object() { - let data_type = DataType::from_object(&serde_json::json!({ - "type": "object", - "properties": { - "name": { - "type": "string" - } - } - })); - assert_eq!( - data_type, - DataType::Object { - properties: serde_json::json!({ - "name": { - "type": "string" - } - }) - } - ); - } - #[test] fn test_data_type_display() { let data_type = DataType::String; @@ -196,31 +84,5 @@ mod tests { let data_type = DataType::Array; assert_eq!(data_type.to_string(), "array"); - - let data_type = DataType::Reference { - reference: "Person".to_string(), - }; - assert_eq!(data_type.to_string(), "reference [Person]"); - - let data_type = DataType::Object { - properties: serde_json::json!({ - "name": { - "type": "string" - } - }), - }; - assert_eq!(data_type.to_string(), "object [name]"); - - let data_type = DataType::Enum { - values: vec!["one".to_string(), "two".to_string()], - }; - assert_eq!(data_type.to_string(), "enum [one, two]"); - } - - #[test] - #[should_panic] - fn test_display_panic() { - let data_type = DataType::from_object(&serde_json::json!({})); - data_type.to_string(); } } diff --git a/src/json/export.rs b/src/json/export.rs new file mode 100644 index 0000000..e451af6 --- /dev/null +++ b/src/json/export.rs @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2024 Jan Range + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + str::FromStr, +}; + +use crate::{ + attribute::Attribute, + datamodel::DataModel, + object::{Enumeration, Object}, + validation::BASIC_TYPES, +}; + +use super::schema::{self, PrimitiveType}; + +const SCHEMA: &str = "https://json-schema.org/draft/2020-12/schema"; + +/// Converts a `DataModel` into a JSON schema representation. +/// +/// # Arguments +/// +/// * `model` - A reference to the `DataModel` to be converted. +/// * `root` - The root object name in the model. +/// +/// # Returns +/// +/// A `Result` containing the `SchemaObject` or an error message. +pub fn to_json_schema(model: &DataModel, root: &str) -> Result { + let root_object = retrieve_object(model, root)?; + + let mut schema_object = schema::SchemaObject::try_from(root_object)?; + let mut used_types = HashSet::new(); + let mut used_enums = HashSet::new(); + + collect_definitions(root_object, model, &mut used_types, &mut used_enums)?; + + let definitions = collect_definitions_from_model(model, &used_types, &used_enums)?; + + schema_object.schema = Some(SCHEMA.to_string()); + schema_object.definitions = definitions; + + if let Some(config) = model.config.clone() { + schema_object.id = Some(config.repo.clone()); + if let Some(prefixes) = config.prefixes { + resolve_prefixes(&mut schema_object, &prefixes); + } + } + + Ok(schema_object) +} + +/// Retrieves an object from the `DataModel` by name. +/// +/// # Arguments +/// +/// * `model` - A reference to the `DataModel`. +/// * `name` - The name of the object to retrieve. +/// +/// # Returns +/// +/// A `Result` containing a reference to the `Object` or an error message. +fn retrieve_object<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Object, String> { + model + .objects + .iter() + .find(|obj| obj.name == name) + .ok_or(format!("Object {} not found", name)) +} + +/// Retrieves an enumeration from the `DataModel` by name. +/// +/// # Arguments +/// +/// * `model` - A reference to the `DataModel`. +/// * `name` - The name of the enumeration to retrieve. +/// +/// # Returns +/// +/// A `Result` containing a reference to the `EnumObject` or an error message. +fn retrieve_enum<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Enumeration, String> { + model + .enums + .iter() + .find(|e| e.name == name) + .ok_or(format!("Enum {} not found", name)) +} + +/// Collects definitions from the `DataModel` based on used types and enums. +/// +/// # Arguments +/// +/// * `model` - A reference to the `DataModel`. +/// * `used_types` - A reference to a set of used type names. +/// * `used_enums` - A reference to a set of used enum names. +/// +/// # Returns +/// +/// A `Result` containing a `BTreeMap` of schema definitions or an error message. +fn collect_definitions_from_model( + model: &DataModel, + used_types: &HashSet, + used_enums: &HashSet, +) -> Result, String> { + let mut definitions = BTreeMap::new(); + + for obj_name in used_types { + let obj = retrieve_object(model, obj_name)?; + definitions.insert(obj_name.clone(), schema::SchemaType::try_from(obj)?); + } + + for enum_name in used_enums { + let enum_object = retrieve_enum(model, enum_name)?; + definitions.insert( + enum_name.clone(), + schema::SchemaType::try_from(enum_object)?, + ); + } + + Ok(definitions) +} + +/// Collects definitions from an object and updates the used types and enums sets. +/// +/// # Arguments +/// +/// * `object` - A reference to the `Object`. +/// * `model` - A reference to the `DataModel`. +/// * `used_types` - A mutable reference to a set of used type names. +/// * `used_enums` - A mutable reference to a set of used enum names. +/// +/// # Returns +/// +/// A `Result` indicating success or an error message. +fn collect_definitions( + object: &Object, + model: &DataModel, + used_types: &mut HashSet, + used_enums: &mut HashSet, +) -> Result<(), String> { + for attr in object.attributes.iter() { + for dtype in attr.dtypes.iter() { + if BASIC_TYPES.contains(&dtype.as_str()) || used_types.contains(dtype) { + continue; + } + + let object = model.objects.iter().find(|obj| obj.name == *dtype); + let enumeration = model.enums.iter().find(|e| e.name == *dtype); + + if let Some(object) = object { + used_types.insert(dtype.clone()); + collect_definitions(object, model, used_types, used_enums)?; + } else if let Some(enumeration) = enumeration { + used_enums.insert(enumeration.name.clone()); + } else { + return Err(format!("Object or enumeration {} not found", dtype)); + } + } + } + + Ok(()) +} + +fn resolve_prefixes(schema: &mut schema::SchemaObject, prefixes: &HashMap) { + for (_, property) in schema.properties.iter_mut() { + if let Some(reference) = property.term.clone() { + let (prefix, term) = reference.split_once(":").unwrap_or(("", "")); + if let Some(prefix) = prefixes.get(prefix) { + property.term = Some(format!("{}{}", prefix, term)); + } + } + } +} + +impl TryFrom<&Enumeration> for schema::SchemaType { + type Error = String; + + /// Attempts to convert an `Enumeration` into a `SchemaType`. + /// + /// # Arguments + /// + /// * `enumeration` - A reference to the `Enumeration`. + /// + /// # Returns + /// + /// A `Result` containing the `SchemaType` or an error message. + fn try_from(enumeration: &Enumeration) -> Result { + Ok(schema::SchemaType::Enum(schema::EnumObject::try_from( + enumeration, + )?)) + } +} + +impl TryFrom<&Object> for schema::SchemaType { + type Error = String; + + /// Attempts to convert an `Object` into a `SchemaType`. + /// + /// # Arguments + /// + /// * `obj` - A reference to the `Object`. + /// + /// # Returns + /// + /// A `Result` containing the `SchemaType` or an error message. + fn try_from(obj: &Object) -> Result { + Ok(schema::SchemaType::Object(schema::SchemaObject::try_from( + obj, + )?)) + } +} + +impl TryFrom<&Object> for schema::SchemaObject { + type Error = String; + + /// Attempts to convert an `Object` into a `SchemaObject`. + /// + /// # Arguments + /// + /// * `obj` - A reference to the `Object`. + /// + /// # Returns + /// + /// A `Result` containing the `SchemaObject` or an error message. + fn try_from(obj: &Object) -> Result { + let properties: Result, String> = obj + .attributes + .iter() + .map(|attr| -> Result<(String, schema::Property), String> { + Ok((attr.name.clone(), schema::Property::try_from(attr)?)) + }) + .collect(); + + let required: Vec = obj + .attributes + .iter() + .filter(|attr| attr.required) + .map(|attr| attr.name.clone()) + .collect(); + + Ok(schema::SchemaObject { + title: obj.name.clone(), + dtype: schema::DataType::Object, + description: Some(obj.docstring.clone()), + properties: properties?, + definitions: BTreeMap::new(), + required, + schema: None, + id: None, + additional_properties: false, + }) + } +} + +impl TryFrom<&Enumeration> for schema::EnumObject { + type Error = String; + + /// Attempts to convert an `Enumeration` into an `EnumObject`. + /// + /// # Arguments + /// + /// * `enumeration` - A reference to the `Enumeration`. + /// + /// # Returns + /// + /// A `Result` containing the `EnumObject` or an error message. + fn try_from(enumeration: &Enumeration) -> Result { + let values = enumeration + .mappings + .values() + .cloned() + .collect::>(); + + Ok(schema::EnumObject { + title: enumeration.name.clone(), + dtype: schema::DataType::String, + description: Some(enumeration.docstring.clone()), + enum_values: values, + }) + } +} + +impl TryFrom<&Attribute> for schema::Property { + type Error = String; + + /// Attempts to convert an `Attribute` into a `Property`. + /// + /// # Arguments + /// + /// * `attr` - A reference to the `Attribute`. + /// + /// # Returns + /// + /// A `Result` containing the `Property` or an error message. + fn try_from(attr: &Attribute) -> Result { + let mut dtype = (!attr.is_enum) + .then(|| schema::DataType::try_from(attr)) + .transpose()?; + + let options: HashMap = attr + .options + .iter() + .map(|o| (o.key.clone(), PrimitiveType::from(&o.value))) + .collect(); + + let reference: Option = + if attr.is_enum || matches!(dtype, Some(schema::DataType::Object { .. })) { + Some(format!("#/$defs/{}", attr.dtypes[0])) + } else { + None + }; + + let items: Option = attr.into(); + let any_of = (!attr.is_array).then(|| attr.into()); + let description = (!attr.docstring.is_empty()).then(|| attr.docstring.clone()); + let enum_values = if attr.is_enum { Some(Vec::new()) } else { None }; + + if any_of.is_some() { + dtype = None; + } + + Ok(schema::Property { + title: attr.name.clone(), + dtype, + description, + term: attr.term.clone(), + reference, + options, + any_of, + items, + enum_values, + }) + } +} + +impl TryFrom<&Attribute> for schema::DataType { + type Error = String; + + /// Attempts to convert an `Attribute` into a `DataType`. + /// + /// # Arguments + /// + /// * `attr` - A reference to the `Attribute`. + /// + /// # Returns + /// + /// A `Result` containing the `DataType` or an error message. + /// + /// # Errors + /// + /// Returns an error if the `dtypes` vector in the attribute is empty. + fn try_from(attr: &Attribute) -> Result { + if attr.is_array { + return Ok(schema::DataType::Array); + } + + schema::DataType::try_from( + attr.dtypes + .first() + .ok_or(format!("No data types found for attribute: {}", attr.name))?, + ) + } +} + +/// Specific case for the `items` field in the JSON schema. +impl From<&Attribute> for Option { + /// Converts an `Attribute` into an `Option`. + /// + /// # Arguments + /// + /// * `attr` - A reference to the `Attribute`. + /// + /// # Returns + /// + /// An `Option` representing the attribute's items. + fn from(attr: &Attribute) -> Self { + if !attr.is_array { + // No need for 'items' when the attr is not + // an array type + return None; + } + + // Check if it is an AnyOf case + let any_of: Vec = attr.into(); + + if any_of.is_empty() { + // There is just a single type + Some(process_dtype(&attr.dtypes[0])) + } else { + Some(schema::Item::AnyOfItem(schema::AnyOfItemType { any_of })) + } + } +} + +impl From<&Attribute> for Vec { + /// Converts an `Attribute` into a `Vec`. + /// + /// # Arguments + /// + /// * `attr` - A reference to the `Attribute`. + /// + /// # Returns + /// + /// A `Vec` representing the attribute's items. + fn from(attr: &Attribute) -> Self { + if attr.dtypes.len() == 1 { + return Vec::new(); + } + + let mut items = Vec::new(); + for dtype in attr.dtypes.iter() { + items.push(process_dtype(dtype)); + } + + items + } +} + +/// Processes a data type string and returns an `Item`. +/// +/// # Arguments +/// +/// * `dtype` - A reference to the data type string. +/// +/// # Returns +/// +/// An `Item` representing the data type. +fn process_dtype(dtype: &str) -> schema::Item { + match schema::DataType::from_str(dtype) { + Ok(basic_type) => { + schema::Item::DataTypeItem(schema::DataTypeItemType { dtype: basic_type }) + } + Err(_) => schema::Item::ReferenceItem(schema::ReferenceItemType { + reference: format!("#/$defs/{}", dtype), + }), + } +} + +#[cfg(test)] +mod tests { + use serde_json::{json, Value}; + + use super::*; + use crate::attribute::Attribute; + + #[test] + fn test_attribute_with_multiple_types() { + let attr = Attribute { + name: "test_attribute".to_string(), + is_array: false, + is_id: false, + dtypes: vec!["string".to_string(), "RefType".to_string()], + docstring: "".to_string(), + options: vec![], + term: None, + required: false, + default: None, + xml: None, + is_enum: false, + }; + + let property: schema::Property = + schema::Property::try_from(&attr).expect("Failed to convert Attribute to Property"); + + let serialized_property = + serde_json::to_value(&property).expect("Failed to serialize Property to JSON"); + + let expected_json = json!({ + "title": "test_attribute", + "anyOf": [ + {"type": "string"}, + {"$ref": "#/$defs/RefType"}, + ] + }); + + assert_eq!(serialized_property, expected_json); + } + + #[test] + fn test_array_attribute() { + let attr = Attribute { + name: "test_attribute".to_string(), + is_array: true, + is_id: false, + dtypes: vec!["string".to_string(), "RefType".to_string()], + docstring: "".to_string(), + options: vec![], + term: None, + required: false, + default: None, + xml: None, + is_enum: false, + }; + + let property: schema::Property = + schema::Property::try_from(&attr).expect("Failed to convert Attribute to Property"); + let serialized_property: Value = + serde_json::to_value(&property).expect("Failed to serialize Property to JSON"); + + let expected_json = json!({ + "title": "test_attribute", + "type": "array", + "items": { + "anyOf": [ + {"type": "string"}, + {"$ref": "#/$defs/RefType"} + ] + } + }); + + assert_eq!(serialized_property, expected_json); + } +} diff --git a/src/json/schema.rs b/src/json/schema.rs new file mode 100644 index 0000000..de572ff --- /dev/null +++ b/src/json/schema.rs @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2024 Jan Range + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::{ + collections::{BTreeMap, HashMap}, + str::FromStr, +}; + +#[derive(Debug, Deserialize, Serialize)] +#[serde(untagged)] +pub enum SchemaType { + Object(SchemaObject), + Enum(EnumObject), +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct SchemaObject { + #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")] + pub schema: Option, + #[serde(rename = "$id", skip_serializing_if = "Option::is_none")] + pub id: Option, + pub title: String, + #[serde(rename = "type")] + pub dtype: DataType, + #[serde(skip_serializing_if = "skip_empty_string")] + pub description: Option, + pub properties: BTreeMap, + #[serde( + rename = "$defs", + skip_serializing_if = "BTreeMap::is_empty", + alias = "definitions" + )] + pub definitions: BTreeMap, + pub required: Vec, + #[serde(rename = "additionalProperties", default = "default_false")] + pub additional_properties: bool, +} + +impl SchemaObject { + pub fn to_value(&self) -> Result { + serde_json::to_value(self) + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct EnumObject { + pub title: String, + #[serde(rename = "type")] + pub dtype: DataType, + #[serde(skip_serializing_if = "skip_empty_string")] + pub description: Option, + #[serde(rename = "enum")] + pub enum_values: Vec, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Property { + pub title: String, + #[serde(rename = "type", skip_serializing_if = "Option::is_none")] + pub dtype: Option, + #[serde(skip_serializing_if = "skip_empty_string")] + pub description: Option, + #[serde(rename = "$term", skip_serializing_if = "skip_empty_string")] + pub term: Option, + #[serde(rename = "$ref", skip_serializing_if = "Option::is_none")] + pub reference: Option, + #[serde(flatten)] + pub options: HashMap, + #[serde(skip_serializing_if = "Option::is_none")] + pub items: Option, + #[serde(rename = "anyOf", skip_serializing_if = "skip_empty")] + pub any_of: Option>, + #[serde(skip_serializing_if = "skip_empty", rename = "enum")] + pub enum_values: Option>, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +pub enum Item { + ReferenceItem(ReferenceItemType), + AnyOfItem(AnyOfItemType), + DataTypeItem(DataTypeItemType), +} + +impl Serialize for Item { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Item::ReferenceItem(ref_item) => ref_item.serialize(serializer), + Item::AnyOfItem(any_of_item) => any_of_item.serialize(serializer), + Item::DataTypeItem(data_type_item) => data_type_item.serialize(serializer), + } + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ReferenceItemType { + #[serde(rename = "$ref")] + pub reference: String, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct AnyOfItemType { + #[serde(rename = "anyOf")] + pub any_of: Vec, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct DataTypeItemType { + #[serde(rename = "type")] + pub dtype: DataType, +} + +/// Represents various data types that can be used in a JSON schema. +#[derive(Debug, Deserialize, Serialize, PartialEq)] +pub enum DataType { + #[serde(rename = "string")] + String, + #[serde(rename = "integer")] + Integer, + #[serde(rename = "number")] + Number, + #[serde(rename = "boolean")] + Boolean, + #[serde(rename = "object")] + Object, + #[serde(rename = "array")] + Array, +} + +impl Default for DataType { + /// Provides a default value for the DataType, which is `String`. + fn default() -> Self { + DataType::String + } +} + +impl FromStr for DataType { + type Err = String; + + /// Converts a string representation of a data type into a `DataType` enum. + /// + /// # Errors + /// + /// Returns an error if the string is empty or does not match any known data type. + fn from_str(s: &str) -> Result { + match s { + "string" => Ok(DataType::String), + "number" => Ok(DataType::Number), + "float" => Ok(DataType::Number), + "integer" => Ok(DataType::Integer), + "boolean" => Ok(DataType::Boolean), + "object" => Ok(DataType::Object), + "array" => Ok(DataType::Array), + _ => Err(format!("Invalid data type: {}", s)), + } + } +} + +impl TryFrom<&String> for DataType { + type Error = String; + + fn try_from(s: &String) -> Result { + match s.as_str() { + "string" => Ok(DataType::String), + "number" => Ok(DataType::Number), + "integer" => Ok(DataType::Integer), + "boolean" => Ok(DataType::Boolean), + "array" => Ok(DataType::Array), + "float" => Ok(DataType::Number), + _ => Ok(DataType::Object), + } + } +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(untagged)] +pub enum PrimitiveType { + String(String), + Number(f64), + Integer(i64), + Boolean(bool), +} + +impl From<&String> for PrimitiveType { + /// Converts a string reference into a `PrimitiveType` enum. + /// + /// # Arguments + /// + /// * `s` - A reference to the string to be converted. + /// + /// # Returns + /// + /// A `PrimitiveType` enum variant corresponding to the parsed value. + fn from(s: &String) -> Self { + if let Ok(number) = s.parse::() { + return PrimitiveType::Number(number); + } + + if let Ok(boolean) = s.to_lowercase().parse::() { + return PrimitiveType::Boolean(boolean); + } + + if let Ok(integer) = s.parse::() { + return PrimitiveType::Integer(integer); + } + + PrimitiveType::String(s.clone()) + } +} + +fn skip_empty(option: &Option>) -> bool { + match option { + Some(vec) => vec.is_empty(), + None => true, + } +} + +fn skip_empty_string(option: &Option) -> bool { + match option { + Some(string) => string.is_empty(), + None => true, + } +} + +fn default_false() -> bool { + false +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + /// Tests the conversion from string to DataType enum variants. + /// It checks for correct parsing of basic types and custom references. + fn test_from_str() { + assert_eq!(DataType::from_str("string").unwrap(), DataType::String); + assert_eq!(DataType::from_str("number").unwrap(), DataType::Number); + assert_eq!(DataType::from_str("integer").unwrap(), DataType::Integer); + assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean); + assert_eq!(DataType::from_str("object").unwrap(), DataType::Object); + assert_eq!(DataType::from_str("array").unwrap(), DataType::Array); + } +} diff --git a/src/lib.rs b/src/lib.rs index e7aec3e..9cee236 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,13 +35,12 @@ pub mod validation; pub(crate) mod attribute; pub(crate) mod object; -pub(crate) mod primitives; -pub(crate) mod schema; pub(crate) mod xmltype; -pub(crate) mod json { +pub mod json { mod datatype; - pub(crate) mod parser; + pub mod export; + pub mod schema; } pub(crate) mod markdown { @@ -49,6 +48,12 @@ pub(crate) mod markdown { pub(crate) mod parser; } +#[cfg(feature = "openai")] +pub mod llm { + pub mod extraction; + pub mod input; +} + pub mod bindings { #[cfg(feature = "python")] pub(crate) mod python; diff --git a/tests/data/expected_json_schema.json b/tests/data/expected_json_schema.json index 1636b17..f1d35fd 100644 --- a/tests/data/expected_json_schema.json +++ b/tests/data/expected_json_schema.json @@ -1,32 +1,61 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://www.github.com/my/repo/", "title": "Test", "type": "object", "properties": { - "name": { - "title": "name", - "term": "schema:hello", - "type": "string", - "description": "The name of the test." + "array_valued": { + "title": "array_valued", + "type": "array", + "$term": "http://schema.org/something", + "items": { + "$ref": "#/$defs/Test2" + } }, - "number": { - "title": "number", - "term": "schema:one", - "type": "number" + "multiple_types": { + "title": "multiple_types", + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/Test2" + } + ] }, - "test2": { - "term": "schema:something", + "multiple_types_array": { + "title": "multiple_types_array", "type": "array", "items": { - "$ref": "#/definitions/Test2" + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/Test2" + } + ] } }, + "name": { + "title": "name", + "description": "A test description", + "$term": "http://schema.org/hello" + }, + "number": { + "title": "number", + "$term": "http://schema.org/one" + }, "ontology": { "title": "ontology", - "$ref": "#/definitions/Ontology" + "$ref": "#/$defs/Ontology" + }, + "single_valued": { + "title": "single_valued", + "$ref": "#/$defs/Test2" } }, - "definitions": { + "$defs": { "Ontology": { "title": "Ontology", "type": "string", @@ -42,19 +71,24 @@ "properties": { "names": { "title": "names", - "term": "schema:hello", "type": "array", + "$term": "schema:hello", "items": { "type": "string" } }, "number": { "title": "number", - "term": "schema:one", - "type": "number", + "$term": "schema:one", "minimum": 0.0 } - } + }, + "required": [], + "additionalProperties": false } - } -} + }, + "required": [ + "name" + ], + "additionalProperties": false +} \ No newline at end of file diff --git a/tests/data/model_json_schema.md b/tests/data/model_json_schema.md new file mode 100644 index 0000000..ca7bb13 --- /dev/null +++ b/tests/data/model_json_schema.md @@ -0,0 +1,55 @@ +--- +id-field: true +repo: "https://www.github.com/my/repo/" +prefix: "tst" +prefixes: + schema: http://schema.org/ +nsmap: + tst: http://example.com/test/ +--- + +### Test + +- **name** + - Type: Identifier + - Term: schema:hello + - Description: A test description +- number + - Type: float + - Term: schema:one +- array_valued + - Type: [Test2](#test2)[] + - Term: schema:something +- single_valued + - Type: [Test2](#test2) +- ontology + - Type: Ontology +- multiple_types + - Type: float, Test2 +- multiple_types_array + - Type: float, Test2 + - Multiple: true + +### Test2 + +- names + - Type: string[] + - Term: schema:hello + - XML: name +- number + - Type: float + - Term: schema:one + - XML: @number + - Minimum: 0 + +## Enumerations + +### Ontology + +Ontology endpoints for different types of sequences. + +``` +GO = "https://amigo.geneontology.org/amigo/term/" +SIO = "http://semanticscience.org/resource/" +ECO = "https://www.evidenceontology.org/term/" +``` diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e188243..75cb7a5 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -132,62 +132,45 @@ mod tests { } #[test] - fn test_json_schema_known_obj() { + #[should_panic] + fn test_json_schema_no_objects() { // Arrange - let path = Path::new("tests/data/model.md"); - let model = DataModel::from_markdown(path).expect("Could not parse markdown"); + let model = mdmodels::datamodel::DataModel::new(None, None); // Act - let schema = model.json_schema(Some("Test".to_string())); - let schema: serde_json::Value = serde_json::from_str(&schema).unwrap(); - - // Assert - let expected_schema = - std::fs::read_to_string("tests/data/expected_json_schema.json").unwrap(); - // Parse with serde_json - let expected_schema: serde_json::Value = serde_json::from_str(&expected_schema).unwrap(); - - assert_eq!(schema, expected_schema); + model + .json_schema(Some("Test".to_string())) + .expect("Could not generate JSON schema"); } #[test] - fn test_json_schema_unknown_obj() { + fn test_json_schema() { // Arrange - let path = Path::new("tests/data/model.md"); + let path = Path::new("tests/data/model_json_schema.md"); let model = DataModel::from_markdown(path).expect("Could not parse markdown"); // Act - let schema = model.json_schema(None); - let schema: serde_json::Value = serde_json::from_str(&schema).unwrap(); + let schema = model + .json_schema(None) + .expect("Could not generate JSON schema"); // Assert - let expected_schema = - std::fs::read_to_string("tests/data/expected_json_schema.json").unwrap(); - // Parse with serde_json - let expected_schema: serde_json::Value = serde_json::from_str(&expected_schema).unwrap(); - - assert_eq!(schema, expected_schema); - } + let expected = std::fs::read_to_string("tests/data/expected_json_schema.json").unwrap(); - #[test] - #[should_panic] - fn test_json_schema_no_objects() { - // Arrange - let model = mdmodels::datamodel::DataModel::new(None, None); - - // Act - model.json_schema(Some("Test".to_string())); + assert_eq!(schema, expected); } #[test] #[should_panic] - fn test_json_schema_no_object() { + fn test_json_schema_object_not_found() { // Arrange let path = Path::new("tests/data/model.md"); let model = DataModel::from_markdown(path).expect("Could not parse markdown"); // Act - model.json_schema(Some("Test3".to_string())); + model + .json_schema(Some("Test3".to_string())) + .expect("Could not generate JSON schema"); } #[test] @@ -218,32 +201,6 @@ mod tests { model.sdrdm_schema(); } - #[test] - fn test_json_schema_all() { - // Arrange - let path = Path::new("tests/data/model.md"); - let model = DataModel::from_markdown(path).expect("Could not parse markdown"); - - // Act - model.json_schema_all("tests/intermediates/".to_string()); - - // Assert - let filenames = vec!["Test.json", "Test2.json"]; - for filename in filenames { - let obj_name = filename.replace(".json", ""); - let expected_schema = - std::fs::read_to_string(format!("tests/intermediates/{}", filename)).unwrap(); - let schema = model.json_schema(Some(obj_name)); - - assert_eq!( - serde_json::from_str::(schema.as_str()) - .expect("Could not parse generated schema"), - serde_json::from_str::(expected_schema.as_str()) - .expect("Could not parse expected schema") - ); - } - } - #[test] fn test_model_merge() { // Arrange From e733b573febd00af037eabc4addbbfc3b994c74e Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:41:23 +0100 Subject: [PATCH 05/17] add llm interface --- src/bin/cli.rs | 103 +++++++++++++++++++++++++++++++++++++++--- src/llm/extraction.rs | 94 ++++++++++++++++++++++++++++++++++++++ src/llm/input.rs | 46 +++++++++++++++++++ 3 files changed, 236 insertions(+), 7 deletions(-) create mode 100644 src/llm/extraction.rs create mode 100644 src/llm/input.rs diff --git a/src/bin/cli.rs b/src/bin/cli.rs index dd276c0..72230df 100644 --- a/src/bin/cli.rs +++ b/src/bin/cli.rs @@ -26,6 +26,7 @@ use colored::Colorize; use mdmodels::{ datamodel::DataModel, exporters::{render_jinja_template, Templates}, + llm::extraction::query_openai, pipeline::process_pipeline, }; use serde::{Deserialize, Serialize}; @@ -50,6 +51,8 @@ enum Commands { Validate(ValidateArgs), /// Pipeline for generating multiple files. Pipeline(PipelineArgs), + /// Large Language Model Extraction + Extract(ExtractArgs), } /// Arguments for the validate subcommand. @@ -92,6 +95,52 @@ struct PipelineArgs { input: PathBuf, } +/// Arguments for the extract subcommand. +#[derive(Parser, Debug)] +struct ExtractArgs { + /// Path or URL to the markdown model. + #[arg(short, long, help = "Path or URL to the markdown model")] + model: InputType, + + /// Prompt to use for extraction. + #[arg(short, long, help = "Path to the file to parse")] + input: PathBuf, + + /// Pre-prompt to use for extraction. + #[arg( + short, + long, + default_value = "You are a helpful assistant that extracts data from text input.", + help = "Pre-prompt to use for extraction" + )] + pre_prompt: String, + + /// OpenAI model to use for extraction. + #[arg( + short, + long, + default_value = "gpt-4o", + help = "OpenAI model to use for extraction. Defaults to 'gpt-4o'." + )] + llm_model: String, + + /// Root object to parse into. Defaults to the first entity in the model. + #[arg( + short, + long, + help = "Root object to parse into. Defaults to the first entity in the model." + )] + root: Option, + + /// Output file to write the extracted data to. + #[arg(short, long, help = "Output file to write the extracted data to")] + output: Option, + + /// Whether to extract multiple objects. + #[arg(long, help = "Whether to extract multiple objects")] + multiple: bool, +} + /// Represents the input type, either remote URL or local file path. #[derive(Deserialize, Serialize, Clone, Debug)] enum InputType { @@ -136,6 +185,7 @@ fn main() -> Result<(), Box> { Commands::Validate(args) => validate(args), Commands::Convert(args) => convert(args), Commands::Pipeline(args) => process_pipeline(&args.input), + Commands::Extract(args) => query_llm(args), } } @@ -176,6 +226,46 @@ fn print_validation_result(result: bool) { println!(" └── {}\n", message); } +fn query_llm(args: ExtractArgs) -> Result<(), Box> { + let path = resolve_input_path(&args.model); + let model = DataModel::from_markdown(&path)?; + let prompt = std::fs::read_to_string(&args.input)?; + let pre_prompt = args.pre_prompt; + let llm_model = args.llm_model; + let root = match args.root { + Some(root) => root, + None => model + .objects + .first() + .ok_or("No objects found in model".to_string())? + .name + .clone(), + }; + + let response = tokio::runtime::Runtime::new()?.block_on(query_openai( + &prompt, + &pre_prompt, + &model, + &root, + &llm_model, + args.multiple, + None, + ))?; + + match args.output { + Some(ref output) => { + let json_string = serde_json::to_string_pretty(&response)?; + std::fs::write(output, json_string).expect("Failed to write output"); + } + None => { + let json_string = serde_json::to_string_pretty(&response)?; + println!("{}", json_string); + } + } + + Ok(()) +} + /// Converts the markdown model specified in the arguments to another format. /// /// # Arguments @@ -194,7 +284,7 @@ fn convert(args: ConvertArgs) -> Result<(), Box> { // Render the template. let rendered = match args.template { - Templates::JsonSchema => model.json_schema(args.root), + Templates::JsonSchema => model.json_schema(args.root)?, _ => render_jinja_template(&args.template, &mut model, None)?, }; @@ -246,11 +336,10 @@ fn render_all_json_schemes( model: &DataModel, outdir: &Option, ) -> Result<(), Box> { - if outdir.is_none() { - panic!("Output directory is required for JSON Schema all"); - } - - let outdir = outdir.as_ref().unwrap(); + let outdir = match outdir { + Some(outdir) => outdir, + None => panic!("Output directory is required for JSON Schema all"), + }; // Check if the output is a directory if !outdir.is_dir() && outdir.exists() { @@ -261,7 +350,7 @@ fn render_all_json_schemes( fs::create_dir_all(outdir)?; // Render the JSON Schema for each entity - model.json_schema_all(outdir.to_str().unwrap().to_string()); + model.json_schema_all(outdir.to_path_buf())?; Ok(()) } diff --git a/src/llm/extraction.rs b/src/llm/extraction.rs new file mode 100644 index 0000000..078ef6f --- /dev/null +++ b/src/llm/extraction.rs @@ -0,0 +1,94 @@ +use std::env; + +use openai_api_rs::v1::{api::OpenAIClient, chat_completion}; +use serde_json::{json, Value}; + +use crate::{datamodel::DataModel, json::export::to_json_schema}; + +/// Queries the OpenAI API with a given prompt and pre-prompt, using a specified data model and root. +/// +/// # Arguments +/// +/// * `prompt` - The main prompt to send to the OpenAI API. +/// * `pre_prompt` - An additional pre-prompt to provide context or setup for the main prompt. +/// * `data_model` - The data model used to generate the JSON schema for the response format. +/// * `root` - The root name for the JSON schema. +/// * `model` - The OpenAI model to use for the chat completion. +/// * `multiple` - Whether to extract multiple objects. +/// +/// # Returns +/// +/// A `Result` containing a `serde_json::Value` with the parsed JSON response from the OpenAI API, or an error if the operation fails. +pub async fn query_openai( + prompt: &str, + pre_prompt: &str, + data_model: &DataModel, + root: &str, + model: &str, + multiple: bool, + api_key: Option, +) -> Result> { + let response_format = prepare_response_format(data_model, root, multiple)?; + let client = prepare_client(api_key)?; + let messages = vec![create_chat_message(pre_prompt), create_chat_message(prompt)]; + let req = chat_completion::ChatCompletionRequest::new(model.to_string(), messages) + .response_format(response_format) + .temperature(0.0); + + let result = client.chat_completion(req).await?; + let content = result + .choices + .first() + .and_then(|choice| choice.message.content.as_ref()) + .ok_or_else(|| format!("No content in response from {}", model))?; + + Ok(serde_json::from_str(content)?) +} + +fn prepare_response_format( + model: &DataModel, + root: &str, + multiple: bool, +) -> Result> { + let schema = to_json_schema(model, root)?; + + if multiple { + Ok(json!( + { "type": "json_schema", + "json_schema": { + "name": root, + "schema": { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": schema + } + } + } + } + } + )) + } else { + Ok(json!({ "type": "json_schema", "json_schema": { "name": root, "schema": schema } })) + } +} + +fn prepare_client(api_key: Option) -> Result> { + let api_key = match api_key { + Some(api_key) => api_key, + None => env::var("OPENAI_API_KEY")?, + }; + + OpenAIClient::builder().with_api_key(api_key).build() +} + +fn create_chat_message(content: &str) -> chat_completion::ChatCompletionMessage { + chat_completion::ChatCompletionMessage { + role: chat_completion::MessageRole::user, + content: chat_completion::Content::Text(content.to_string()), + name: None, + tool_calls: None, + tool_call_id: None, + } +} diff --git a/src/llm/input.rs b/src/llm/input.rs new file mode 100644 index 0000000..a967986 --- /dev/null +++ b/src/llm/input.rs @@ -0,0 +1,46 @@ +use std::path::PathBuf; + +use reqwest::Url; + +use crate::datamodel::DataModel; + +/// Represents different types of models that can be used. +/// +/// `ModelType` can be constructed from a local file path, a remote URL, or a `DataModel` instance. +pub enum ModelType { + Path(PathBuf), + Remote(Url), + Model(DataModel), +} + +impl TryFrom for ModelType { + type Error = Box; + + /// Attempts to create a `ModelType` from a `PathBuf`. + /// + /// Returns an error if the path does not exist. + fn try_from(path: PathBuf) -> Result { + if !path.exists() { + return Err(Box::from("Path does not exist")); + } + Ok(Self::Path(path)) + } +} + +impl TryFrom for ModelType { + type Error = Box; + + /// Creates a `ModelType` from a `Url`. + fn try_from(url: Url) -> Result { + Ok(Self::Remote(url)) + } +} + +impl TryFrom for ModelType { + type Error = Box; + + /// Creates a `ModelType` from a `DataModel`. + fn try_from(model: DataModel) -> Result { + Ok(Self::Model(model)) + } +} From 53ccc57277b56a8b457015cbceb330765e0d21f6 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:41:34 +0100 Subject: [PATCH 06/17] add json schema export --- src/bindings/wasm.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/bindings/wasm.rs b/src/bindings/wasm.rs index 72c7faa..fa7f10c 100644 --- a/src/bindings/wasm.rs +++ b/src/bindings/wasm.rs @@ -23,6 +23,7 @@ use crate::datamodel::DataModel; use crate::exporters::Templates; +use crate::json::export::to_json_schema; use crate::validation::Validator; use serde_wasm_bindgen::to_value; use wasm_bindgen::prelude::*; @@ -75,6 +76,40 @@ pub fn convert_to(markdown_content: &str, template: Templates) -> Result) -> Result { + let model = DataModel::from_markdown_string(markdown_content) + .map_err(|e| JsValue::from_str(&format!("Error parsing markdown content: {}", e)))?; + + let root = match root { + Some(root) => root, + None => model + .objects + .first() + .ok_or(JsValue::from_str("No objects found in model"))? + .name + .clone(), + }; + + let json_schema = to_json_schema(&model, &root) + .map_err(|e| JsValue::from_str(&format!("Error serializing schema: {}", e)))?; + + // Directly return the JSON schema object instead of converting it to a JsValue + Ok(serde_json::to_string(&json_schema).unwrap()) +} + /// Validates the given markdown content and returns the validation result as a `JsValue`. /// /// # Arguments From c448662d4a23ed483410b6964ecef49495b38c66 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 00:41:44 +0100 Subject: [PATCH 07/17] add `is_enum` identifier --- src/attribute.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/attribute.rs b/src/attribute.rs index 0467611..9f71e4d 100644 --- a/src/attribute.rs +++ b/src/attribute.rs @@ -55,6 +55,8 @@ pub struct Attribute { /// XML type information for the attribute. #[serde(skip_serializing_if = "Option::is_none")] pub xml: Option, + /// Is an enumeration or not + pub is_enum: bool, } impl Attribute { @@ -76,6 +78,7 @@ impl Attribute { required, xml: Some(XMLType::from_str(name.as_str()).unwrap()), default: None, + is_enum: false, } } From df2c45901d41450d7e66e7c30bcd1911892d9c21 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:02:17 +0100 Subject: [PATCH 08/17] change to `oneOf` --- src/json/export.rs | 15 ++++++++------- src/json/schema.rs | 14 +++++++------- tests/data/expected_json_schema.json | 9 ++++++--- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/json/export.rs b/src/json/export.rs index e451af6..dc227f1 100644 --- a/src/json/export.rs +++ b/src/json/export.rs @@ -332,11 +332,12 @@ impl TryFrom<&Attribute> for schema::Property { }; let items: Option = attr.into(); - let any_of = (!attr.is_array).then(|| attr.into()); + let one_of = (!attr.is_array).then(|| attr.into()); let description = (!attr.docstring.is_empty()).then(|| attr.docstring.clone()); let enum_values = if attr.is_enum { Some(Vec::new()) } else { None }; - if any_of.is_some() { + if attr.dtypes.len() > 1 { + // If there are multiple types, we need to use the AnyOf case dtype = None; } @@ -347,7 +348,7 @@ impl TryFrom<&Attribute> for schema::Property { term: attr.term.clone(), reference, options, - any_of, + one_of, items, enum_values, }) @@ -401,14 +402,14 @@ impl From<&Attribute> for Option { return None; } - // Check if it is an AnyOf case - let any_of: Vec = attr.into(); + // Check if it is an OneOf case + let one_of: Vec = attr.into(); - if any_of.is_empty() { + if one_of.is_empty() { // There is just a single type Some(process_dtype(&attr.dtypes[0])) } else { - Some(schema::Item::AnyOfItem(schema::AnyOfItemType { any_of })) + Some(schema::Item::OneOfItem(schema::OneOfItemType { one_of })) } } } diff --git a/src/json/schema.rs b/src/json/schema.rs index de572ff..2aaa3c6 100644 --- a/src/json/schema.rs +++ b/src/json/schema.rs @@ -90,8 +90,8 @@ pub struct Property { pub options: HashMap, #[serde(skip_serializing_if = "Option::is_none")] pub items: Option, - #[serde(rename = "anyOf", skip_serializing_if = "skip_empty")] - pub any_of: Option>, + #[serde(rename = "oneOf", skip_serializing_if = "skip_empty")] + pub one_of: Option>, #[serde(skip_serializing_if = "skip_empty", rename = "enum")] pub enum_values: Option>, } @@ -100,7 +100,7 @@ pub struct Property { #[serde(untagged)] pub enum Item { ReferenceItem(ReferenceItemType), - AnyOfItem(AnyOfItemType), + OneOfItem(OneOfItemType), DataTypeItem(DataTypeItemType), } @@ -111,7 +111,7 @@ impl Serialize for Item { { match self { Item::ReferenceItem(ref_item) => ref_item.serialize(serializer), - Item::AnyOfItem(any_of_item) => any_of_item.serialize(serializer), + Item::OneOfItem(one_of_item) => one_of_item.serialize(serializer), Item::DataTypeItem(data_type_item) => data_type_item.serialize(serializer), } } @@ -124,9 +124,9 @@ pub struct ReferenceItemType { } #[derive(Debug, Deserialize, Serialize)] -pub struct AnyOfItemType { - #[serde(rename = "anyOf")] - pub any_of: Vec, +pub struct OneOfItemType { + #[serde(rename = "oneOf")] + pub one_of: Vec, } #[derive(Debug, Deserialize, Serialize)] diff --git a/tests/data/expected_json_schema.json b/tests/data/expected_json_schema.json index f1d35fd..e657786 100644 --- a/tests/data/expected_json_schema.json +++ b/tests/data/expected_json_schema.json @@ -14,7 +14,7 @@ }, "multiple_types": { "title": "multiple_types", - "anyOf": [ + "oneOf": [ { "type": "number" }, @@ -25,9 +25,8 @@ }, "multiple_types_array": { "title": "multiple_types_array", - "type": "array", "items": { - "anyOf": [ + "oneOf": [ { "type": "number" }, @@ -39,11 +38,13 @@ }, "name": { "title": "name", + "type": "string", "description": "A test description", "$term": "http://schema.org/hello" }, "number": { "title": "number", + "type": "number", "$term": "http://schema.org/one" }, "ontology": { @@ -52,6 +53,7 @@ }, "single_valued": { "title": "single_valued", + "type": "object", "$ref": "#/$defs/Test2" } }, @@ -79,6 +81,7 @@ }, "number": { "title": "number", + "type": "number", "$term": "schema:one", "minimum": 0.0 } From 3d2de8961f447784a3adce2a5ec4a55fed07eecf Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:02:32 +0100 Subject: [PATCH 09/17] remove pascal case check --- src/validation.rs | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/validation.rs b/src/validation.rs index 8efe194..3ed70de 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -34,7 +34,7 @@ use std::error::Error; use std::fmt::{Display, Formatter}; // Basic types that are ignored in the validation process -const BASIC_TYPES: [&str; 7] = [ +pub(crate) const BASIC_TYPES: [&str; 7] = [ "string", "number", "integer", "boolean", "float", "date", "bytes", ]; @@ -299,7 +299,6 @@ impl Validator { starts_with_character, contains_white_space, contains_special_characters, - is_pascal_case, ]; for check in checks { @@ -536,31 +535,3 @@ fn contains_special_characters(name: &str) -> Result<(), String> { || Err(format!("Name '{}' contains special characters, which are not valid except for underscores.", name)) ).unwrap_or(Ok(())) } - -/// Checks if the given name is in PascalCase. -/// -/// # Arguments -/// -/// * `name` - A string slice that holds the name to be checked. -/// -/// # Returns -/// -/// A `Result` which is: -/// - `Ok(())` if the name is in PascalCase. -/// - `Err(String)` if the name is not in PascalCase. -fn is_pascal_case(name: &str) -> Result<(), String> { - let no_snake = name.chars().all(|c| c.is_alphanumeric() || c == '_'); - let first_uppercase = name - .chars() - .next() - .map(|c| c.is_uppercase()) - .unwrap_or(false); - - if !no_snake || !first_uppercase { - return Err( - format!("Name '{}' is not in PascalCase. Names must be in PascalCase and not contain underscores", name) - ); - } - - Ok(()) -} From ae4107700b9485d59391d514e6e0615cc82e593f Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:02:46 +0100 Subject: [PATCH 10/17] update expected sdrdm schema --- tests/data/expected_sdrdm_full_schema.json | 44 +++++++++++++++------- tests/data/expected_sdrdm_schema.json | 32 ++++++++++++---- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/tests/data/expected_sdrdm_full_schema.json b/tests/data/expected_sdrdm_full_schema.json index 986f639..b8551a4 100644 --- a/tests/data/expected_sdrdm_full_schema.json +++ b/tests/data/expected_sdrdm_full_schema.json @@ -8,7 +8,9 @@ "name": "name", "multiple": false, "is_id": false, - "dtypes": ["string"], + "dtypes": [ + "string" + ], "docstring": "", "options": [], "term": "schema:hello", @@ -16,13 +18,16 @@ "xml": { "is_attr": true, "name": "name" - } + }, + "is_enum": false }, { "name": "number", "multiple": false, "is_id": false, - "dtypes": ["float"], + "dtypes": [ + "float" + ], "docstring": "", "options": [], "term": "schema:one", @@ -30,13 +35,16 @@ "xml": { "is_attr": true, "name": "number" - } + }, + "is_enum": false }, { "name": "test2", "multiple": true, "is_id": false, - "dtypes": ["Test2"], + "dtypes": [ + "Test2" + ], "docstring": "", "options": [], "term": "schema:something", @@ -44,13 +52,16 @@ "xml": { "is_attr": false, "name": "SomeTest2" - } + }, + "is_enum": false }, { "name": "ontology", "multiple": false, "is_id": false, - "dtypes": ["Ontology"], + "dtypes": [ + "Ontology" + ], "docstring": "", "options": [], "term": null, @@ -58,7 +69,8 @@ "xml": { "is_attr": false, "name": "ontology" - } + }, + "is_enum": true } ], "docstring": "The schema defines the primary structure for the test entity. It contains multiple properties that capture various attributes of a test, including its name, numerical identifier, a nested array of test2 objects, and an ontology reference." @@ -70,7 +82,9 @@ "name": "names", "multiple": true, "is_id": false, - "dtypes": ["string"], + "dtypes": [ + "string" + ], "docstring": "", "options": [], "term": "schema:hello", @@ -78,13 +92,16 @@ "xml": { "is_attr": false, "name": "name" - } + }, + "is_enum": false }, { "name": "number", "multiple": false, "is_id": false, - "dtypes": ["float"], + "dtypes": [ + "float" + ], "docstring": "", "options": [ { @@ -97,7 +114,8 @@ "xml": { "is_attr": true, "name": "number" - } + }, + "is_enum": false } ], "docstring": "The schema represents a secondary structure used within the primary schema. It is designed to hold an array of strings for names, a numerical value with a specified minimum, and other nested properties." @@ -125,4 +143,4 @@ "repo": "https://www.github.com/my/repo/", "prefix": "tst" } -} +} \ No newline at end of file diff --git a/tests/data/expected_sdrdm_schema.json b/tests/data/expected_sdrdm_schema.json index 48057fb..8f4df55 100644 --- a/tests/data/expected_sdrdm_schema.json +++ b/tests/data/expected_sdrdm_schema.json @@ -7,11 +7,14 @@ "name": "name", "multiple": false, "is_id": true, - "dtypes": ["string"], + "dtypes": [ + "string" + ], "docstring": "The name of the test.", "options": [], "term": "schema:hello", "required": true, + "is_enum": false, "xml": { "is_attr": true, "name": "name" @@ -21,12 +24,15 @@ "name": "number", "multiple": false, "is_id": false, - "dtypes": ["float"], + "dtypes": [ + "float" + ], "docstring": "", "options": [], "term": "schema:one", "required": false, "default": 1.0, + "is_enum": false, "xml": { "is_attr": true, "name": "number" @@ -36,11 +42,14 @@ "name": "test2", "multiple": true, "is_id": false, - "dtypes": ["Test2"], + "dtypes": [ + "Test2" + ], "docstring": "", "options": [], "term": "schema:something", "required": false, + "is_enum": false, "xml": { "is_attr": false, "name": "SomeTest2" @@ -50,11 +59,14 @@ "name": "ontology", "multiple": false, "is_id": false, - "dtypes": ["Ontology"], + "dtypes": [ + "Ontology" + ], "docstring": "", "options": [], "term": null, "required": false, + "is_enum": true, "xml": { "is_attr": false, "name": "ontology" @@ -70,11 +82,14 @@ "name": "names", "multiple": true, "is_id": false, - "dtypes": ["string"], + "dtypes": [ + "string" + ], "docstring": "", "options": [], "term": "schema:hello", "required": false, + "is_enum": false, "xml": { "is_attr": false, "name": "name" @@ -84,7 +99,9 @@ "name": "number", "multiple": false, "is_id": false, - "dtypes": ["float"], + "dtypes": [ + "float" + ], "docstring": "", "options": [ { @@ -94,6 +111,7 @@ ], "term": "schema:one", "required": false, + "is_enum": false, "xml": { "is_attr": true, "name": "number" @@ -125,4 +143,4 @@ "repo": "https://www.github.com/my/repo/", "prefix": "tst" } -} +} \ No newline at end of file From 111059c74823c9f73b5e06f84b154dd2b5478d9c Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:03:02 +0100 Subject: [PATCH 11/17] change json schema import --- src/datamodel.rs | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/src/datamodel.rs b/src/datamodel.rs index 9f575f0..c756977 100644 --- a/src/datamodel.rs +++ b/src/datamodel.rs @@ -22,17 +22,17 @@ */ use std::collections::HashMap; +use std::path::PathBuf; use std::{error::Error, fs, path::Path}; use log::error; use serde::{Deserialize, Serialize}; use crate::exporters::{render_jinja_template, Templates}; -use crate::json::parser::parse_json_schema; +use crate::json::export::to_json_schema; use crate::markdown::frontmatter::FrontMatter; use crate::markdown::parser::parse_markdown; use crate::object::{Enumeration, Object}; -use crate::schema; use colored::Colorize; use crate::validation::Validator; @@ -102,7 +102,7 @@ impl DataModel { // # Returns // // A JSON schema string - pub fn json_schema(&self, obj_name: Option) -> String { + pub fn json_schema(&self, obj_name: Option) -> Result> { if self.objects.is_empty() { panic!("No objects found in the markdown file"); } @@ -112,9 +112,12 @@ impl DataModel { if self.objects.iter().all(|o| o.name != name) { panic!("Object '{}' not found in the markdown file", name); } - schema::to_json_schema(&name, self) + Ok(serde_json::to_string_pretty(&to_json_schema(self, &name)?)?) } - None => schema::to_json_schema(&self.objects[0].name, self), + None => Ok(serde_json::to_string_pretty(&to_json_schema( + self, + &self.objects[0].name, + )?)?), } } @@ -134,7 +137,7 @@ impl DataModel { // model.parse("path/to/file.md".to_string()); // model.json_schema_all("path/to/directory".to_string()); // ``` - pub fn json_schema_all(&self, path: String) { + pub fn json_schema_all(&self, path: PathBuf) -> Result<(), Box> { if self.objects.is_empty() { panic!("No objects found in the markdown file"); } @@ -144,11 +147,15 @@ impl DataModel { fs::create_dir_all(&path).expect("Could not create directory"); } + let base_path = path.to_str().ok_or("Failed to convert path to string")?; for object in &self.objects { - let schema = schema::to_json_schema(&object.name, self); - let file_name = format!("{}/{}.json", path, object.name); - fs::write(file_name, schema).expect("Could not write file"); + let schema = to_json_schema(self, &object.name)?; + let file_name = format!("{}/{}.json", base_path, object.name); + fs::write(file_name, serde_json::to_string_pretty(&schema)?) + .expect("Could not write file"); } + + Ok(()) } // Get the SDRDM schema for the markdown file @@ -314,13 +321,6 @@ impl DataModel { pub fn from_markdown_string(content: &str) -> Result { parse_markdown(content) } - - /// Parse a JSON schema and create a data model - /// - /// * `path` - Path to the JSON schema file - pub fn from_json_schema(path: &Path) -> Result> { - parse_json_schema(path) - } } #[cfg(test)] @@ -350,6 +350,7 @@ mod tests { required: false, xml: None, default: None, + is_enum: false, }); let mut obj2 = Object::new("Object2".to_string(), None); @@ -364,6 +365,7 @@ mod tests { required: false, xml: None, default: None, + is_enum: false, }); let enm1 = Enumeration { @@ -411,6 +413,7 @@ mod tests { required: false, xml: None, default: Some(DataType::String("".to_string())), + is_enum: false, }); obj.add_attribute(crate::attribute::Attribute { @@ -424,6 +427,7 @@ mod tests { required: true, xml: None, default: None, + is_enum: false, }); model.objects.push(obj); @@ -449,18 +453,6 @@ mod tests { assert_eq!(model.enums.len(), 1); } - #[test] - fn test_from_json_schema() { - // Arrange - let path = Path::new("tests/data/expected_json_schema.json"); - - // Act - let model = DataModel::from_json_schema(path).expect("Failed to parse JSON schema"); - - // Assert - assert_eq!(model.objects.len(), 2); - } - #[test] fn test_from_markdown_w_html() { // Arrange From 1a529573dc93cec875af09ce8fb237447178497c Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:03:24 +0100 Subject: [PATCH 12/17] mark enums for later processing --- src/markdown/parser.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/markdown/parser.rs b/src/markdown/parser.rs index 2ade175..7df9b5d 100644 --- a/src/markdown/parser.rs +++ b/src/markdown/parser.rs @@ -103,6 +103,9 @@ pub fn parse_markdown(content: &str) -> Result { model.enums = enums.into_iter().filter(|e| e.has_values()).collect(); model.objects = objects.into_iter().filter(|o| o.has_attributes()).collect(); + // Set 'is_enum' for all attributes using an enumeration + set_enum_attributes(&mut model); + // Add internal types, if used add_internal_types(&mut model); @@ -553,3 +556,35 @@ fn add_internal_types(model: &mut DataModel) { } } } + +/// Sets the `is_enum` flag for attributes that are enumerations. +/// +/// This function iterates through all objects and their attributes in the data model. +/// If an attribute's data types match any of the enumeration names, the `is_enum` flag +/// is set to `true`. If an attribute has data types that do not match any enumeration, +/// an error is returned. +/// +/// # Arguments +/// +/// * `model` - A mutable reference to the data model. +fn set_enum_attributes(model: &mut DataModel) { + let enums = model + .enums + .iter() + .map(|e| e.name.clone()) + .collect::>(); + + for object in model.objects.iter_mut() { + for attr in object.attributes.iter_mut() { + let enum_dtypes: Vec = attr + .dtypes + .iter() + .filter(|dtype| enums.contains(dtype)) + .cloned() + .collect(); + if !enum_dtypes.is_empty() && enum_dtypes.len() == attr.dtypes.len() { + attr.is_enum = true; + } + } + } +} From 434dbae44f07a5c4af2174a75da15b4e2243cca0 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:03:50 +0100 Subject: [PATCH 13/17] convert to `PathBuf` --- src/pipeline.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pipeline.rs b/src/pipeline.rs index 2ed3683..684a484 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -312,7 +312,7 @@ fn serialize_to_json_schema( match root { Some(root) => { - let schema = model.json_schema(Some(root)); + let schema = model.json_schema(Some(root))?; save_to_file(out, &schema)?; print_render_msg(out, &Templates::JsonSchema); Ok(()) @@ -376,7 +376,7 @@ fn serialize_all_json_schemes( match merge_state { MergeState::Merge => { let model = build_models(specs)?; - model.json_schema_all(out.to_str().unwrap().to_string()); + model.json_schema_all(out.to_path_buf())?; print_render_msg(out, &Templates::JsonSchemaAll); Ok(()) } @@ -384,7 +384,7 @@ fn serialize_all_json_schemes( for spec in specs { let model = DataModel::from_markdown(spec)?; let path = out.join(get_file_name(spec)); - model.json_schema_all(path.to_str().unwrap().to_string()); + model.json_schema_all(path.to_path_buf())?; print_render_msg(&path, &Templates::JsonSchemaAll); } Ok(()) From 4adc919740f28fccaf21463269a0fd3d56e73613 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:04:01 +0100 Subject: [PATCH 14/17] update unit definition --- types/equation/equation-internal.json | 30 ++++++++++------ types/equation/equation.json | 34 +++++++++---------- .../unit-definition-internal.json | 21 ++++++++---- 3 files changed, 51 insertions(+), 34 deletions(-) diff --git a/types/equation/equation-internal.json b/types/equation/equation-internal.json index 7a6ff2a..f28a8a8 100644 --- a/types/equation/equation-internal.json +++ b/types/equation/equation-internal.json @@ -18,7 +18,8 @@ "xml": { "is_attr": false, "name": "equation" - } + }, + "is_enum": false }, { "name": "variables", @@ -34,7 +35,8 @@ "xml": { "is_attr": false, "name": "list_of_variables" - } + }, + "is_enum": false }, { "name": "parameters", @@ -50,7 +52,8 @@ "xml": { "is_attr": false, "name": "list_of_parameters" - } + }, + "is_enum": false } ], "docstring": "Represents an equation that can be used in a data model." @@ -72,7 +75,8 @@ "xml": { "is_attr": true, "name": "id" - } + }, + "is_enum": false }, { "name": "name", @@ -88,7 +92,8 @@ "xml": { "is_attr": true, "name": "name" - } + }, + "is_enum": false }, { "name": "symbol", @@ -104,7 +109,8 @@ "xml": { "is_attr": true, "name": "symbol" - } + }, + "is_enum": false } ], "docstring": "Represents a variable that is used in the equation." @@ -126,7 +132,8 @@ "xml": { "is_attr": true, "name": "id" - } + }, + "is_enum": false }, { "name": "name", @@ -142,7 +149,8 @@ "xml": { "is_attr": true, "name": "name" - } + }, + "is_enum": false }, { "name": "symbol", @@ -158,7 +166,8 @@ "xml": { "is_attr": true, "name": "symbol" - } + }, + "is_enum": false }, { "name": "value", @@ -174,7 +183,8 @@ "xml": { "is_attr": true, "name": "value" - } + }, + "is_enum": false } ], "docstring": "Represents a parameter that is used in the equation." diff --git a/types/equation/equation.json b/types/equation/equation.json index 76a60a2..6a9008f 100644 --- a/types/equation/equation.json +++ b/types/equation/equation.json @@ -23,54 +23,54 @@ }, "description": "Represents an equation that can be used in a data model.", "definitions": { - "EqVariable": { - "title": "EqVariable", + "EqParameter": { + "title": "EqParameter", "type": "object", "properties": { "id": { "title": "id", - "description": "Unique identifier for the variable.", + "description": "Unique identifier for the parameter.", "type": "string" }, "name": { "title": "name", - "description": "Name of the variable.", + "description": "Name of the parameter.", "type": "string" }, "symbol": { "title": "symbol", - "description": "Symbol of the variable.", + "description": "Symbol of the parameter.", "type": "string" + }, + "value": { + "title": "value", + "description": "Value of the parameter.", + "type": "number" } }, - "description": "Represents a variable that is used in the equation." + "description": "Represents a parameter that is used in the equation." }, - "EqParameter": { - "title": "EqParameter", + "EqVariable": { + "title": "EqVariable", "type": "object", "properties": { "id": { "title": "id", - "description": "Unique identifier for the parameter.", + "description": "Unique identifier for the variable.", "type": "string" }, "name": { "title": "name", - "description": "Name of the parameter.", + "description": "Name of the variable.", "type": "string" }, "symbol": { "title": "symbol", - "description": "Symbol of the parameter.", + "description": "Symbol of the variable.", "type": "string" - }, - "value": { - "title": "value", - "description": "Value of the parameter.", - "type": "number" } }, - "description": "Represents a parameter that is used in the equation." + "description": "Represents a variable that is used in the equation." } } } \ No newline at end of file diff --git a/types/unit-definition/unit-definition-internal.json b/types/unit-definition/unit-definition-internal.json index 55bf939..5cc0ece 100644 --- a/types/unit-definition/unit-definition-internal.json +++ b/types/unit-definition/unit-definition-internal.json @@ -18,7 +18,8 @@ "xml": { "is_attr": true, "name": "id" - } + }, + "is_enum": false }, { "name": "name", @@ -34,7 +35,8 @@ "xml": { "is_attr": true, "name": "name" - } + }, + "is_enum": false }, { "name": "base_units", @@ -50,7 +52,8 @@ "xml": { "is_attr": false, "name": "base_units" - } + }, + "is_enum": false } ], "docstring": "Represents a unit definition that is based on the SI unit system." @@ -72,7 +75,8 @@ "xml": { "is_attr": true, "name": "kind" - } + }, + "is_enum": true }, { "name": "exponent", @@ -88,7 +92,8 @@ "xml": { "is_attr": true, "name": "exponent" - } + }, + "is_enum": false }, { "name": "multiplier", @@ -104,7 +109,8 @@ "xml": { "is_attr": true, "name": "multiplier" - } + }, + "is_enum": false }, { "name": "scale", @@ -120,7 +126,8 @@ "xml": { "is_attr": true, "name": "scale" - } + }, + "is_enum": false } ], "docstring": "Represents a base unit in the unit definition." From b1678ecf22910144014f36eb60fbdbc54fa0cc6a Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:06:10 +0100 Subject: [PATCH 15/17] bump version --- Cargo.lock | 2 +- Cargo.toml | 2 +- pyproject.toml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 036a9b7..e76ce56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -881,7 +881,7 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "mdmodels" -version = "0.1.5" +version = "0.1.6" dependencies = [ "assert_cmd", "clap", diff --git a/Cargo.toml b/Cargo.toml index f8b3825..f7621c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "mdmodels" authors = ["Jan Range "] description = "A tool to generate models, code and schemas from markdown files" -version = "0.1.5" +version = "0.1.6" edition = "2021" license = "MIT" repository = "https://github.com/FAIRChemistry/md-models" diff --git a/pyproject.toml b/pyproject.toml index a895c06..1879530 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "mdmodels_core" -version = "0.1.5" +version = "0.1.6" description = "A tool to generate models, code and schemas from markdown files" requires-python = ">=3.8" classifiers = [ @@ -15,4 +15,4 @@ classifiers = [ [tool.maturin] -features = ["pyo3/extension-module"] \ No newline at end of file +features = ["pyo3/extension-module"] From a8ebda445ef8e6f5fb0c4c6f8ec5c89a7efd6d80 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:06:19 +0100 Subject: [PATCH 16/17] add feature flag for cli --- .github/workflows/assets.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/assets.yml b/.github/workflows/assets.yml index 33ed71f..3dcd0ed 100644 --- a/.github/workflows/assets.yml +++ b/.github/workflows/assets.yml @@ -45,7 +45,7 @@ jobs: targets: ${{ matrix.target }} - name: Build - run: cargo build --verbose --release --target ${{ matrix.target }} + run: cargo build --features openai --verbose --release --target ${{ matrix.target }} - name: Build binary shell: bash From 55e9036c3433d72c07d9140526537fb726f420dc Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 12 Dec 2024 01:09:34 +0100 Subject: [PATCH 17/17] fix missing type --- src/json/export.rs | 6 +++--- tests/data/expected_json_schema.json | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/json/export.rs b/src/json/export.rs index dc227f1..0402417 100644 --- a/src/json/export.rs +++ b/src/json/export.rs @@ -336,7 +336,7 @@ impl TryFrom<&Attribute> for schema::Property { let description = (!attr.docstring.is_empty()).then(|| attr.docstring.clone()); let enum_values = if attr.is_enum { Some(Vec::new()) } else { None }; - if attr.dtypes.len() > 1 { + if attr.dtypes.len() > 1 && !attr.is_array { // If there are multiple types, we need to use the AnyOf case dtype = None; } @@ -489,7 +489,7 @@ mod tests { let expected_json = json!({ "title": "test_attribute", - "anyOf": [ + "oneOf": [ {"type": "string"}, {"$ref": "#/$defs/RefType"}, ] @@ -523,7 +523,7 @@ mod tests { "title": "test_attribute", "type": "array", "items": { - "anyOf": [ + "oneOf": [ {"type": "string"}, {"$ref": "#/$defs/RefType"} ] diff --git a/tests/data/expected_json_schema.json b/tests/data/expected_json_schema.json index e657786..b4ecfcf 100644 --- a/tests/data/expected_json_schema.json +++ b/tests/data/expected_json_schema.json @@ -25,6 +25,7 @@ }, "multiple_types_array": { "title": "multiple_types_array", + "type": "array", "items": { "oneOf": [ {