Skip to content

Commit 14ceed0

Browse files
committedSep 6, 2024·
Add support for interpolated strings (f-strings) in the parser
Original compiler PR: kaitai-io/kaitai_struct_compiler#258
1 parent eb30135 commit 14ceed0

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed
 

‎src/model/expressions.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ pub enum OwningNode {
2727
/// Boolean constant
2828
Bool(bool),
2929

30+
/// String with embedded expressions (interpolated string, f-string).
31+
///
32+
/// Literal parts are represented by [`OwningNode::Str`] nodes, interpolated
33+
/// parts are represented by nodes of any other kind.
34+
InterpolatedStr(Vec<OwningNode>),
35+
3036
/// Name of field of the type in which attribute expression is defined
3137
Attr(FieldName),
3238
/// Built-in variable
@@ -127,6 +133,7 @@ impl OwningNode {
127133
Node::Int(val) => Int(val),
128134
Node::Float(val)=> Float(val),
129135
Node::Bool(val) => Bool(val),
136+
Node::InterpolatedStr(val) => InterpolatedStr(Self::validate_all(val)),
130137

131138
//TODO: Name already contains only valid symbols, but we need to check that it really exists
132139
Node::Attr(val) => Attr(FieldName::valid(val)),

‎src/parser/expressions.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ pub enum Node<'input> {
2626
/// Boolean constant
2727
Bool(bool),
2828

29+
/// String with embedded expressions (interpolated string, f-string).
30+
///
31+
/// Literal parts are represented by [`Node::Str`] nodes, interpolated
32+
/// parts are represented by nodes of any other kind.
33+
InterpolatedStr(Vec<Node<'input>>),
34+
2935
/// Name of field of the type in which attribute expression is defined
3036
Attr(&'input str),
3137
/// Built-in variable
@@ -486,6 +492,18 @@ peg::parser! {
486492
rule quoted_oct() -> char = s:$(oct()+) {? to_char(s, 8) };
487493
rule quoted_hex() -> char = ['u'] s:$(hex()*<4>) {? to_char(s, 16) };
488494

495+
/// String which interpolates expressions inside `{}`.
496+
/// Defined as a string literal prefixed with the `f` character: `f"..."`
497+
rule fstring() -> Vec<Node<'input>> = "f\"" e:fstring_element()* "\"" {e};
498+
rule fstring_element() -> Node<'input>
499+
// Interpolated expression inside of f-string (inside `{}`)
500+
= "{" _ e:expr() _ "}" {e}
501+
// Literal part of interpolated string
502+
/ v:(fstring_ch() / escaped())+ { Node::Str(String::from_iter(v.into_iter())) }
503+
;
504+
/// Single non-escaped character in f-string
505+
rule fstring_ch() -> char = [^ '"' | '\\' | '{'];
506+
489507
rule integer() -> BigInt
490508
= n:$(['1'..='9'] ['0'..='9' | '_']*) {? to_int(n, 10) }
491509
/ "0" ['b' | 'B'] n:$(bin()+) {? to_int(n, 2) }
@@ -608,6 +626,7 @@ peg::parser! {
608626
/ "[" _ l:list()? _ "]" { Node::List(l.unwrap_or_default()) }
609627
/ "sizeof" _ "<" _ t:type_ref() _ ">" { Node::SizeOf { type_: t, bit: false } }
610628
/ "bitsizeof" _ "<" _ t:type_ref() _ ">" { Node::SizeOf { type_: t, bit: true } }
629+
/ e:fstring() { Node::InterpolatedStr(e) }
611630
/ v:(s:string() _ {s})+ { Node::Str(String::from_iter(v.into_iter())) }
612631
/ n:special_name() !name_part() { n }
613632
/ e:enum_name() { e }
@@ -1009,6 +1028,109 @@ mod parse {
10091028
}
10101029
}
10111030

1031+
mod f_string {
1032+
use super::*;
1033+
use pretty_assertions::assert_eq;
1034+
1035+
#[test]
1036+
fn empty() {
1037+
assert_eq!(parse_single(r#" f"" "#), Ok(InterpolatedStr(vec![])));
1038+
}
1039+
1040+
#[test]
1041+
fn literal() {
1042+
assert_eq!(parse_single(r#" f"\n\r\t 1\n\r\t 2\n\r\t " "#), Ok(InterpolatedStr(vec![
1043+
Str("\n\r\t 1\n\r\t 2\n\r\t ".into()),
1044+
])));
1045+
}
1046+
1047+
#[test]
1048+
fn literal_then_expr() {
1049+
assert_eq!(parse_single(r#" f"foo={123}" "#), Ok(InterpolatedStr(vec![
1050+
Str("foo=".into()),
1051+
Int(123.into()),
1052+
])));
1053+
}
1054+
1055+
#[test]
1056+
fn expr_then_literal() {
1057+
assert_eq!(parse_single(r#" f"{123}=abc" "#), Ok(InterpolatedStr(vec![
1058+
Int(123.into()),
1059+
Str("=abc".into()),
1060+
])));
1061+
}
1062+
1063+
#[test]
1064+
fn expr_then_expr() {
1065+
assert_eq!(parse_single(r#" f"{123}{abc}" "#), Ok(InterpolatedStr(vec![
1066+
Int(123.into()),
1067+
Attr("abc".into()),
1068+
])));
1069+
}
1070+
1071+
mod interpolated {
1072+
use super::*;
1073+
use pretty_assertions::assert_eq;
1074+
1075+
#[test]
1076+
fn int_expr() {
1077+
assert_eq!(parse_single(r#" f"{123}" "#), Ok(InterpolatedStr(vec![
1078+
Int(123.into()),
1079+
])));
1080+
}
1081+
1082+
#[test]
1083+
fn str_expr() {
1084+
assert_eq!(parse_single(r#" f"abc{"def"}ghi" "#), Ok(InterpolatedStr(vec![
1085+
Str("abc".into()),
1086+
Str("def".into()),
1087+
Str("ghi".into()),
1088+
])));
1089+
}
1090+
1091+
#[test]
1092+
fn f_str_literal_expr() {
1093+
assert_eq!(parse_single(r#" f" { f"def" } " "#), Ok(InterpolatedStr(vec![
1094+
Str(" ".into()),
1095+
// f"def"
1096+
InterpolatedStr(vec![Str("def".into())]),
1097+
Str(" ".into()),
1098+
])));
1099+
}
1100+
1101+
#[test]
1102+
fn f_str_expr_expr() {
1103+
assert_eq!(parse_single(r#" f" { f"{def}" } " "#), Ok(InterpolatedStr(vec![
1104+
Str(" ".into()),
1105+
// f"{def}"
1106+
InterpolatedStr(vec![Attr("def".into())]),
1107+
Str(" ".into()),
1108+
])));
1109+
}
1110+
}
1111+
1112+
#[test]
1113+
fn double_quote_in_literal() {
1114+
assert_eq!(parse_single(r#" f"this \" is a quote" "#), Ok(InterpolatedStr(vec![
1115+
Str("this \" is a quote".into())
1116+
])));
1117+
}
1118+
1119+
#[test]
1120+
fn starts_with_quote() {
1121+
assert_eq!(parse_single(r#" f"\" is a quote" "#), Ok(InterpolatedStr(vec![
1122+
Str("\" is a quote".into())
1123+
])));
1124+
}
1125+
1126+
#[test]
1127+
fn starts_with_space_quote() {
1128+
assert_eq!(parse_single(r#" f" \" is a quote" "#), Ok(InterpolatedStr(vec![
1129+
Str(" \" is a quote".into())
1130+
])));
1131+
}
1132+
}
1133+
10121134
mod expr {
10131135
use super::*;
10141136
use pretty_assertions::assert_eq;

0 commit comments

Comments
 (0)
Please sign in to comment.