Skip to content

Commit

Permalink
Merge pull request #287 from zmstone/0214-fix-triple-quotes-indentati…
Browse files Browse the repository at this point in the history
…on-parser

fix(hocon_scanner): ignore indentation of the closing triple-quote
  • Loading branch information
zmstone authored Feb 14, 2024
2 parents 09979bc + 8c45e88 commit 3912cae
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 22 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ HOCON spec for reference: https://lightbend.github.io/config/
* Or add `~` around the string value: `"""~a"~"""` (see below).
- Multiline strings allow indentation (spaces, not tabs).
If `~\n` (or `~\r\n`) are the only characters following the opening triple-quote, then it's a multiline string with indentation:
* The first line `~\n` is ignored;
* The indentation spaces of the following lines are trimmed;
* The first line `~\n` is discarded;
* The closing triple-quote can be either `"""` or `~"""` (`~` allows the string to end with `"` without escaping).
* Indentation is allowed but not required for empty lines;
* Indentation level is determined by the least number of leading spaces among the non-empty lines;
* If the closing triple-quote takes the whole line, it's allowed to be indented less than other lines,
but if it's indented more than other lines, the spaces are treated as part of the string.
* Backslashes are treated as escape characters, i.e. should be escaped with another backslash;
* There is no need to escape quotes in multiline strings, but it's allowed;
* The closing triple-quote can be either `"""` or `~"""` (`~` allows the string to end with `"` without escaping).
* There is no need to escape quotes in multiline strings, but it's allowed.

## Schema

Expand Down
57 changes: 46 additions & 11 deletions src/hocon_pp.erl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@

-include("hocon_private.hrl").

%% either '\n' or '\r\n' depending on the `newline` option in the `Opts` map.
-define(NL, newline).
%% '\n' for triple quote string, not used anywhere else.
-define(LF, lf).
%% '\r\n' for triple quote string, not used anywhere else.
-define(CRLF, crlf).
-define(TRIPLE_QUOTE, <<"\"\"\"">>).
-define(INDENT_STEP, 2).

Expand Down Expand Up @@ -179,31 +184,51 @@ gen_triple_quote_str(Str, Opts) ->
maybe_indent(Chars, Opts) ->
case is_multiline(Chars) of
true ->
["~", indent_multiline_str(Chars, indent_inc(Opts)), "~"];
["~", ?NL, indent_multiline_str(Chars, indent_inc(Opts)), "~"];
false ->
Chars
end.
indent_multiline_str(Chars, Opts) ->
Lines = hocon_scanner:split_lines(Chars),
Lines = split_lines(Chars),
indent_str_value_lines(Lines, Opts).
%% Break a character list into lines at every '\n'; the '\n' itself is
%% dropped from each line. A '\r' preceding the '\n' is kept as part of
%% the line, because only '\n' is added back when the lines are re-joined.
%% The result always has at least one element: the text after the final
%% '\n' (possibly the empty list) is returned as the last line.
split_lines(Chars) ->
    split_lines(Chars, [], []).

%% CurrentR holds the current line in reverse order; Done holds the
%% completed lines, most recent first.
split_lines([$\n | Rest], CurrentR, Done) ->
    split_lines(Rest, [], [lists:reverse(CurrentR) | Done]);
split_lines([C | Rest], CurrentR, Done) ->
    split_lines(Rest, [C | CurrentR], Done);
split_lines([], CurrentR, Done) ->
    lists:reverse(Done, [lists:reverse(CurrentR)]).
real_nl() ->
io_lib:nl().
%% mark each line for indentation with 'indent'
%% except for empty lines in the middle of the string
%% Mark each line for indentation with 'indent'
%% except for empty lines in the middle of the string.
%% Insert '\n' rather than real_nl(), because split_lines/1 keeps '\r' as part of the line value.
indent_str_value_lines([[]], Opts) ->
%% last line being empty, no need to indent
[nl_indent(indent_dec(Opts))];
indent_str_value_lines([LastLine], Opts) ->
%% last line is not empty
[nl_indent(Opts), (bin(LastLine))];
%% last line being empty, indent less for the closing triple-quote
[indent(indent_dec(Opts))];
indent_str_value_lines([[$\r]], Opts) ->
%% last line being empty, indent less for the closing triple-quote
[indent(indent_dec(Opts))];
indent_str_value_lines([[] | Lines], Opts) ->
%% do not indent empty line
[real_nl() | indent_str_value_lines(Lines, Opts)];
[?LF | indent_str_value_lines(Lines, Opts)];
indent_str_value_lines([[$\r] | Lines], Opts) ->
%% do not indent empty line
[?CRLF | indent_str_value_lines(Lines, Opts)];
indent_str_value_lines([LastLine], Opts) ->
%% last line is not empty
[indent(Opts), LastLine];
indent_str_value_lines([Line | Lines], Opts) ->
[nl_indent(Opts), (bin(Line)) | indent_str_value_lines(Lines, Opts)].
[indent(Opts), Line, ?LF | indent_str_value_lines(Lines, Opts)].
gen_list(L, Opts) ->
case is_oneliner(L, Opts) of
Expand Down Expand Up @@ -352,6 +377,12 @@ opts_nl(Opts) ->

render_nl([], LastLine, Lines, _NL) ->
lists:reverse(add_line_r(lists:reverse(LastLine), Lines));
render_nl([?CRLF | Rest], Line0, Lines, NL) ->
Line = lists:reverse([$\n, $\r | Line0]),
render_nl(Rest, [], add_line_r(Line, Lines), NL);
render_nl([?LF | Rest], Line0, Lines, NL) ->
Line = lists:reverse([$\n | Line0]),
render_nl(Rest, [], add_line_r(Line, Lines), NL);
render_nl([?NL | Rest], Line0, Lines, NL) ->
Line = lists:reverse([NL | Line0]),
render_nl(Rest, [], add_line_r(Line, Lines), NL);
Expand All @@ -373,6 +404,10 @@ flatten([B | T]) when is_binary(B) ->
[B | flatten(T)];
flatten([L | T]) when is_list(L) ->
flatten(L ++ T);
flatten([?CRLF | T]) ->
[?CRLF | flatten(T)];
flatten([?LF | T]) ->
[?LF | flatten(T)];
flatten([?NL | T]) ->
[?NL | dedup_nl(flatten(T))];
flatten(B) when is_binary(B) ->
Expand Down
31 changes: 25 additions & 6 deletions src/hocon_scanner.xrl
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ Rules.
Erlang code.
-export([split_lines/1]).
maybe_include("include", TokenLine) -> {include, TokenLine};
maybe_include(TokenChars, TokenLine) -> {unqstr, TokenLine, TokenChars}.
Expand Down Expand Up @@ -136,7 +134,7 @@ split_lines(Chars) ->
%% ~"""
%% into ["line1\n", "line2\n"]
split_lines([], LastLineR, Lines) ->
%% if the last line ends with '-' drop it
%% if the last line ends with '~' drop it
LastLine = case LastLineR of
[$~ | Rest] ->
lists:reverse(Rest);
Expand All @@ -149,16 +147,35 @@ split_lines([$\n | Chars], Line, Lines) ->
split_lines([Char | Chars], Line, Lines) ->
split_lines(Chars, [Char | Line], Lines).

min_indent(Lines) ->
min_indent([LastLine]) ->
case indent_level(LastLine) of
ignore ->
0;
Indent ->
Indent
end;
min_indent(Lines0) ->
[LastLine | Lines] = lists:reverse(Lines0),
LastLineIndent = indent_level(LastLine),
Indents0 = lists:map(fun indent_level/1, Lines),
case lists:filter(fun erlang:is_integer/1, Indents0) of
MinIndent = case lists:filter(fun erlang:is_integer/1, Indents0) of
[] ->
0;
Indents ->
lists:min(Indents)
end,
%% If last line is all space, use the minimum indent of the preceding lines,
%% because the last line is allowed to indent less than other lines.
case LastLine =/= [] andalso lists:all(fun(I) -> I =:= $\s end, LastLine) of
true ->
MinIndent;
false ->
min(LastLineIndent, MinIndent)
end.

indent_level("") ->
indent_level([]) ->
ignore;
indent_level([$\r]) ->
ignore;
indent_level(Line) ->
indent_level(Line, 0).
Expand All @@ -170,6 +187,8 @@ indent_level(_, Count) ->

trim_indents([], _Indent) ->
[];
trim_indents([$\r], _Indent) ->
[$\r];
trim_indents(Chars, 0) ->
Chars;
trim_indents([$\s | Chars], Indent) when Indent > 0 ->
Expand Down
25 changes: 25 additions & 0 deletions test/hocon_pp_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,28 @@ no_triple_quote_string_when_oneliner_test_() ->
),
?_assertEqual([<<"root {a = \"a\\nb\"}">>], hocon_pp:do(Value, #{newline => <<>>}))
].

%% Checks pretty-printing and parsing of a multiline string value that mixes
%% CRLF and bare LF line endings:
%%   1. printing with a "\r\n" newline option yields the expected lines;
%%   2. the printed form parses back to the original value;
%%   3. a variant whose empty lines carry leading spaces parses to the same value.
crlf_multiline_test_() ->
    Value = #{<<"root">> => #{<<"x">> => <<"\r\n\r\na\r\nb\n">>}},
    %% Build the expected document; EmptyLine is the rendering used for
    %% each of the two leading empty lines of the value.
    Render = fun(EmptyLine) ->
        [
            <<"root {\r\n">>,
            <<" x = \"\"\"~\r\n">>,
            EmptyLine,
            EmptyLine,
            <<" a\r\n">>,
            %% the last newline is just \n, should not be replaced
            <<" b\n">>,
            <<" ~\"\"\"\r\n">>,
            <<"}\r\n">>
        ]
    end,
    Plain = Render(<<"\r\n">>),
    Indented = Render(<<" \r\n">>),
    [
        ?_assertEqual(Plain, hocon_pp:do(Value, #{newline => "\r\n"})),
        ?_assertEqual({ok, Value}, hocon:binary(Plain)),
        ?_assertEqual({ok, Value}, hocon:binary(Indented))
    ].
6 changes: 5 additions & 1 deletion test/hocon_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,11 @@ triple_quote_string_test_() ->
%% empty string with closing quote in the next line
?_assertEqual(<<"">>, Parse(<<"~\n">>)),
%% empty string with indented closing quote in the next line
?_assertEqual(<<"">>, Parse(<<"~\n ~">>))
?_assertEqual(<<"">>, Parse(<<"~\n ~">>)),
%% last line is space only: when it is indented more than the other lines, the extra spaces are kept as part of the string
?_assertEqual(<<"a\n ">>, Parse(<<"~\n a\n ~">>)),
%% last line is space only, ignored if it indents less than other non-space-only lines
?_assertEqual(<<"a\n">>, Parse(<<"~\n a\n ~">>))
].
obj_inside_array_test_() ->
Expand Down

0 comments on commit 3912cae

Please sign in to comment.