Compare commits

...

9 commits

Author SHA1 Message Date
Jade Lovelace
dce253ee01 burn it all
Change-Id: I0db6eee7d6aae3e02ea0155c04a55a7142e456c3
2024-03-13 22:03:42 -07:00
José Luis Lafuente
2a95127732 Add clang format configuration
(cherry picked from commit 53fdcbca509b6c5dacaea3d3c465d86e49b0dd74)
Change-Id: I5446fd45de2bf644e34112f719afb3318a440b30
2024-03-13 15:46:55 -07:00
Jade Lovelace
c9e77f0595 Allow dlopen of plugins to fail
It happens with some frequency that plugins that might be unimportant to
the evaluation at hand mismatch with the nix version, leading to
spurious load failures. Let's make these non fatal.

Change-Id: Iba10e951d171725ccf1a121bcd9be1e1d6ad69eb
2024-03-13 15:46:55 -07:00
Jade Lovelace
063d436c56 Test that :st does ... something
Change-Id: I97c00b5eb1288f68d8c2b484436cc185d040b8b2
2024-03-13 15:46:55 -07:00
Jade Lovelace
d2e6fec7ce repl_characterization: Also verify the stack trace exists
Change-Id: I8b2d8211a24011fae1586a1182d7d0772a039cd7
2024-03-13 15:46:55 -07:00
Jade Lovelace
212654d68f repl_characterization: eat newlines after commands and source-dir paths
This is because they are unrepresentable in the source files with
commentary but not in the output, so we should just eat them in
normalization. It's ok.

Change-Id: I2cb7e8b3fc7b00874885bb287cbaa200b41cb16b
2024-03-13 15:46:55 -07:00
Jade Lovelace
9c3a1babe6 Add regression tests for #9917, #9918
Change-Id: Ib0591e1499c5dba5e5a83ee75a899c9d16986827
2024-03-13 15:46:55 -07:00
Jade Lovelace
b85085157a Implement a repl characterization test system
This allows for automating using the repl without needing a PTY, with
very easy to write test files.

Change-Id: Ia8d7854edd91f93477638942cb6fc261354e6035
2024-03-13 15:46:55 -07:00
Jade Lovelace
ed95b02215 Implement a parser for a literate testing system for the repl
This parser can be reused for other purposes. It's inspired by
https://bitheap.org/cram/

Although eelco's impostor exists https://github.com/mobusoperandi/eelco,
it is not very nice to depend on out of tree testing frameworks with no
way to customize them.

Change-Id: Ifca50177e09730182baf0ebf829c3505bbb0274a
2024-03-13 15:46:55 -07:00
30 changed files with 1221 additions and 6 deletions

30
.clang-format Normal file
View file

@ -0,0 +1,30 @@
BasedOnStyle: LLVM
IndentWidth: 4
BreakBeforeBraces: Custom
BraceWrapping:
AfterStruct: true
AfterClass: true
AfterFunction: true
AfterUnion: true
SplitEmptyRecord: false
PointerAlignment: Middle
FixNamespaceComments: false
SortIncludes: Never
#IndentPPDirectives: BeforeHash
SpaceAfterCStyleCast: true
SpaceAfterTemplateKeyword: false
AccessModifierOffset: -4
AlignAfterOpenBracket: AlwaysBreak
AlignEscapedNewlines: DontAlign
ColumnLimit: 120
BreakStringLiterals: false
BitFieldColonSpacing: None
AllowShortFunctionsOnASingleLine: Empty
AlwaysBreakTemplateDeclarations: Yes
BinPackParameters: false
BreakConstructorInitializers: BeforeComma
EmptyLineAfterAccessModifier: Leave # change to always/never later?
EmptyLineBeforeAccessModifier: Leave
#PackConstructorInitializers: BinPack
BreakBeforeBinaryOperators: NonAssignment
AlwaysBreakBeforeMultilineStrings: true

View file

@ -41,6 +41,7 @@ makefiles += \
tests/functional/ca/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/repl_characterization/local.mk \
tests/functional/plugins/local.mk
else
makefiles += \

View file

@ -342,7 +342,7 @@ void initPlugins()
void *handle =
dlopen(file.c_str(), RTLD_LAZY | RTLD_LOCAL);
if (!handle)
throw Error("could not dynamically open plugin file '%s': %s", file, dlerror());
warn("could not dynamically open plugin file '%s': %s", file, dlerror());
}
}

View file

@ -131,7 +131,7 @@ ifeq ($(HAVE_LIBCPUID), 1)
endif
ifeq ($(ENABLE_BUILD), yes)
nix_tests += test-libstoreconsumer.sh
nix_tests += test-libstoreconsumer.sh test-repl-characterization.sh
ifeq ($(BUILD_SHARED_LIBS), 1)
nix_tests += plugins.sh
@ -141,7 +141,10 @@ endif
$(d)/test-libstoreconsumer.sh.test $(d)/test-libstoreconsumer.sh.test-debug: \
$(buildprefix)$(d)/test-libstoreconsumer/test-libstoreconsumer
$(d)/plugins.sh.test $(d)/plugins.sh.test-debug: \
$(buildprefix)$(d)/plugins/libplugintest.$(SO_EXT)
$(buildprefix)$(d)/plugins/libplugintest.$(SO_EXT) \
$(buildprefix)$(d)/plugins/libplugintestfail.$(SO_EXT)
$(d)/test-repl-characterization.sh.test $(d)/test-repl-characterization.sh.test-debug: \
$(buildprefix)$(d)/repl_characterization/test-repl-characterization
install-tests += $(foreach x, $(nix_tests), $(d)/$(x))

View file

@ -4,6 +4,20 @@ if [[ $BUILD_SHARED_LIBS != 1 ]]; then
skipTest "Plugins are not supported"
fi
res=$(nix --option setting-set true --option plugin-files $PWD/plugins/libplugintest* eval --expr builtins.anotherNull)
res=$(nix --option setting-set true --option plugin-files $PWD/plugins/libplugintest.* eval --expr builtins.anotherNull)
[ "$res"x = "nullx" ]
# Plugin load failing due to missing symbols
res=$(nix --option plugin-files $PWD/plugins/libplugintestfail.* eval --expr '1234 + 5' 2>&1)
# We expect this to succeed evaluating
echo "$res" | grep 1239 >/dev/null
# On Linux, we expect this to print some failure of dlopen.
# Only on Linux do we expect for sure that -z now is set on the .so file, so it
# will definitely fail to load instead of lazy loading (and thus not hitting
# the missing symbol).
# FIXME(jade): does there exist an equivalent of -z now on macOS that eluded us
# in search?
if [[ "$(uname -s)" == Linux ]]; then
echo "$res" | grep "could not dynamically open plugin file" >/dev/null
fi

View file

@ -1,4 +1,4 @@
libraries += libplugintest
libraries += libplugintest libplugintestfail
libplugintest_DIR := $(d)
@ -9,3 +9,19 @@ libplugintest_ALLOW_UNDEFINED := 1
libplugintest_EXCLUDE_FROM_LIBRARY_LIST := 1
libplugintest_CXXFLAGS := -I src/libutil -I src/libstore -I src/libexpr -I src/libfetchers
libplugintestfail_DIR := $(d)
libplugintestfail_SOURCES := $(d)/plugintestfail.cc
libplugintestfail_ALLOW_UNDEFINED := 1
libplugintestfail_EXCLUDE_FROM_LIBRARY_LIST := 1
libplugintestfail_CXXFLAGS := -I src/libutil -I src/libstore -I src/libexpr -I src/libfetchers -DMISSING_REFERENCE
# Make sure that the linker strictly evaluates all symbols on .so load on Linux
# so it will definitely fail to load as expected.
ifdef HOST_LINUX
libplugintestfail_LDFLAGS += -z now
endif

View file

@ -1,8 +1,15 @@
#include "config.hh"
#include "primops.hh"
#include <stdlib.h>
using namespace nix;
#ifdef MISSING_REFERENCE
extern void meow();
#else
#define meow() {}
#endif
struct MySettings : Config
{
Setting<bool> settingSet{this, false, "setting-set",
@ -13,6 +20,11 @@ MySettings mySettings;
static GlobalConfig::Register rs(&mySettings);
[[gnu::used, gnu::unused, gnu::retain]]
static void maybeRequireMeowForDlopen() {
meow();
}
static void prim_anotherNull (EvalState & state, const PosIdx pos, Value ** args, Value & v)
{
if (mySettings.settingSet)

View file

@ -0,0 +1 @@
plugintest.cc

View file

@ -0,0 +1 @@
test-repl-characterization

View file

@ -0,0 +1,17 @@
Commentary "meow meow meow"
Command "command"
Output "output output one"
Output ""
Output ""
Output "output output two"
Commentary "meow meow"
Command "command two"
Output "output output output"
Commentary "commentary"
Output "output output output"
Output ""
Commentary "the blank below should be chomped"
Command "command three"
Commentary ""
Output "meow output"
Output ""

View file

@ -0,0 +1,17 @@
meow meow meow
nix-repl> command
output output one
output output two
meow meow
nix-repl> command two
output output output
commentary
output output output
the blank below should be chomped
nix-repl> command three
meow output

View file

@ -0,0 +1,60 @@
nix-repl> 1 + 1
2
nix-repl> :doc builtins.head
Synopsis: builtins.head list
Return the first element of a list; abort evaluation if
the argument isnt a list or is an empty list. You can
test whether a list is empty by comparing it with [].
nix-repl> f = a: "" + a
Expect the trace to not contain any traceback:
nix-repl> f 2
error:
… while evaluating a path segment
at «string»:1:10:
1| a: "" + a
| ^
error: cannot coerce an integer to a string: 2
nix-repl> :te
showing error traces
Expect the trace to have traceback:
nix-repl> f 2
error:
… from call site
at «string»:1:1:
1| f 2
| ^
… while calling anonymous lambda
at «string»:1:2:
1| a: "" + a
| ^
… while evaluating a path segment
at «string»:1:10:
1| a: "" + a
| ^
error: cannot coerce an integer to a string: 2
Turning it off should also work:
nix-repl> :te
not showing error traces
nix-repl> f 2
error:
… while evaluating a path segment
at «string»:1:10:
1| a: "" + a
| ^
error: cannot coerce an integer to a string: 2

View file

@ -0,0 +1,10 @@
Command "command"
Output "output output one"
Output ""
Output ""
Output "output output two"
Command "command two"
Output "output output output"
Output "output output output"
Command "command three"
Output "meow output"

View file

@ -0,0 +1,9 @@
let
a = builtins.trace "before inner break" (
builtins.break { msg = "hello"; }
);
b = builtins.trace "before outer break" (
builtins.break a
);
in
b

View file

@ -0,0 +1,34 @@
https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
This test ensures that continues don't skip opportunities to enter the debugger.
trace: before outer break
info: breakpoint reached
nix-repl> :c
trace: before inner break
info: breakpoint reached
nix-repl> :bt
0: error: breakpoint reached
«none»:0
1: while calling a function
TEST_DATA/regression_9917.nix:3:5
2| a = builtins.trace "before inner break" (
3| builtins.break { msg = "hello"; }
| ^
4| );
2: while calling a function
TEST_DATA/regression_9917.nix:2:7
1| let
2| a = builtins.trace "before inner break" (
| ^
3| builtins.break { msg = "hello"; }
nix-repl> :c
nix-repl> msg
"hello"

View file

@ -0,0 +1,5 @@
let
r = [];
x = builtins.throw r;
in
x

View file

@ -0,0 +1,16 @@
error:
… while evaluating the error message passed to builtin.throw
error: cannot coerce a list to a string: [ ]
We expect to be able to see locals like r in the debugger:
nix-repl> r
[ ]
nix-repl> :env
Env level 0
static: x r
Env level 1
builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation

View file

@ -0,0 +1,9 @@
let
a = builtins.trace "before inner break" (
let meow' = 3; in builtins.break { msg = "hello"; }
);
b = builtins.trace "before outer break" (
let meow = 2; in builtins.break a
);
in
b

View file

@ -0,0 +1,74 @@
trace: before outer break
info: breakpoint reached
Here we are in the outer break and the let of "meow". st should show meow there
as it is in scope.
nix-repl> :st
0: error: breakpoint reached
«none»:0
Env level 0
static: meow
Env level 1
static: a b
Env level 2
builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
nix-repl> meow
2
If we :st past the frame in the backtrace with the meow in it, the meow should not be there.
nix-repl> :st 3
3: while calling a function
TEST_DATA/stack_vars.nix:5:7
4| );
5| b = builtins.trace "before outer break" (
| ^
6| let meow = 2; in builtins.break a
Env level 0
static: a b
Env level 1
builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
nix-repl> :c
trace: before inner break
info: breakpoint reached
nix-repl> :st
0: error: breakpoint reached
«none»:0
Env level 0
static: meow'
Env level 1
static: a b
Env level 2
builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
nix-repl> meow'
3
nix-repl> :st 3
3: while calling a function
TEST_DATA/stack_vars.nix:2:7
1| let
2| a = builtins.trace "before inner break" (
| ^
3| let meow' = 3; in builtins.break { msg = "hello"; }
Env level 0
static: a b
Env level 1
builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation

View file

@ -0,0 +1,15 @@
programs += test-repl-characterization
test-repl-characterization_DIR := $(d)
# do not install
test-repl-characterization_INSTALL_DIR :=
test-repl-characterization_SOURCES := \
$(wildcard $(d)/*.cc) \
test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support -DNIX_BIN_DIR="\"$(bindir)\""
test-repl-characterization_LIBS = libutil libutil-test-support
test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS)

View file

@ -0,0 +1,129 @@
#include <filesystem>
#include <gtest/gtest.h>
#include <string>
#include <string_view>
#include <optional>
#include <unistd.h>
#include <boost/algorithm/string/replace.hpp>
#include "test-session.hh"
#include "util.hh"
#include "tests/characterization.hh"
#include "tests/cli-literate-parser.hh"
#include "tests/terminal-code-eater.hh"
using namespace std::string_literals;
namespace nix {
static constexpr const char * REPL_PROMPT = "nix-repl> ";
// ASCII ENQ character
static constexpr const char * AUTOMATION_PROMPT = "\x05";
static std::string_view trimOutLog(std::string_view outLog)
{
const std::string trailer = "\n"s + AUTOMATION_PROMPT;
if (outLog.ends_with(trailer)) {
outLog.remove_suffix(trailer.length());
}
return outLog;
}
class ReplSessionTest : public CharacterizationTest
{
Path unitTestData = getUnitTestData();
public:
Path goldenMaster(std::string_view testStem) const override
{
return unitTestData + "/" + testStem;
}
void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
{
auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
Strings args{"--quiet", "repl", "--quiet", "--extra-experimental-features", "repl-automation"};
args.insert(args.end(), extraArgs.begin(), extraArgs.end());
auto nixBin = canonPath(getEnvNonEmpty("NIX_BIN_DIR").value_or(NIX_BIN_DIR));
// TODO: why the fuck does this need two --quiets
auto process = RunningProcess::start(nixBin + "/nix", args);
auto session = TestSession{AUTOMATION_PROMPT, std::move(process)};
for (auto & bit : syntax) {
if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
continue;
}
if (!session.waitForPrompt()) {
ASSERT_TRUE(false);
}
session.runCommand(bit.text);
}
if (!session.waitForPrompt()) {
ASSERT_TRUE(false);
}
session.close();
auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA");
auto cleanedOutLog = trimOutLog(replacedOutLog);
auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, cleanedOutLog, 0);
CLILiterateParser::tidyOutputForComparison(parsedOutLog);
CLILiterateParser::tidyOutputForComparison(syntax);
ASSERT_EQ(parsedOutLog, syntax);
}
};
TEST_F(ReplSessionTest, parses)
{
writeTest("basic.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto parser = CLILiterateParser{REPL_PROMPT};
parser.feed(content);
std::ostringstream out{};
for (auto & bit : parser.syntax()) {
out << bit.print() << "\n";
}
return out.str();
});
writeTest("basic_tidied.ast", [this]() {
const std::string content = readFile(goldenMaster("basic.test"));
auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
CLILiterateParser::tidyOutputForComparison(syntax);
std::ostringstream out{};
for (auto & bit : syntax) {
out << bit.print() << "\n";
}
return out.str();
});
}
TEST_F(ReplSessionTest, repl_basic)
{
readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
}
#define DEBUGGER_TEST(name) \
TEST_F(ReplSessionTest, name) \
{ \
readTest(#name ".test", [this](std::string input) { \
runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
}); \
}
DEBUGGER_TEST(regression_9918);
DEBUGGER_TEST(regression_9917);
DEBUGGER_TEST(stack_vars);
};

View file

@ -0,0 +1,151 @@
#include <iostream>
#include <unistd.h>
#include "test-session.hh"
#include "util.hh"
#include "tests/debug-char.hh"
namespace nix {
static constexpr const bool DEBUG_REPL_PARSER = false;
RunningProcess RunningProcess::start(std::string executable, Strings args)
{
args.push_front(executable);
Pipe procStdin{};
Pipe procStdout{};
procStdin.create();
procStdout.create();
// This is separate from runProgram2 because we have different IO requirements
pid_t pid = startProcess([&]() {
if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1)
throw SysError("dupping stdout");
if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1)
throw SysError("dupping stdin");
procStdin.writeSide.close();
procStdout.readSide.close();
if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1)
throw SysError("dupping stderr");
execve(executable.c_str(), stringsToCharPtrs(args).data(), environ);
throw SysError("exec did not happen");
});
procStdout.writeSide.close();
procStdin.readSide.close();
return RunningProcess{
.pid = pid,
.procStdin = std::move(procStdin),
.procStdout = std::move(procStdout),
};
}
[[gnu::unused]]
std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
{
switch (s) {
case ReplOutputParser::State::Prompt:
os << "prompt";
break;
case ReplOutputParser::State::Context:
os << "context";
break;
}
return os;
}
void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt)
{
if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char}
<< (wasPrompt ? " [prompt]" : "") << "\n";
}
state = new_state;
pos_in_prompt = 0;
}
bool ReplOutputParser::feed(char c)
{
if (c == '\n') {
transition(State::Prompt, c);
return false;
}
switch (state) {
case State::Context:
break;
case State::Prompt:
if (pos_in_prompt == prompt.length() - 1 && prompt[pos_in_prompt] == c) {
transition(State::Context, c, true);
return true;
}
if (pos_in_prompt >= prompt.length() - 1 || prompt[pos_in_prompt] != c) {
transition(State::Context, c);
break;
}
pos_in_prompt++;
break;
}
return false;
}
/** Waits for the prompt and then returns if a prompt was found */
bool TestSession::waitForPrompt()
{
std::vector<char> buf(1024);
for (;;) {
ssize_t res = read(proc.procStdout.readSide.get(), buf.data(), buf.size());
if (res < 0) {
throw SysError("read");
}
if (res == 0) {
return false;
}
bool foundPrompt = false;
for (ssize_t i = 0; i < res; ++i) {
// foundPrompt = foundPrompt || outputParser.feed(buf[i]);
bool wasEaten = true;
eater.feed(buf[i], [&](char c) {
wasEaten = false;
foundPrompt = outputParser.feed(buf[i]) || foundPrompt;
outLog.push_back(c);
});
if constexpr (DEBUG_REPL_PARSER) {
std::cerr << "raw " << DebugChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n";
}
}
if (foundPrompt) {
return true;
}
}
}
void TestSession::close()
{
proc.procStdin.close();
proc.procStdout.close();
}
void TestSession::runCommand(std::string command)
{
if constexpr (DEBUG_REPL_PARSER)
std::cerr << "runCommand " << command << "\n";
command += "\n";
// We have to feed a newline into the output parser, since Nix might not
// give us a newline before a prompt in all cases (it might clear line
// first, e.g.)
outputParser.feed('\n');
// Echo is disabled, so we have to make our own
outLog.append(command);
writeFull(proc.procStdin.writeSide.get(), command, false);
}
};

View file

@ -0,0 +1,69 @@
#pragma once
///@file
#include <sched.h>
#include <string>
#include "util.hh"
#include "tests/terminal-code-eater.hh"
namespace nix {
struct RunningProcess
{
pid_t pid;
Pipe procStdin;
Pipe procStdout;
static RunningProcess start(std::string executable, Strings args);
};
/** DFA that catches repl prompts */
class ReplOutputParser
{
public:
ReplOutputParser(std::string prompt)
: prompt(prompt)
{
assert(!prompt.empty());
}
/** Feeds in a character and returns whether this is an open prompt */
bool feed(char c);
enum class State {
Prompt,
Context,
};
private:
State state = State::Prompt;
size_t pos_in_prompt = 0;
std::string const prompt;
void transition(State state, char responsible_char, bool wasPrompt = false);
};
struct TestSession
{
RunningProcess proc;
ReplOutputParser outputParser;
TerminalCodeEater eater;
std::string outLog;
std::string prompt;
TestSession(std::string prompt, RunningProcess && proc)
: proc(std::move(proc))
, outputParser(prompt)
, eater({})
, outLog({})
, prompt(prompt)
{
}
bool waitForPrompt();
void runCommand(std::string command);
void close();
};
};

View file

@ -0,0 +1,3 @@
source common.sh
_NIX_TEST_UNIT_DATA=$(pwd)/repl_characterization/data ./repl_characterization/test-repl-characterization

View file

@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
libutil-test-support_LIBS = libutil
# libexpr so we can steal their string printer from print.cc
libutil-test-support_LIBS = libutil libexpr
libutil-test-support_LDFLAGS := -pthread -lrapidcheck

View file

@ -0,0 +1,246 @@
#include "cli-literate-parser.hh"
#include "libexpr/print.hh"
#include "debug-char.hh"
#include "types.hh"
#include "util.hh"
#include <iostream>
#include <memory>
#include <boost/algorithm/string/trim.hpp>
using namespace std::string_literals;
namespace nix {
static constexpr const bool DEBUG_PARSER = false;
constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
{
return std::visit(
overloaded{// clang-format off
[](Indent const&) -> const char * { return "indent"; },
[](Commentary const&) -> const char * { return "indent"; },
[](Prompt const&) -> const char * { return "prompt"; },
[](Command const&) -> const char * { return "command"; },
[](OutputLine const&) -> const char * { return "output_line"; }},
// clang-format on
s);
}
auto CLILiterateParser::Node::print() const -> std::string
{
std::ostringstream s{};
switch (kind) {
case NodeKind::COMMENTARY:
s << "Commentary ";
break;
case NodeKind::COMMAND:
s << "Command ";
break;
case NodeKind::OUTPUT:
s << "Output ";
break;
}
printLiteralString(s, this->text);
return s.str();
}
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
{
for (auto & node : nodes) {
*os << node.print() << "\\n";
}
}
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
{
CLILiterateParser p{std::move(prompt), indent};
p.feed(input);
return std::move(p).intoSyntax();
}
auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
{
return std::move(this->syntax_);
}
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
: state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
, prompt_(prompt)
, indent_(indent)
, lastWasOutput_(false)
, syntax_{}
{
assert(!prompt.empty());
}
void CLILiterateParser::feed(char c)
{
if constexpr (DEBUG_PARSER) {
std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
}
if (c == '\n') {
onNewline();
return;
}
std::visit(
overloaded{
[&](Indent & s) {
if (c == ' ') {
if (++s.pos >= indent_) {
transition(Prompt{});
}
} else {
transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
}
},
[&](Prompt & s) {
if (s.pos >= prompt_.length()) {
transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
return;
} else if (c == prompt_[s.pos]) {
// good prompt character
++s.pos;
} else {
// didn't match the prompt, so it must have actually been output.
s.lineAccumulator.push_back(c);
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
return;
}
s.lineAccumulator.push_back(c);
},
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
state_);
}
void CLILiterateParser::onNewline()
{
State lastState = std::move(state_);
bool newLastWasOutput = false;
syntax_.push_back(std::visit(
overloaded{
[&](Indent & s) {
// XXX: technically this eats trailing spaces
// a newline following output is considered part of that output
if (lastWasOutput_) {
newLastWasOutput = true;
return Node::mkOutput("");
}
return Node::mkCommentary("");
},
[&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
[&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
[&](OutputLine & s) {
newLastWasOutput = true;
return Node::mkOutput(std::move(s.lineAccumulator));
},
[&](Prompt & s) {
// INDENT followed by newline is also considered a blank output line
return Node::mkOutput(std::move(s.lineAccumulator));
}},
lastState));
transition(Indent{});
lastWasOutput_ = newLastWasOutput;
}
void CLILiterateParser::feed(std::string_view s)
{
for (char ch : s) {
feed(ch);
}
}
void CLILiterateParser::transition(State new_state)
{
// When we expect INDENT and we are parsing without indents, commentary
// cannot exist, so we want to transition directly into PROMPT before
// resuming normal processing.
if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
new_state = Prompt{AccumulatingState{}, i->pos};
}
state_ = new_state;
}
auto CLILiterateParser::syntax() const -> std::vector<Node> const &
{
return syntax_;
}
auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
-> std::string
{
std::string indent_str(indent, ' ');
std::ostringstream out{};
for (auto & node : syntax) {
switch (node.kind) {
case NodeKind::COMMENTARY:
// TODO: should not ignore commentary
break;
case NodeKind::COMMAND:
out << indent_str << prompt << node.text << "\n";
break;
case NodeKind::OUTPUT:
out << indent_str << node.text << "\n";
break;
}
}
return out.str();
}
void CLILiterateParser::tidyOutputForComparison(std::vector<Node> & syntax)
{
std::vector<Node> newSyntax{};
// Eat trailing newlines, so assume that the very end was actually a command
bool lastWasCommand = true;
bool newLastWasCommand = true;
auto v = std::ranges::reverse_view(syntax);
for (auto it = v.begin(); it != v.end(); ++it) {
Node item = *it;
lastWasCommand = newLastWasCommand;
// chomp commentary
if (item.kind == NodeKind::COMMENTARY) {
continue;
}
if (item.kind == NodeKind::COMMAND) {
newLastWasCommand = true;
if (item.text == "") {
// chomp empty commands
continue;
}
}
if (item.kind == NodeKind::OUTPUT) {
// TODO: horrible
bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
// chomp empty text above or directly below commands
continue;
}
// real output, stop chomping
newLastWasCommand = false;
item = Node::mkOutput(std::move(trimmedText));
}
newSyntax.push_back(std::move(item));
}
std::reverse(newSyntax.begin(), newSyntax.end());
syntax = std::move(newSyntax);
}
};

View file

@ -0,0 +1,134 @@
#pragma once
///@file
#include <compare>
#include <memory>
#include <sstream>
#include <variant>
#include <vector>
#include <string>
namespace nix {
/*
* A DFA parser for literate test cases for CLIs.
*
* FIXME: implement merging of these, so you can auto update cases that have
* comments.
*
* Format:
* COMMENTARY
* INDENT PROMPT COMMAND
* INDENT OUTPUT
*
* e.g.
* commentary commentary commentary
* nix-repl> :t 1
* an integer
*
* Yields:
* Commentary "commentary commentary commentary"
* Command ":t 1"
* Output "an integer"
*
* Note: one Output line is generated for each line of the sources, because
* this is effectively necessary to be able to align them in the future to
* auto-update tests.
*/
class CLILiterateParser
{
public:
enum class NodeKind {
COMMENTARY,
COMMAND,
OUTPUT,
};
struct Node
{
NodeKind kind;
std::string text;
std::strong_ordering operator<=>(Node const &) const = default;
static Node mkCommentary(std::string text)
{
return Node{.kind = NodeKind::COMMENTARY, .text = text};
}
static Node mkCommand(std::string text)
{
return Node{.kind = NodeKind::COMMAND, .text = text};
}
static Node mkOutput(std::string text)
{
return Node{.kind = NodeKind::OUTPUT, .text = text};
}
auto print() const -> std::string;
};
CLILiterateParser(std::string prompt, size_t indent = 2);
auto syntax() const -> std::vector<Node> const &;
/** Feeds a character into the parser */
void feed(char c);
/** Feeds a string into the parser */
void feed(std::string_view s);
/** Parses an input in a non-streaming fashion */
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
/** Returns, losslessly, the string that would have generated a syntax tree */
static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
/** Consumes a CLILiterateParser and gives you the syntax out of it */
auto intoSyntax() && -> std::vector<Node>;
/** Tidies syntax to remove trailing whitespace from outputs and remove any
* empty prompts */
static void tidyOutputForComparison(std::vector<Node> & syntax);
private:
struct AccumulatingState
{
std::string lineAccumulator;
};
struct Indent
{
size_t pos = 0;
};
struct Commentary : public AccumulatingState
{};
struct Prompt : AccumulatingState
{
size_t pos = 0;
};
struct Command : public AccumulatingState
{};
struct OutputLine : public AccumulatingState
{};
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
State state_;
constexpr static auto stateDebug(State const&) -> const char *;
const std::string prompt_;
const size_t indent_;
/** Last line was output, so we consider a blank to be part of the output */
bool lastWasOutput_;
std::vector<Node> syntax_;
void transition(State newState);
void onNewline();
};
// Override gtest printing for lists of nodes
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
};

View file

@ -0,0 +1,24 @@
///@file
#include <ostream>
#include <boost/io/ios_state.hpp>
namespace nix {
struct DebugChar
{
char c;
};
inline std::ostream & operator<<(std::ostream & s, DebugChar c)
{
boost::io::ios_flags_saver _ifs(s);
if (isprint(c.c)) {
s << static_cast<char>(c.c);
} else {
s << std::hex << "0x" << (static_cast<unsigned int>(c.c) & 0xff);
}
return s;
}
}

View file

@ -0,0 +1,85 @@
#include "terminal-code-eater.hh"
#include "debug-char.hh"
#include <assert.h>
#include <cstdint>
#include <iostream>
namespace nix {
static constexpr const bool DEBUG_EATER = false;
void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
{
auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; };
auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; };
auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; };
if constexpr (DEBUG_EATER) {
std::cerr << "eater" << DebugChar{c} << "\n";
}
switch (state) {
case State::ExpectESC:
switch (c) {
case '\e':
transition(State::ExpectESCSeq);
return;
// Just eat \r, since it is part of clearing a line
case '\r':
return;
}
if constexpr (DEBUG_EATER) {
std::cerr << "eater uneat" << DebugChar{c} << "\n";
}
on_char(c);
break;
case State::ExpectESCSeq:
switch (c) {
// CSI
case '[':
transition(State::InCSIParams);
return;
default:
transition(State::ExpectESC);
return;
}
break;
// https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
// A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
// ^ params ^ intermediates ^ final byte
case State::InCSIParams:
if (isFinalChar(c)) {
transition(State::ExpectESC);
return;
} else if (isIntermediateChar(c)) {
transition(State::InCSIIntermediates);
return;
} else if (isParamChar(c)) {
return;
} else {
// Corrupt escape sequence? Throw an assert, for now.
// transition(State::ExpectESC);
assert(false && "Corrupt terminal escape sequence");
return;
}
break;
case State::InCSIIntermediates:
if (isFinalChar(c)) {
transition(State::ExpectESC);
return;
} else if (isIntermediateChar(c)) {
return;
} else {
// Corrupt escape sequence? Throw an assert, for now.
// transition(State::ExpectESC);
assert(false && "Corrupt terminal escape sequence in intermediates");
return;
}
break;
}
}
void TerminalCodeEater::transition(State new_state)
{
state = new_state;
}
};

View file

@ -0,0 +1,29 @@
#pragma once
/// @file
#include <functional>
namespace nix {
/** DFA that eats terminal escapes
*
* See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
*/
class TerminalCodeEater
{
public:
void feed(char c, std::function<void(char)> on_char);
private:
enum class State {
ExpectESC,
ExpectESCSeq,
InCSIParams,
InCSIIntermediates,
};
State state = State::ExpectESC;
void transition(State new_state);
};
};