Implement a parser for a literate testing system for the repl
This parser can be reused for other purposes. It's inspired by https://bitheap.org/cram/. Although eelco's impostor exists (https://github.com/mobusoperandi/eelco), it is not very nice to depend on out-of-tree testing frameworks with no way to customize them. Change-Id: Ifca50177e09730182baf0ebf829c3505bbb0274a
This commit is contained in:
parent
06952cf7c4
commit
ed95b02215
10 changed files with 412 additions and 1 deletions
1
Makefile
1
Makefile
|
@ -41,6 +41,7 @@ makefiles += \
|
|||
tests/functional/ca/local.mk \
|
||||
tests/functional/dyn-drv/local.mk \
|
||||
tests/functional/test-libstoreconsumer/local.mk \
|
||||
tests/functional/repl_characterization/local.mk \
|
||||
tests/functional/plugins/local.mk
|
||||
else
|
||||
makefiles += \
|
||||
|
|
1
tests/functional/repl_characterization/.gitignore
vendored
Normal file
1
tests/functional/repl_characterization/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
test-repl-characterization
|
11
tests/functional/repl_characterization/data/basic.ast
Normal file
11
tests/functional/repl_characterization/data/basic.ast
Normal file
|
@ -0,0 +1,11 @@
|
|||
Commentary "meow meow meow"
|
||||
Command "command"
|
||||
Output "output output one"
|
||||
Output ""
|
||||
Output ""
|
||||
Output "output output two"
|
||||
Commentary "meow meow"
|
||||
Command "command two"
|
||||
Output "output output output"
|
||||
Commentary "commentary"
|
||||
Output "output output output"
|
11
tests/functional/repl_characterization/data/basic.test
Normal file
11
tests/functional/repl_characterization/data/basic.test
Normal file
|
@ -0,0 +1,11 @@
|
|||
meow meow meow
|
||||
nix-repl> command
|
||||
output output one
|
||||
|
||||
|
||||
output output two
|
||||
meow meow
|
||||
nix-repl> command two
|
||||
output output output
|
||||
commentary
|
||||
output output output
|
19
tests/functional/repl_characterization/local.mk
Normal file
19
tests/functional/repl_characterization/local.mk
Normal file
|
@ -0,0 +1,19 @@
|
|||
programs += test-repl-characterization
|
||||
|
||||
installcheck: test-repl-characterization_RUN
|
||||
|
||||
test-repl-characterization_DIR := $(d)
|
||||
|
||||
test-repl-characterization_ENV := _NIX_TEST_UNIT_DATA=$(shell realpath "$(d)")/data
|
||||
|
||||
# do not install
|
||||
test-repl-characterization_INSTALL_DIR :=
|
||||
|
||||
test-repl-characterization_SOURCES := \
|
||||
$(wildcard $(d)/*.cc) \
|
||||
|
||||
test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support
|
||||
|
||||
test-repl-characterization_LIBS = libutil libutil-test-support
|
||||
|
||||
test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS)
|
|
@ -0,0 +1,42 @@
|
|||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <optional>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "tests/characterization.hh"
|
||||
#include "tests/cli-literate-parser.hh"
|
||||
|
||||
using namespace std::string_literals;
|
||||
|
||||
namespace nix {
|
||||
|
||||
/** Prompt string handed to CLILiterateParser to recognise command lines in the literate test files. */
static constexpr const char * REPL_PROMPT = "nix-repl> ";
|
||||
|
||||
class ReplSessionTest : public CharacterizationTest
|
||||
{
|
||||
Path unitTestData = getUnitTestData();
|
||||
|
||||
public:
|
||||
Path goldenMaster(std::string_view testStem) const override
|
||||
{
|
||||
return unitTestData + "/" + testStem;
|
||||
}
|
||||
};
|
||||
|
||||
// Parses basic.test with the literate parser and compares the printed syntax
// nodes (one per line) against the basic.ast golden master.
TEST_F(ReplSessionTest, parses)
{
    writeTest("basic.ast", [this]() {
        const std::string literateSource = readFile(goldenMaster("basic.test"));

        CLILiterateParser literateParser{REPL_PROMPT};
        literateParser.feed(literateSource);

        // Render every node on its own line, in parse order.
        std::ostringstream rendered{};
        for (auto & node : literateParser.syntax()) {
            rendered << node.print() << "\n";
        }
        return rendered.str();
    });
}
|
||||
};
|
|
@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
|
|||
|
||||
libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
|
||||
|
||||
libutil-test-support_LIBS = libutil
|
||||
# libexpr so we can steal their string printer from print.cc
|
||||
libutil-test-support_LIBS = libutil libexpr
|
||||
|
||||
libutil-test-support_LDFLAGS := -pthread -lrapidcheck
|
||||
|
|
174
tests/unit/libutil-support/tests/cli-literate-parser.cc
Normal file
174
tests/unit/libutil-support/tests/cli-literate-parser.cc
Normal file
|
@ -0,0 +1,174 @@
|
|||
#include "cli-literate-parser.hh"
|
||||
#include "libexpr/print.hh"
|
||||
#include "debug-char.hh"
|
||||
#include "types.hh"
|
||||
#include "util.hh"
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <boost/algorithm/string/trim.hpp>
|
||||
|
||||
using namespace std::string_literals;
|
||||
|
||||
namespace nix {
|
||||
|
||||
/** Compile-time switch: when true, feed(char) prints the current state and character to stdout. */
static constexpr const bool DEBUG_PARSER = false;
|
||||
|
||||
/**
 * Maps a parser state to a short name for the DEBUG_PARSER trace output.
 *
 * @param s the state to describe
 * @return a static string naming the active alternative of the variant
 */
constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
{
    return std::visit(
        overloaded{// clang-format off
            [](Indent const&) -> const char * { return "indent"; },
            // Previously reported "indent", which made traces ambiguous.
            [](Commentary const&) -> const char * { return "commentary"; },
            [](Prompt const&) -> const char * { return "prompt"; },
            [](Command const&) -> const char * { return "command"; },
            [](OutputLine const&) -> const char * { return "output_line"; }},
        // clang-format on
        s);
}
|
||||
|
||||
auto CLILiterateParser::Node::print() const -> std::string
|
||||
{
|
||||
std::ostringstream s{};
|
||||
switch (kind) {
|
||||
case NodeKind::COMMENTARY:
|
||||
s << "Commentary ";
|
||||
break;
|
||||
case NodeKind::COMMAND:
|
||||
s << "Command ";
|
||||
break;
|
||||
case NodeKind::OUTPUT:
|
||||
s << "Output ";
|
||||
break;
|
||||
}
|
||||
printLiteralString(s, this->text);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
/**
 * gtest printing hook for lists of nodes: writes each node's print() form
 * on its own line.
 *
 * @param nodes the nodes to print
 * @param os    stream to write to
 */
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
{
    for (auto const & node : nodes) {
        // A real newline, not the two characters backslash + 'n'.
        *os << node.print() << '\n';
    }
}
|
||||
|
||||
/**
 * One-shot convenience wrapper: builds a parser, feeds it the whole input and
 * returns the resulting nodes.
 *
 * @param prompt prompt string that introduces a command
 * @param input  complete text to parse
 * @param indent number of leading spaces expected before prompt/output lines
 * @return the parsed syntax nodes, in input order
 */
auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
{
    CLILiterateParser oneShot{std::move(prompt), indent};
    oneShot.feed(input);
    // Consume the temporary parser rather than copying its node list.
    return std::move(oneShot).intoSyntax();
}
|
||||
|
||||
/**
 * Consumes the parser (rvalue-qualified) and hands over the accumulated
 * nodes without copying them.
 */
auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
{
    std::vector<Node> taken = std::move(syntax_);
    return taken;
}
|
||||
|
||||
CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
|
||||
: state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
|
||||
, prompt_(prompt)
|
||||
, indent_(indent)
|
||||
, lastWasOutput_(false)
|
||||
, syntax_{}
|
||||
{
|
||||
assert(!prompt.empty());
|
||||
}
|
||||
|
||||
void CLILiterateParser::feed(char c)
|
||||
{
|
||||
if constexpr (DEBUG_PARSER) {
|
||||
std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
|
||||
}
|
||||
|
||||
if (c == '\n') {
|
||||
onNewline();
|
||||
return;
|
||||
}
|
||||
|
||||
std::visit(
|
||||
overloaded{
|
||||
[&](Indent & s) {
|
||||
if (c == ' ') {
|
||||
if (++s.pos >= indent_) {
|
||||
transition(Prompt{});
|
||||
}
|
||||
} else {
|
||||
transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
|
||||
}
|
||||
},
|
||||
[&](Prompt & s) {
|
||||
if (s.pos >= prompt_.length()) {
|
||||
transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
|
||||
return;
|
||||
} else if (c == prompt_[s.pos]) {
|
||||
// good prompt character
|
||||
++s.pos;
|
||||
} else {
|
||||
// didn't match the prompt, so it must have actually been output.
|
||||
s.lineAccumulator.push_back(c);
|
||||
transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
|
||||
return;
|
||||
}
|
||||
s.lineAccumulator.push_back(c);
|
||||
},
|
||||
[&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
|
||||
state_);
|
||||
}
|
||||
|
||||
void CLILiterateParser::onNewline()
|
||||
{
|
||||
State lastState = std::move(state_);
|
||||
bool newLastWasOutput = false;
|
||||
|
||||
syntax_.push_back(std::visit(
|
||||
overloaded{
|
||||
[&](Indent & s) {
|
||||
// XXX: technically this eats trailing spaces
|
||||
|
||||
// a newline following output is considered part of that output
|
||||
if (lastWasOutput_) {
|
||||
newLastWasOutput = true;
|
||||
return Node::mkOutput("");
|
||||
}
|
||||
return Node::mkCommentary("");
|
||||
},
|
||||
[&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
|
||||
[&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
|
||||
[&](OutputLine & s) {
|
||||
newLastWasOutput = true;
|
||||
return Node::mkOutput(std::move(s.lineAccumulator));
|
||||
},
|
||||
[&](Prompt & s) {
|
||||
// INDENT followed by newline is also considered a blank output line
|
||||
return Node::mkOutput(std::move(s.lineAccumulator));
|
||||
}},
|
||||
lastState));
|
||||
|
||||
transition(Indent{});
|
||||
lastWasOutput_ = newLastWasOutput;
|
||||
}
|
||||
|
||||
void CLILiterateParser::feed(std::string_view s)
|
||||
{
|
||||
for (char ch : s) {
|
||||
feed(ch);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Switches the parser to a new state.
 *
 * @param newState the requested state; an Indent request is rewritten to
 *                 Prompt when the parser was configured with indent 0.
 */
void CLILiterateParser::transition(State newState)
{
    // When we expect INDENT and we are parsing without indents, commentary
    // cannot exist, so we want to transition directly into PROMPT before
    // resuming normal processing.
    if (Indent * i = std::get_if<Indent>(&newState); i != nullptr && indent_ == 0) {
        newState = Prompt{AccumulatingState{}, i->pos};
    }

    // The parameter is ours: move it so accumulated line text is not copied.
    state_ = std::move(newState);
}
|
||||
|
||||
/**
 * Read-only access to the nodes produced so far; one node is appended for
 * every newline fed into the parser.
 */
auto CLILiterateParser::syntax() const -> std::vector<Node> const &
{
    return this->syntax_;
}
|
||||
|
||||
};
|
127
tests/unit/libutil-support/tests/cli-literate-parser.hh
Normal file
127
tests/unit/libutil-support/tests/cli-literate-parser.hh
Normal file
|
@ -0,0 +1,127 @@
|
|||
#pragma once
|
||||
///@file
|
||||
|
||||
#include <compare>
#include <memory>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
|
||||
|
||||
namespace nix {
|
||||
/*
|
||||
* A DFA parser for literate test cases for CLIs.
|
||||
*
|
||||
* FIXME: implement merging of these, so you can auto update cases that have
|
||||
* comments.
|
||||
*
|
||||
* Format:
|
||||
* COMMENTARY
|
||||
* INDENT PROMPT COMMAND
|
||||
* INDENT OUTPUT
|
||||
*
|
||||
* e.g.
|
||||
* commentary commentary commentary
|
||||
* nix-repl> :t 1
|
||||
* an integer
|
||||
*
|
||||
* Yields:
|
||||
* Commentary "commentary commentary commentary"
|
||||
* Command ":t 1"
|
||||
* Output "an integer"
|
||||
*
|
||||
* Note: one Output line is generated for each line of the sources, because
|
||||
* this is effectively necessary to be able to align them in the future to
|
||||
* auto-update tests.
|
||||
*/
|
||||
class CLILiterateParser
|
||||
{
|
||||
public:
|
||||
|
||||
enum class NodeKind {
|
||||
COMMENTARY,
|
||||
COMMAND,
|
||||
OUTPUT,
|
||||
};
|
||||
|
||||
struct Node
|
||||
{
|
||||
NodeKind kind;
|
||||
std::string text;
|
||||
std::strong_ordering operator<=>(Node const &) const = default;
|
||||
|
||||
static Node mkCommentary(std::string text)
|
||||
{
|
||||
return Node{.kind = NodeKind::COMMENTARY, .text = text};
|
||||
}
|
||||
|
||||
static Node mkCommand(std::string text)
|
||||
{
|
||||
return Node{.kind = NodeKind::COMMAND, .text = text};
|
||||
}
|
||||
|
||||
static Node mkOutput(std::string text)
|
||||
{
|
||||
return Node{.kind = NodeKind::OUTPUT, .text = text};
|
||||
}
|
||||
|
||||
auto print() const -> std::string;
|
||||
};
|
||||
|
||||
CLILiterateParser(std::string prompt, size_t indent = 2);
|
||||
|
||||
auto syntax() const -> std::vector<Node> const &;
|
||||
|
||||
/** Feeds a character into the parser */
|
||||
void feed(char c);
|
||||
|
||||
/** Feeds a string into the parser */
|
||||
void feed(std::string_view s);
|
||||
|
||||
/** Parses an input in a non-streaming fashion */
|
||||
static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
|
||||
|
||||
/** Consumes a CLILiterateParser and gives you the syntax out of it */
|
||||
auto intoSyntax() && -> std::vector<Node>;
|
||||
|
||||
private:
|
||||
|
||||
struct AccumulatingState
|
||||
{
|
||||
std::string lineAccumulator;
|
||||
};
|
||||
struct Indent
|
||||
{
|
||||
size_t pos = 0;
|
||||
};
|
||||
struct Commentary : public AccumulatingState
|
||||
{};
|
||||
struct Prompt : AccumulatingState
|
||||
{
|
||||
size_t pos = 0;
|
||||
};
|
||||
struct Command : public AccumulatingState
|
||||
{};
|
||||
struct OutputLine : public AccumulatingState
|
||||
{};
|
||||
|
||||
using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
|
||||
State state_;
|
||||
|
||||
constexpr static auto stateDebug(State const&) -> const char *;
|
||||
|
||||
const std::string prompt_;
|
||||
const size_t indent_;
|
||||
|
||||
/** Last line was output, so we consider a blank to be part of the output */
|
||||
bool lastWasOutput_;
|
||||
|
||||
std::vector<Node> syntax_;
|
||||
|
||||
void transition(State newState);
|
||||
void onNewline();
|
||||
};
|
||||
|
||||
// Override gtest printing for lists of nodes
|
||||
void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
|
||||
};
|
24
tests/unit/libutil-support/tests/debug-char.hh
Normal file
24
tests/unit/libutil-support/tests/debug-char.hh
Normal file
|
@ -0,0 +1,24 @@
|
|||
#pragma once
///@file

#include <cctype>
#include <ostream>
#include <boost/io/ios_state.hpp>

namespace nix {

/**
 * Wrapper around a single char whose stream output is safe to read in debug
 * traces: printable characters are written as-is, everything else as a hex
 * byte value.
 */
struct DebugChar
{
    char c;
};

/**
 * Streams a DebugChar.
 *
 * Printable characters are written unchanged; any other character is written
 * as "0x" followed by its byte value in hexadecimal. The stream's format
 * flags are restored on return by the ios_flags_saver.
 */
inline std::ostream & operator<<(std::ostream & s, DebugChar c)
{
    boost::io::ios_flags_saver _ifs(s);

    // isprint() is undefined for negative values other than EOF, so go
    // through unsigned char first (plain char may be signed).
    const auto byte = static_cast<unsigned char>(c.c);

    if (std::isprint(byte)) {
        s << c.c;
    } else {
        s << std::hex << "0x" << static_cast<unsigned int>(byte);
    }
    return s;
}

}
|
Loading…
Reference in a new issue