Re: Type System as Design Tool [Was: We do not use C++ exceptions]
Hi!
thant.tessman@gmail.com schrieb:
http://www.thant.com/projects/combinators/lex.sml
Without comments, this code fits on a single page. Can you imagine
writing code to lex floating-point numbers in a single page of C++,
let alone writing what is effectively a lexer generator in a single
page?
Yes. Almost.
First I had to implement foldl, map and concatMap (I'm a Haskell guy,
this is why these functions take on the Haskell names and semantics).
The lexer generator takes lines 83 to 124, that are 41 lines which today
is roughly a page on screen. (Note that some lines are split up to make
them fit into a usenet message. This shouldn't break compiling.)
The "optional" functions like pos, opt, and friends make up another 20
lines, most of the functions being one liners.
Afterwards some output and the "key" function, namely "matchReal" which
takes nearly 20 lines, because I formatted the expressions for better
readability (I could hardly figure out, what your expression does).
The main method takes input parameters and run the lexer, afterwards
displaying the result.
It took me about 3hrs.
As noted by others, lines of code is not the best measure. Yes, the C++
code takes more lines than your SML implementation. But the way of using
functions that return functions is the same. It _can_ be done in C++.
Frank
lex.cpp:
//Author Frank Birbacher (C)2009
//using Boost 1.38.0 (see www.boost.org)
//compiled using gcc 4.2.4
#include <deque>
#include <list>
#include <utility>
#include <string>
#include <cctype>
#include <boost/optional.hpp>
#include <boost/function.hpp>
#include <boost/foreach.hpp>
#include <boost/lambda/lambda.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/assign/list_of.hpp>
//HACK for introducing push/pop_front
struct string : std::string
{
string() {}
string(std::string const& str) : std::string(str) {}
string(const char* const str) : std::string(str) {}
char front() const { return at(0); }
void pop_front() { erase(begin()); }
void push_front(char c) { insert(begin(), c); }
};
using std::deque;
using std::list;
using boost::optional;
using boost::function;
using boost::assign::list_of;
// make up for predefined functions of Haskell:
template<typename Argument, typename List>
Argument foldl(
function<Argument (Argument, typename List::value_type)> const& func,
Argument result,
List const& list
)
{
BOOST_FOREACH(typename List::value_type const& value, list)
result = func(result, value);
return result;
}
template<typename ListA, typename ListB>
ListB map(
function<typename ListB::value_type (typename ListA::value_type)> const& func,
ListA const& list
)
{
ListB result;
BOOST_FOREACH(typename ListA::value_type const& a, list)
result.push_back(func(a));
return result;
}
template<typename A, typename ListB, typename ListA>
ListB concatMap(
function<ListB (A)> const& func,
ListA const& list
)
{
ListB result;
BOOST_FOREACH(A const& a, list)
{
ListB const value = func(a);
result.insert(result.end(), value.begin(), value.end());
}
return result;
}
namespace match {
using namespace boost::lambda;
typedef char Token;
typedef string TokenList;
typedef std::pair<TokenList, TokenList> Stream;
typedef list<Stream> StreamList;
typedef function<StreamList (StreamList)> Matcher;
typedef list<Matcher> MatcherList;
StreamList matchPred(
const function<bool (Token)> pred,
const StreamList& streams
)
{
StreamList result;
BOOST_FOREACH(Stream stream, streams)
{
if(stream.second.empty()) continue;
const Token t = stream.second.front();
if( ! pred(t)) continue;
stream.first.push_front(t);
stream.second.pop_front();
result.push_back(stream);
}
return result;
}
Matcher pred(const function<bool (Token)> p)
{ return bind(&matchPred, p, _1); }
Matcher tok(const Token token)
{ return pred(token==_1); }
Matcher seq(MatcherList const& matchers)
{ return bind(&foldl<StreamList, MatcherList>, unlambda(bind(_2, _1)), _1, matchers); }
//to circumvent missing functionality in boost::lambda
function<StreamList (Matcher)> applyMatcher(StreamList const list)
{ return bind(_1, list); }
Matcher bar(MatcherList const& matchers)
{ return bind(&concatMap<Matcher, StreamList, MatcherList>, bind(&applyMatcher, _1), matchers); }
StreamList starImpl(Matcher const matcher, StreamList current)
{
StreamList result;
for(; ! current.empty(); current = matcher(current))
result.insert(result.end(), current.begin(), current.end());
return result;
}
Matcher star(Matcher const matcher)
{ return bind(&starImpl, matcher, _1); }
Matcher pos(Matcher const matcher)
{ return seq(list_of(matcher)(star(matcher))); }
Matcher opt(Matcher const matcher)
{ return bar(list_of(matcher)(_1)); }
Matcher range(Token const min, Token const max)
{ return pred(min <= _1 && _1 <= max); }
Matcher lit(TokenList const& list)
{ return seq(::map<TokenList, MatcherList>(&tok, list)); }
Matcher set(TokenList const& list)
{ return bar(::map<TokenList, MatcherList>(&tok, list)); }
optional<Stream> longest(StreamList const& list)
{
optional<Stream> result;
BOOST_FOREACH(Stream const& stream, list)
if(!result || result->first.size() < stream.first.size())
result = stream;
return result;
}
optional<Stream> lex(Matcher const rule, TokenList const s)
{ return longest(rule(list_of(std::make_pair(TokenList(), s)))); }
}
#include <iterator>
#include <ostream>
#include <iostream>
void printStream(match::Stream const& stream)
{
using namespace match;
TokenList const& consumed = stream.first;
std::copy(consumed.rbegin(), consumed.rend(), std::ostream_iterator<Token>(std::cout));
TokenList const& rest = stream.second;
if( ! rest.empty())
{
std::cout << " - ";
std::copy(rest.begin(), rest.end(), std::ostream_iterator<Token>(std::cout));
}else
std::cout << " matched";
std::cout << '\n';
}
void printStream(optional<match::Stream> const& stream)
{
if(stream)
printStream(*stream);
else
std::cout << "no match\n";
}
void printStreamList(match::StreamList const& streamlist)
{
BOOST_FOREACH(match::Stream const& stream, streamlist)
printStream(stream);
}
#include <list>
match::Matcher matchReal()
{
using namespace match;
Matcher
digit = pred(static_cast<int(*)(int)>(&std::isdigit)),
digits = pos(digit),
mantissa = seq(list_of(tok('.'))(digits)),
exp = seq(list_of(set("eE"))(opt(set("+-")))(digits)),
real =
seq(list_of
(opt(tok('-')))
(bar(list_of
(seq(list_of
(digits)
(opt(mantissa))))
(mantissa)))
(opt(exp)))
;
return real;
}
int main(int argc, char* argv[])
{
using namespace match;
const std::list<TokenList> inputs(argv+1, argv+argc);
StreamList streams;
BOOST_FOREACH(TokenList input, inputs)
streams.push_back(std::make_pair(TokenList(), input));
const Matcher matcher = matchReal();
StreamList result = matcher(streams);
//printStreamList(result);
printStream(match::longest(result));
}
--
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]