379 lines
11 KiB
C++
379 lines
11 KiB
C++
// Copyright (C) 2005 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License. See LICENSE.txt for the full license.
|
|
|
|
|
|
#include <string>
|
|
#include <sstream>
|
|
|
|
#include <dlib/tokenizer.h>
|
|
#include "tester.h"
|
|
|
|
namespace
|
|
{
|
|
using namespace test;
|
|
using namespace std;
|
|
using namespace dlib;
|
|
|
|
logger dlog("test.tokenizer");
|
|
|
|
template <
|
|
typename tok
|
|
>
|
|
void tokenizer_kernel_test (
|
|
)
|
|
/*!
|
|
requires
|
|
- tok is an implementation of tokenizer_kernel_abstract.h
|
|
ensures
|
|
- runs tests on tok for compliance with the specs
|
|
!*/
|
|
{
|
|
|
|
print_spinner();
|
|
|
|
tok test;
|
|
|
|
DLIB_TEST(test.numbers() == "0123456789");
|
|
DLIB_TEST(test.uppercase_letters() == "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
|
DLIB_TEST(test.lowercase_letters() == "abcdefghijklmnopqrstuvwxyz");
|
|
|
|
DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() +
|
|
test.uppercase_letters() + test.numbers(),"");
|
|
DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() +
|
|
test.uppercase_letters(),"");
|
|
|
|
DLIB_TEST(test.stream_is_set() == false);
|
|
test.clear();
|
|
DLIB_TEST(test.stream_is_set() == false);
|
|
|
|
DLIB_TEST_MSG(test.get_identifier_body() == "_" + test.lowercase_letters() +
|
|
test.uppercase_letters() + test.numbers(),"");
|
|
DLIB_TEST_MSG(test.get_identifier_head() == "_" + test.lowercase_letters() +
|
|
test.uppercase_letters(),"");
|
|
|
|
tok test2;
|
|
|
|
ostringstream sout;
|
|
istringstream sin;
|
|
test2.set_stream(sin);
|
|
|
|
DLIB_TEST(test2.stream_is_set());
|
|
DLIB_TEST(&test2.get_stream() == &sin);
|
|
|
|
int type;
|
|
string token;
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
|
|
|
|
sin.clear();
|
|
sin.str(" The cat 123asdf1234 ._ \n test.");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
|
|
DLIB_TEST(test2.peek_token() == "The");
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "The");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "cat");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::NUMBER);
|
|
DLIB_TEST_MSG(token == "123","token: " << token);
|
|
|
|
DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
|
|
DLIB_TEST(test2.peek_token() == "asdf1234");
|
|
DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
|
|
DLIB_TEST(test2.peek_token() == "asdf1234");
|
|
DLIB_TEST(test2.peek_type() == tok::IDENTIFIER);
|
|
DLIB_TEST(test2.peek_token() == "asdf1234");
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "asdf1234");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "_");
|
|
|
|
DLIB_TEST(test2.peek_type() == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(test2.peek_token() == " ","token: \"" << token << "\"" <<
|
|
"\ntoken size: " << (unsigned int)token.size());
|
|
|
|
swap(test,test2);
|
|
|
|
DLIB_TEST(test2.stream_is_set() == false);
|
|
|
|
DLIB_TEST(test.peek_type() == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(test.peek_token() == " ","token: \"" << token << "\"" <<
|
|
"\ntoken size: " << (unsigned int)token.size());
|
|
test.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
|
|
"\ntoken size: " << (unsigned int)token.size());
|
|
|
|
test.get_token(type,token);
|
|
DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
|
|
DLIB_TEST_MSG(token == "\n","token: " << token);
|
|
|
|
swap(test,test2);
|
|
DLIB_TEST(test.stream_is_set() == false);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST_MSG(token == "test","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test2.set_identifier_token("_" + test.uppercase_letters() +
|
|
test.lowercase_letters(),test.numbers() + "_" + test.uppercase_letters()
|
|
+test.lowercase_letters());
|
|
|
|
|
|
sin.clear();
|
|
sin.str(" The cat 123asdf1234 ._ \n\r test.");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "The");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "cat");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::NUMBER);
|
|
DLIB_TEST_MSG(token == "123","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "asdf1234");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "_");
|
|
|
|
swap(test,test2);
|
|
|
|
DLIB_TEST(test2.stream_is_set() == false);
|
|
|
|
test.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
|
|
"\ntoken size: " << (unsigned int)token.size());
|
|
|
|
test.get_token(type,token);
|
|
DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
|
|
DLIB_TEST_MSG(token == "\n","token: " << token);
|
|
|
|
swap(test,test2);
|
|
DLIB_TEST(test.stream_is_set() == false);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == "\r ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST_MSG(token == "test","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test2.set_identifier_token(test.uppercase_letters() +
|
|
test.lowercase_letters(),test.numbers() + test.uppercase_letters()
|
|
+test.lowercase_letters());
|
|
|
|
|
|
sin.clear();
|
|
sin.str(" The cat 123as_df1234 ._ \n test.");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "The");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "cat");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST(token == " ");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::NUMBER);
|
|
DLIB_TEST_MSG(token == "123","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "as");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == "_","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST(token == "df1234");
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST(token == "_");
|
|
|
|
swap(test,test2);
|
|
|
|
DLIB_TEST(test2.stream_is_set() == false);
|
|
|
|
test.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: \"" << token << "\"" <<
|
|
"\ntoken size: " << (unsigned int)token.size());
|
|
|
|
test.get_token(type,token);
|
|
DLIB_TEST_MSG(type == tok::END_OF_LINE,"token: " << token);
|
|
DLIB_TEST_MSG(token == "\n","token: " << token);
|
|
|
|
swap(test,test2);
|
|
DLIB_TEST(test.stream_is_set() == false);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::WHITE_SPACE);
|
|
DLIB_TEST_MSG(token == " ","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::IDENTIFIER);
|
|
DLIB_TEST_MSG(token == "test","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::CHAR);
|
|
DLIB_TEST_MSG(token == ".","token: " << token);
|
|
|
|
test2.get_token(type,token);
|
|
DLIB_TEST(type == tok::END_OF_FILE);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
class tokenizer_tester : public tester
|
|
{
|
|
public:
|
|
tokenizer_tester (
|
|
) :
|
|
tester ("test_tokenizer",
|
|
"Runs tests on the tokenizer component.")
|
|
{}
|
|
|
|
void perform_test (
|
|
)
|
|
{
|
|
dlog << LINFO << "testing kernel_1a";
|
|
tokenizer_kernel_test<tokenizer::kernel_1a> ();
|
|
dlog << LINFO << "testing kernel_1a_c";
|
|
tokenizer_kernel_test<tokenizer::kernel_1a_c>();
|
|
}
|
|
} a;
|
|
|
|
}
|
|
|
|
|