#include "lexer.h"

// Builds a lexer that sends its diagnostics to `errors`.
// The Location base is seeded with (1, 0) - presumably line 1, position 0,
// the same values open() later applies through set_line()/set_pos().
Lexer::Lexer( Errors& errors ) :
   Location( 1, 0 ),
   _errors( errors )
{
   // no comment is open and nothing has been read yet
   _comment_depth = 0;
   _unget_pos = 0;

   // pre-size the buffer that collects the text of the current token
   _data.reserve( 64 );
}

// Destructor: performs no explicit cleanup of its own.
Lexer::~Lexer()
{
}

// Opens `filename` as the input of the lexer and rewinds the lexer state
// (comment nesting depth, line, position and saved unget position).
//
// Returns true when the stream is usable after opening. When it is not, an
// error naming the file is reported through the Errors sink and false is
// returned, so callers can tell that there is nothing to lex.
bool Lexer::open( const std::string& filename )
{
   _comment_depth = 0;
   _file.open( filename.c_str() );

   // start counting from the first line again, whatever was lexed before
   set_line( 1 );
   set_pos( 0 );
   _unget_pos = 0;

   if( !_file.good() )
   {
      _errors.report() << "Couldn't open file '" << filename << "'";
      return false;
   }

   return true;
}

// Closes the underlying input stream. Line/position bookkeeping is left
// untouched; open() resets it when a new file is opened.
void Lexer::close()
{
   _file.close();
}

// Reads and returns the next token from the input.
//
// Whitespace is skipped. The had_*() helpers that handle comments return an
// empty Token::Ptr, which makes the loop keep reading until a real token has
// been produced. At end of input a T_EOF token is returned. A character that
// starts no known token yields a T_UNEXPECTED_CHAR token carrying that
// character, and an error is reported.
//
// The text collected in _data is handed to the token through finalize(), and
// _data is emptied before returning.
Token::Ptr Lexer::next()
{
   Token::Ptr token;

   while( !token )
   {
      int c = get();

      if( c == EOF )
      {
         token = Token::Ptr( new Token( *this, Token::T_EOF ) );
      } else if(
         (c == '\n') ||
         (c == '\r') ||
         (c == '\t') ||
         (c == ' ')
         )
      {
         // whitespace
      } else if( c == '/' ) {
         // divide operator or the start of a comment
         token = had_slash();
      } else if( c == '0' ) {
         // may introduce a binary/hexadecimal literal or a plain number
         token = had_zero();
      } else if( c == '"' ) {
         token = had_string();
      } else if( (c >= '1') && (c <= '9') ) {
         token = had_number();
      } else if(
         (c == '_') ||
         ((c >= 'a') && (c <= 'z')) ||
         ((c >= 'A') && (c <= 'Z'))
         )
      {
         token = had_identifier();
      } else if( is_symbol( c ) ) {
         token = had_symbol();
      } else {
         token = Token::Ptr( new Token( *this, Token::T_UNEXPECTED_CHAR ) );
         _data.push_back( c );

         // `c` is an int; streaming it directly would print its numeric
         // value, so convert it to a one-character string first
         _errors.report( *this ) << "Unexpected character '"
            << std::string( 1, static_cast<char>( c ) ) << "'";
      }
   }

   // the loop only ends once a token exists, so no null check is needed
   token->finalize( _data );

   if( token->type() == Token::T_UNKNOWN_SYMBOL )
   {
      _errors.report( *token ) << "Unrecognised symbol '" << token->string() << "'";
   }

   // empty the token buffer
   _data.clear();

   return token;
}

// Reads one character from the input and keeps the line/position counters in
// step with it. The position held before the read is saved in _unget_pos so
// that unget() can restore it.
//
// Returns the value read from the stream. Every value other than '\n'
// (EOF included) advances the position by one; '\n' bumps the line counter
// and resets the position to 0.
int Lexer::get()
{
   const int ch = _file.get();

   _unget_pos = pos();

   if( ch != '\n' )
   {
      set_pos( _unget_pos + 1 );
      return ch;
   }

   // newline: continue at the start of the following line
   set_line( line() + 1 );
   set_pos( 0 );

   return ch;
}

// Steps the stream back by one character and undoes the bookkeeping done by
// the matching get(). Only a single previous position is stored
// (_unget_pos), so the position is restored to whatever the last get() saved.
void Lexer::unget()
{
   _file.unget();

   // if the character now waiting in the stream is a newline, get() had
   // counted a line for it; take that line back
   const bool newline_pending = ( _file.peek() == '\n' );

   if( newline_pending )
   {
      set_line( line() - 1 );
   }

   set_pos( _unget_pos );
}

// Called after a '/' has been read. Decides between the two comment forms
// and the divide operator by looking at the following character.
//
// Returns whatever the comment handlers return for "/*" and "//", otherwise
// pushes the extra character back and returns a T_DIVIDE token.
Token::Ptr Lexer::had_slash()
{
   switch( get() )
   {
   case '*':
      // "/*" opens the first level of a nested comment
      _comment_depth++;
      return nested_comment();

   case '/':
      return line_comment();

   default:
      break;
   }

   // not a comment: the character belongs to whatever comes next
   unget();

   return Token::Ptr( new Token( *this, Token::T_DIVIDE ) );
}

// Consumes the body of a "/* ... */" comment. Every further "/*" raises
// _comment_depth and every "*/" lowers it; the comment ends when the depth
// returns to 0. Running into end of file first is reported as an error.
//
// Always returns an empty Token::Ptr: a comment produces no token.
Token::Ptr Lexer::nested_comment()
{
   for( ;; )
   {
      const int first = get();

      if( first == EOF )
      {
         _errors.report( *this ) << "Unexpected end of file inside nested comment";
         break;
      }

      if( (first != '*') && (first != '/') )
      {
         // ordinary comment text
         continue;
      }

      // '*' or '/' may be the first half of a two-character delimiter
      const int second = get();

      if( (first == '*') && (second == '/') )
      {
         _comment_depth--;

         if( _comment_depth == 0 )
         {
            break;
         }
      } else if( (first == '/') && (second == '*') ) {
         _comment_depth++;
      } else {
         // no delimiter after all; re-examine the second character
         unget();
      }
   }

   return Token::Ptr();
}

// Consumes a "//" comment: reads up to and including the next newline, or
// until end of file. Always returns an empty Token::Ptr because a comment
// produces no token.
Token::Ptr Lexer::line_comment()
{
   int ch;

   do
   {
      ch = get();
   } while( (ch != '\n') && (ch != EOF) );

   return Token::Ptr();
}

// Called after an opening '"' has been read. Collects the characters of the
// string literal into _data up to the closing '"', translating the escape
// sequences \n \r \t \\ \" and \xHH (two hexadecimal digits).
//
// Errors are reported for an unknown escape, a malformed \x escape and for
// reaching end of file before the closing quote. A T_STRING token is returned
// in every case.
Token::Ptr Lexer::had_string()
{
   Token::Ptr token = Token::Ptr( new Token( *this, Token::T_STRING ) );

   while( true )
   {
      int c = get();

      if( c == '\"' )
      {
         break;
      } else if( c == '\\' ) {
         // escape sequence
         int c2 = get();

         switch( c2 )
         {
         case 'n':
            _data.push_back( '\n' );
            break;

         case 'r':
            _data.push_back( '\r' );
            break;

         case 't':
            _data.push_back( '\t' );
            break;

         case '\\':
            _data.push_back( '\\' );
            break;

         case '\"':
            _data.push_back( '\"' );
            break;

         case 'x':
            {
               // hexadecimal: both digits are always consumed, even when
               // the first one is already invalid
               const int hi = hex( get() );
               const int lo = hex( get() );

               if( (hi >= 0) && (lo >= 0) )
               {
                  _data.push_back( static_cast<char>( (hi << 4) | lo ) );
               } else {
                  _errors.report( *this ) << "Invalid hexidecimal escape sequence inside string literal";
               }
            }
            break;

         case EOF:
            // input ended right after the backslash; the next get() yields
            // EOF again and the end-of-file branch below reports it
            break;

         default:
            // `c2` is an int; turn it into a one-character string so the
            // message shows the character rather than its numeric value
            _errors.report( *this ) << "Unrecognized escape sequence \\"
               << std::string( 1, static_cast<char>( c2 ) )
               << " inside string literal";
            break;
         }
      } else if( c == EOF ) {
         _errors.report( *this ) << "Unexpected end of file inside string literal";
         break;
      } else {
         _data.push_back( c );
      }
   }

   return token;
}

// Called after a leading '0' has been read. The following character selects
// the literal form: 'b' -> binary, 'x' -> hexadecimal, a digit or '.' ->
// ordinary number. Anything else is pushed back and a bare T_INTEGER token
// is returned.
Token::Ptr Lexer::had_zero()
{
   const int follower = get();

   if( follower == 'b' )
   {
      return had_binary();
   }

   if( follower == 'x' )
   {
      return had_hexadecimal();
   }

   const bool is_digit = (follower >= '0') && (follower <= '9');

   if( is_digit || (follower == '.') )
   {
      return had_number();
   }

   // the zero stands on its own; the follower belongs to the next token
   unget();

   return Token::Ptr( new Token( *this, Token::T_INTEGER ) );
}

// Called after "0b" has been read. Gathers the following run of '0'/'1'
// digits into _data, silently skipping '_' separators, and stops at the first
// other character (which is pushed back). An error is reported when no digit
// was collected. Returns a T_BINARY token.
Token::Ptr Lexer::had_binary()
{
   Token::Ptr token( new Token( *this, Token::T_BINARY ) );

   bool more = true;

   while( more )
   {
      const int digit = get();

      if( digit == '_' )
      {
         // drop the separator
         continue;
      }

      if( (digit == '0') || (digit == '1') )
      {
         _data.push_back( digit );
      } else {
         unget();
         more = false;
      }
   }

   if( _data.length() == 0 )
   {
      _errors.report( *this ) << "Zero length binary literal";
   }

   return token;
}

// Called after "0x" has been read. Gathers the following hexadecimal digits
// into _data, storing lower-case a-f as upper case and skipping '_'
// separators. The first other character is pushed back. An error is reported
// when no digit was collected. Returns a T_HEXADECIMAL token.
Token::Ptr Lexer::had_hexadecimal()
{
   Token::Ptr token( new Token( *this, Token::T_HEXADECIMAL ) );

   for( ;; )
   {
      const int c = get();

      if( c == '_' )
      {
         // drop the separator
         continue;
      }

      if( hex( c ) < 0 )
      {
         // not a hexadecimal digit: the literal ends here
         unget();
         break;
      }

      const bool lower_case = (c >= 'a') && (c <= 'f');

      _data.push_back( lower_case ? (c - 'a' + 'A') : c );
   }

   if( _data.length() == 0 )
   {
      _errors.report( *this ) << "Zero length hexadecimal literal";
   }

   return token;
}

// Called after the first character of a decimal number has been read. That
// character is pushed back and the whole digit run is collected into _data.
// A '.' or 'e' hands the token over to had_real(); any other character ends
// the literal. Returns a T_INTEGER token unless had_real() took over.
Token::Ptr Lexer::had_number()
{
   Token::Ptr token( new Token( *this, Token::T_INTEGER ) );

   // re-read the character the caller already consumed
   unget();

   for( ;; )
   {
      const int c = get();
      const bool is_digit = (c >= '0') && (c <= '9');

      if( is_digit )
      {
         _data.push_back( c );
         continue;
      }

      // whatever stopped the digit run is looked at again by the next reader
      unget();

      if( (c == '.') || (c == 'e') )
      {
         return had_real( token );
      }

      break;
   }

   return token;
}

// Continues lexing a number as a real literal. `token` is switched to T_REAL
// and digits, at most one '.', and at most one 'e' exponent (with optional
// sign) are appended to _data. A '.' is no longer accepted once the exponent
// has started. A trailing 'i' is consumed and turns the token into
// T_IMAGINARY; any other character ends the literal and is pushed back.
Token::Ptr Lexer::had_real( Token::Ptr token )
{
   token->mutate( Token::T_REAL );

   bool point_used = false;
   bool exponent_used = false;

   for( ;; )
   {
      const int c = get();

      if( (c >= '0') && (c <= '9') )
      {
         _data.push_back( c );
         continue;
      }

      if( (c == '.') && !point_used )
      {
         point_used = true;
         _data.push_back( c );
         continue;
      }

      if( (c == 'e') && !exponent_used )
      {
         // marking the point as used too bars a '.' inside the exponent
         point_used = true;
         exponent_used = true;
         _data.push_back( c );

         const int sign = get();

         if( (sign == '+') || (sign == '-') )
         {
            _data.push_back( sign );
         } else {
            unget();
         }

         continue;
      }

      if( c == 'i' )
      {
         token->mutate( Token::T_IMAGINARY );
      } else {
         unget();
      }

      break;
   }

   return token;
}

// Called after the first character of an identifier has been read. That
// character is pushed back and the full run of '_', letters and digits is
// collected into _data; the terminating character is pushed back.
// Returns a T_IDENTIFIER token.
Token::Ptr Lexer::had_identifier()
{
   Token::Ptr token( new Token( *this, Token::T_IDENTIFIER ) );

   // re-read the character the caller already consumed
   unget();

   int c = get();

   while(
      (c == '_') ||
      ((c >= 'a') && (c <= 'z')) ||
      ((c >= 'A') && (c <= 'Z')) ||
      ((c >= '0') && (c <= '9'))
      )
   {
      _data.push_back( c );
      c = get();
   }

   // `c` ended the identifier and belongs to the next token
   unget();

   return token;
}

// Called after the first character of a symbol has been read. That character
// is pushed back and symbol characters are appended to _data for as long as
// Token::is_symbol() still accepts the collected text. The character that
// breaks the match is pushed back for the next token.
//
// If the very first character is already rejected, it is kept in _data and
// stays consumed. Pushing it back as well would return an empty token and
// leave the same character in the stream, so the next call would read it
// again and the lexer would never advance. Keeping it also lets the
// "Unrecognised symbol" diagnostic in next() name the offending character.
//
// The token starts as T_UNKNOWN_SYMBOL; presumably Token::finalize() maps
// recognised text to its real type - confirm against Token.
Token::Ptr Lexer::had_symbol()
{
   Token::Ptr token = Token::Ptr( new Token( *this, Token::T_UNKNOWN_SYMBOL ) );
   unget();

   while( true )
   {
      int c = get();

      if( is_symbol( c ) )
      {
         _data.push_back( c );

         if( !Token::is_symbol( _data ) )
         {
            if( _data.length() > 1 )
            {
               // the text before this character was fine: drop the
               // character again and leave it for the next token
               _data.resize( _data.length() - 1 );
               unget();
            }

            // otherwise: a lone unrecognised character, keep and consume it
            break;
         }
      } else {
         unget();
         break;
      }
   }

   return token;
}

// Tells whether `c` is one of the punctuation characters that may form a
// symbol: '!' or any character in the ranges '#'..'/', ':'..'@', '['..'`'
// and '{'..'~'. Note that '"' is not part of the set.
bool Lexer::is_symbol( int c )
{
   const bool is_bang       = (c == '!');
   const bool hash_to_slash = (c >= '#') && (c <= '/');
   const bool colon_to_at   = (c >= ':') && (c <= '@');
   const bool bracket_to_tick = (c >= '[') && (c <= '`');
   const bool brace_to_tilde  = (c >= '{') && (c <= '~');

   return is_bang || hash_to_slash || colon_to_at || bracket_to_tick || brace_to_tilde;
}

// Converts one hexadecimal digit character to its value.
// Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
int Lexer::hex( int x )
{
   if( (x >= '0') && (x <= '9') )
   {
      return x - '0';
   }

   if( (x >= 'a') && (x <= 'f') )
   {
      return 10 + (x - 'a');
   }

   if( (x >= 'A') && (x <= 'F') )
   {
      return 10 + (x - 'A');
   }

   // not a hexadecimal digit
   return -1;
}
