import lex

# List of token names. This is compulsory.
tokens = (
                'NUM',
                'VAR',
                'EQUALS',
                'ADDOP',
                'SUBOP'
                'MULOP',
                'DIVOP',
                'LPAREN',
                'RPAREN'
        )

# Regular statement rules for tokens.
t_VAR           = r'[a-zA-Z_][\w_]*'
t_EQUALS        = r'='
t_ADDOP         = r'\+'
t_SUBOP         = r'-'
t_MULOP         = r'\*'
t_DIVOP         = r'/'
t_LPAREN        = r'\('
t_RPAREN        = r'\)'

# A regular statement rule with some action code.
def t_NUM(t) :
    r'\d+'
    try:
         t.value = int(t.value)
    except ValueError:
         print "Line %d: Number %s is too large!" % (t.lineno, t.value)
    t.value = 0
    return t

# Define a rule so that we can track line numbers.
def t_newline(t):
    r'\n+'
    t.lineno += len(t.value)

# A string containing ignored characters (spaces and tabs).
t_ignore  = ' \t'

# Error handling rule
def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.skip(1)

# Build the lexer
lex.lex()

# Get the input
data = raw_input()

lex.input(data)

# Tokenize
while 1 :
        tok = lex.token()
        if not tok :
                break
        print tok

                     If you want to include reserved words, it is usually
   easier to just match a variable name (identifier) and do a special
   name lookup in a function like this:
   
reserved = {
   'if' : 'IF',
   'then' : 'THEN',
   'else' : 'ELSE',
   'while' : 'WHILE',
   ...
}

def t_VAR(t):
    r'[a-zA-Z_][\w_]*'
    t.type = reserved.get(t.value,'ID')    # Check for reserved words
    return t
