2019-04-13 21:48:29 Python

【Python】レキシカルアナライザ作ってみた件【字句解析】 パーサーは鋭意制作中?

Copy Copied! Full
#!/usr/bin/env python
# encoding: utf-8
"""
tokenizer.py

A small regex-driven lexical analyzer: subclasses of ``LexicalAnalyzer``
declare a ``tokens`` table of prioritised rules and ``feed`` turns a text
into a list of ``Token`` objects.

Created by Frodo on 4/12/19.
Copyright (c) 2019 Frodo. All rights reserved.
"""
import re
from re import I, S


class Token:
    """A matched piece of text (``face``) and the rule name that matched it (``kind``)."""

    def __init__(self, face, kind):
        self.face = face
        self.kind = kind

    def __str__(self):
        return f"<Token [face: {self.face}, kind: {self.kind}]>"

    def __repr__(self):
        return str(self)


def some(name, priority, *tokens):
    """Build a rule that matches any one of several regex alternatives.

    The alternatives are joined with ``|`` in the order given, so the regex
    engine picks the FIRST one that matches: when one alternative is a prefix
    of another, list the longer one first.

    Returns the same ``(property, priority)`` pair as ``pattern``.
    """
    return pattern(name, priority, '|'.join(tokens))


def pattern(name, priority, pattern):
    """Build a rule from a single regex.

    Args:
        name: value stored in ``Token.kind`` for tokens produced by the rule.
        priority: rules with a higher priority are tried first.
        pattern: regex source; matched case-insensitively with DOTALL.

    Returns:
        ``(impl, priority)`` where ``impl`` is a property meant to be attached
        to a ``LexicalAnalyzer`` subclass. Reading it tries the rule at the
        lexer's current position: on success the lexer is advanced past the
        match and a ``Token`` is returned, otherwise ``None`` is returned and
        the position is untouched.
    """
    # Compile once per rule instead of on every attempt.
    regex = re.compile(pattern, S | I)

    @property
    def impl(self):
        m = regex.match(self.current)
        # A zero-length match consumes nothing; treating it as a token would
        # leave the cursor in place and loop forever.
        if m is None or m.end() == 0:
            return None
        # Advance exactly past the lexeme (the original added 1 and silently
        # dropped the character following every token).
        self.index += m.end()
        return Token(m.group(0), name)

    return impl, priority


# Decimal literal with a mandatory dot: "1.", "1.5" or ".5".
lit = r'[0-9]+\.[0-9]*|[0-9]*\.[0-9]+'


class LexicalAnalyzer:
    """Base class for lexers.

    A subclass defines a class attribute ``tokens``: a dict mapping an
    attribute name to a rule created by ``some`` or ``pattern``. At class
    creation each rule becomes a property on the subclass and ``tokens`` is
    replaced by the list of rule names ordered by descending priority.
    """

    def __init__(self, text):
        self.index = 0      # offset of the next unread character
        self.text = text

    @property
    def is_eot(self):
        """True when the whole text has been consumed."""
        return self.text[self.index:] == ''

    @property
    def current(self):
        """The not-yet-consumed remainder of the text."""
        return self.text[self.index:]

    @property
    def is_skip_char(self):
        """True when the next character is whitespace to skip (False at end of text)."""
        if self.is_eot:
            return False
        # NOTE(review): the literal ends with two spaces as found in the
        # source; one of them may originally have been a different whitespace
        # character (e.g. a full-width space) -- confirm against the original.
        return self.current[0] in "\n\r\t\v  "

    @property
    def error(self):
        """Reading this property raises SyntaxError for the current character."""
        raise self._invalid_character()

    def _invalid_character(self):
        """Return (not raise) the SyntaxError describing the current character."""
        return SyntaxError(f'Invalid character: {self.current[:1]}')

    def __init_subclass__(cls, **ka):
        super().__init_subclass__(**ka)
        if not hasattr(cls, 'tokens'):
            raise TypeError("no token definitions found.")
        definitions = cls.tokens
        if not isinstance(definitions, dict):
            # Inherited from a parent lexer whose table was already converted
            # to a sorted name list; its rule properties are inherited too.
            return
        for name, val in definitions.items():
            setattr(cls, name, val[0])
        # sorted() is stable, so rules of equal priority keep dict order.
        cls.tokens = sorted(
            definitions, key=lambda x: definitions[x][1], reverse=True)

    def __next__(self):
        """Advance one character; return the remaining text as it was before advancing."""
        c = self.current
        self.index += 1
        return c

    def tokenize(self):
        """Skip whitespace and return the next token.

        Returns:
            The ``Token`` of the highest-priority rule that matches, or
            ``None`` when only whitespace (or nothing) is left.

        Raises:
            SyntaxError: if no rule matches at the current position.
        """
        while self.is_skip_char:
            next(self)
        if self.is_eot:
            return None
        for name in self.tokens:
            token = getattr(self, name)     # reading the property runs the rule
            if token is not None:
                return token
        raise self._invalid_character()

    @classmethod
    def feed(cls, txt):
        """Tokenize ``txt`` completely and return the list of tokens."""
        lexer = cls(txt)
        # tokenize() returns None once the text is exhausted.
        return list(iter(lexer.tokenize, None))


class TestLexer(LexicalAnalyzer):
    """Sample lexer for a small curly-brace style language."""

    __test__ = False    # the "Test" prefix is only a name; keep pytest from collecting it

    # NOTE: 'ident' is a catch-all for any run of non-space characters, and
    # keyword/number rules have no word-boundary check, so tokens generally
    # need to be separated by whitespace (e.g. "index" lexes as 'in' + 'dex').
    tokens = {
        'ident': pattern(
            'ident', -1, r'\S+'),
        'reserved': some(
            'reserved', 0,
            'if', 'when', 'while', 'of', 'in', 'as', 'on', 'each',
            'const', 'val', 'var'),
        'start_bracket': some(
            'start-bracket', 1, r'\[', '<', r'\(', r'\{'),
        'end_bracket': some(
            'end-bracket', 1, r'\]', '>', r'\)', r'\}'),
        # '::' must precede ':' or it can never match as a whole.
        'spec_symbol': some(
            'spec-symbol', 1, '::', ':', '->', '='),
        'comment': pattern(
            'comment', 1, r'//[^\n]*(?:\n|$)|/\*(?:\*(?!/)|[^*])*\*/'),
        # The escaped-quote alternative comes first; otherwise the character
        # class consumes the backslash and the string ends at the \" quote.
        'string': pattern(
            'string', 2, r'\$?"(?:\\"|[^"\n\r])*"|@"(?:\\"|[^"])*"'),
        # The alternation is grouped between the quotes; ungrouped, the bare
        # \S alternative matched any single non-space character.
        'char': pattern(
            'char', 2,
            r"'(?:\\(?:u[0-9a-f]{4}|0[0-8]{2}|x[0-9a-f]{2}|[a-z])|\S| )'"),
        # Prefixed forms first: a leading [0-9]+ would stop at the "0" of "0x".
        'integer': pattern(
            'integer', 2, r'0x[0-9a-f]+|0b[01]+|[0-9]+'),
        # 'infinity' before 'inf' for the same first-match-wins reason.
        'decimal': pattern(
            'decimal', 3,
            rf'(?:{lit}|[0-9]+)e[-+]?[0-9]+|{lit}|nan|infinity|inf'),
    }


if __name__ == '__main__':
    print(TestLexer.feed('''val x = 12e3'''))
【Python】レキシカルアナライザ作ってみた件【字句解析】

パーサーは鋭意制作中?