用Python写一个文本转HTML的脚本

来源：互联网　发布：程序侠源码　编辑：程序博客网　时间：2024/04/28 03:24

这是我在实验楼上看到的一个项目，自己读了一遍，感觉用处很大，代码也不难懂，特此分享。

给定一个.txt文件，将其转换成HTML网页。

我们先来编写代码：

#!/usr/bin/python
# encoding: utf-8
# util.py
"""Helpers that split a text stream into blank-line-separated blocks."""


def lines(file):
    """Yield every line of *file*, then one extra newline-only line.

    The extra line acts as a sentinel blank line, so that ``blocks`` also
    flushes the last block when the input does not end with an empty line.

    Args:
        file: any iterable of text lines (an open file, ``sys.stdin``,
            or a list of strings).
    """
    for line in file:
        yield line
    yield '\n'


def blocks(file):
    """Yield each run of consecutive non-blank lines as a single string.

    Lines that are empty after ``strip()`` separate blocks. The lines of a
    block are concatenated and the result is stripped of leading and
    trailing whitespace; newlines between the lines of a block are kept.

    Args:
        file: any iterable of text lines.
    """
    block = []
    for line in lines(file):
        if line.strip():
            block.append(line)
        elif block:
            # A blank line ends the block collected so far; repeated blank
            # lines are skipped because ``block`` is empty by then.
            yield ''.join(block).strip()
            block = []

#!/usr/bin/python
# encoding: utf-8
# handlers.py
"""Output handlers that turn parser events into markup."""


class Handler:
    """Base class for handlers.

    Events are dispatched by name: ``start('x')`` calls ``start_x()``,
    ``end('x')`` calls ``end_x()``, and ``sub('x')`` builds a replacement
    function around ``sub_x(match)``. Names without a matching method are
    ignored.
    """

    def callback(self, prefix, name, *args):
        """Call the method named ``prefix + name`` if it exists.

        Returns:
            The method's return value, or None when this handler has no
            callable attribute of that name.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)

    def start(self, name):
        """Dispatch the ``start_<name>`` event."""
        self.callback('start_', name)

    def end(self, name):
        """Dispatch the ``end_<name>`` event."""
        self.callback('end_', name)

    def sub(self, name):
        """Return a replacement function suitable for ``re.sub``.

        The returned function passes the match object to ``sub_<name>``.
        When that method is missing or returns None, the matched text is
        returned unchanged.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                result = match.group(0)
            return result
        return substitution


class HTMLRenderer(Handler):
    """HTML handler: prints the HTML markup for each event to stdout."""

    def start_document(self):
        print('<html><head><title>ShiYanLou</title></head><body>')

    def end_document(self):
        print('</body></html>')

    def start_paragraph(self):
        print('<p style="color: #444;">')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2 style="color: #68BE5D;">')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul style="color: #363736;">')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1 style="color: #1ABC9C;">')

    def end_title(self):
        print('</h1>')

    def sub_emphasis(self, match):
        """Wrap the first captured group in ``<em>`` tags."""
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        """Turn the captured URL into a link that opens in a new tab."""
        return '<a target="_blank" style="text-decoration: none;color: #BC1A4B;" href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        """Turn the captured e-mail address into a ``mailto:`` link."""
        return '<a style="text-decoration: none;color: #BC1A4B;" href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print the block text as-is.

        NOTE(review): the text is not HTML-escaped, so '<' or '&' in the
        input reach the output verbatim. Escaping here would also mangle
        any tags already inserted by the ``sub_*`` methods, so it would
        have to happen before substitution.
        """
        print(data)

#!/usr/bin/python
# encoding: utf-8
# rules.py
"""Rules that classify a text block and emit the matching handler events."""


class Rule:
    """Base class for rules.

    A subclass sets ``type`` (the event name passed to the handler) and
    implements ``condition(block)``.
    """

    def action(self, block, handler):
        """Emit start / feed / end events for *block*.

        Returns:
            True, which tells the caller that the block has been handled.
        """
        handler.start(self.type)
        handler.feed(block)
        handler.end(self.type)
        return True


class HeadingRule(Rule):
    """Section heading rule (event name 'heading')."""

    type = 'heading'

    def condition(self, block):
        """Return True when *block* looks like a heading.

        A heading is a single line of at most 70 characters that does not
        end with a colon. ``endswith`` is used instead of indexing so an
        empty block cannot raise IndexError.
        """
        return '\n' not in block and len(block) <= 70 and not block.endswith(':')


class TitleRule(HeadingRule):
    """Document title rule (event name 'title').

    Only the first block offered to this rule can be the title.
    """

    type = 'title'
    # True until ``condition`` has been called once on this instance.
    first = True

    def condition(self, block):
        """Return True only for the first block, if it qualifies as a heading."""
        if not self.first:
            return False
        # Consumed on the first call, whether or not the block qualifies.
        self.first = False
        return HeadingRule.condition(self, block)


class ListItemRule(Rule):
    """List item rule: a block that starts with a hyphen."""

    type = 'listitem'

    def condition(self, block):
        """Return True when *block* starts with '-'."""
        return block.startswith('-')

    def action(self, block, handler):
        """Emit the item with its leading hyphen and surrounding spaces removed."""
        handler.start(self.type)
        handler.feed(block[1:].strip())
        handler.end(self.type)
        return True


class ListRule(ListItemRule):
    """List rule: opens a list before its first item and closes it after
    the last one.

    NOTE(review): the list is closed only when a later non-item block is
    seen, so a document that ends with a list item leaves the list open.
    """

    type = 'list'
    # True while the most recent blocks were list items.
    inside = False

    def condition(self, block):
        """Always match, so that ``action`` sees every block."""
        return True

    def action(self, block, handler):
        """Emit list start / end events on transitions into and out of a list.

        Returns:
            False, so the caller keeps applying further rules to the block.
        """
        is_item = ListItemRule.condition(self, block)
        if not self.inside and is_item:
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            handler.end(self.type)
            self.inside = False
        return False


class ParagraphRule(Rule):
    """Paragraph rule: the fallback that matches every block."""

    type = 'paragraph'

    def condition(self, block):
        """Always match."""
        return True

#!/usr/bin/python
# encoding: utf-8
# makeup.py
"""Convert plain text read from stdin into HTML printed to stdout."""
import re
import sys

from handlers import HTMLRenderer
from rules import HeadingRule, ListItemRule, ListRule, ParagraphRule, TitleRule
from util import blocks


class Parser:
    """Base parser: applies filters, then rules, to every text block.

    Attributes:
        handler: object that receives the start / end / feed / sub calls.
        rules: rule objects, tried in insertion order for each block.
        filters: functions ``f(block, handler) -> block``, applied in
            insertion order before the rules.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append *rule* to the list of rules."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """Register a filter that rewrites every match of *pattern*.

        Each match is replaced by the result of ``handler.sub(name)``.

        Args:
            pattern: regular expression to search for in each block.
            name: substitution name passed to ``handler.sub``.
        """
        # Compile once here rather than looking the pattern up per block.
        compiled = re.compile(pattern)

        def apply_filter(block, handler):
            return compiled.sub(handler.sub(name), block)

        self.filters.append(apply_filter)

    def parse(self, file):
        """Read *file* block by block and drive the handler.

        For each block every filter is applied in order; then the rules
        are tried in order, stopping at the first rule whose condition
        holds and whose action returns a true value.

        Args:
            file: iterable of text lines, e.g. ``sys.stdin``.
        """
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                # ``action`` runs only when ``condition`` holds; a true
                # result means the block has been fully handled.
                if rule.condition(block) and rule.action(block, self.handler):
                    break
        self.handler.end('document')


class BasicTextParser(Parser):
    """Plain-text parser with the standard rules and inline filters."""

    def __init__(self, handler):
        Parser.__init__(self, handler)
        # Order matters: ListRule runs first and never stops the chain;
        # ParagraphRule matches everything and so must come last.
        self.addRule(ListRule())
        self.addRule(ListItemRule())
        self.addRule(TitleRule())
        self.addRule(HeadingRule())
        self.addRule(ParagraphRule())
        self.addFilter(r'\*(.+?)\*', 'emphasis')
        # NOTE: matches only http:// URLs made of letters, dots and slashes.
        self.addFilter(r'(http://[\.a-zA-Z/]+)', 'url')
        self.addFilter(r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail')


if __name__ == '__main__':
    # Run the program: read text from stdin, print HTML to stdout.
    handler = HTMLRenderer()
    parser = BasicTextParser(handler)
    parser.parse(sys.stdin)

写完之后，让我们来测试一下。脚本从标准输入读取文本，并把生成的HTML输出到标准输出，例如：python makeup.py < test.txt > test.html

看，大功告成！

0 0