博客从 typecho 迁移到 Hexo - 特殊字符和python3修改版

代码修改自 zhourongyu 在 GitHub 上提供的 Typecho2Hexo。

修改部分:

  1. title含有特殊字符:如果typecho的文章title含有一些特殊字符,如 !@#$%^&*()[]{};:,./<>?\|`~-=_+ ,导出就会失败,所以需要修改。
  2. torndb版本过旧: torndb需要用到mysqldb支持,不过一直都安装不上,后来才发现是版本过旧的原因,唯有安装python3的版本。

代码如下:

# -*- coding: utf-8 -*-

import codecs
import os
import torndb_for_python3
import arrow

# Characters replaced by a space in post titles. The title is used both as a
# file name and as an unquoted front-matter value, so these would break the
# export. The backslash is escaped explicitly to avoid an invalid "\|" escape.
_TITLE_SPECIAL_CHARS = "!@#$%^&*()[]{};:,./<>?\\|`~-=_+"
_TITLE_TRANSLATION = {ord(c): " " for c in _TITLE_SPECIAL_CHARS}

# Token format understood by arrow's ``format`` for front-matter dates.
_DATE_FORMAT = 'YYYY-MM-DD HH:mm:ss'


def _sanitize_title(title):
    """Return *title* with every special character replaced by a space."""
    return title.translate(_TITLE_TRANSLATION)


def _write_meta_pages(db):
    """Write ``data/<slug>/index.md`` for every row of ``typecho_metas``."""
    metas = db.query("select type, slug, name from typecho_metas")
    for meta in metas:
        page_dir = 'data/%s' % meta.slug
        os.makedirs(page_dir, exist_ok=True)
        # ``with`` guarantees the file is closed even if a write fails.
        with codecs.open('%s/index.md' % page_dir, 'w', "utf-8") as f:
            f.write("title: %s\n" % meta.slug)
            f.write("date: %s\n" % arrow.now().format(_DATE_FORMAT))
            # Distinguish category pages from tag pages. The per-post query
            # in this file matches the type value 'tag', so accept it here
            # too; 'tags' is kept for backward compatibility.
            if meta.type == 'category':
                f.write('type: "categories"\n')
            elif meta.type in ('tag', 'tags'):
                f.write('type: "tags"\n')
            # NOTE(review): the original comment here said "disable comments"
            # but the emitted value is ``true``; the value is left unchanged
            # -- confirm which one is intended.
            f.write("comments: true\n")
            f.write("---\n")


def _write_posts(db):
    """Write ``data/_posts/<title>.md`` for every Typecho row of type 'post'."""
    posts = db.query("select cid, title, slug, text, created from typecho_contents where type='post'")
    posts_dir = 'data/_posts/'
    for post in posts:
        title = _sanitize_title(post.title)
        print(title)
        # Drop the marker Typecho stores inside markdown post bodies.
        content = str(post.text).replace('<!--markdown-->', '')

        # Collect the tags and the category attached to this post.
        tags = []
        category = ""
        relations = db.query(
            "select type, name, slug from `typecho_relationships` ts, typecho_metas tm where tm.mid = ts.mid and ts.cid = %s",
            post.cid)
        for meta in relations:
            if meta.type == 'tag':
                tags.append(meta.name)
            elif meta.type == 'category':
                # If several categories are attached, the last one wins.
                category = meta.slug

        os.makedirs(posts_dir, exist_ok=True)
        with codecs.open(r"%s%s.md" % (posts_dir, title), 'w', "utf-8") as f:
            f.write("---\n")
            f.write("title: %s\n" % title)
            # presumably ``created`` is a Unix timestamp; it is passed to
            # arrow.get as-is -- verify the resulting time zone is the one
            # wanted for the blog.
            f.write("date: %s\n" % arrow.get(post.created).format(_DATE_FORMAT))
            f.write("categories: %s\n" % category)
            f.write("tags: [%s]\n" % ','.join(tags))
            f.write("urlname: %s\n" % post.slug)
            f.write("---\n")
            f.write(content)


def create_data(db):
    """Export Typecho metas and posts as markdown files under ``data/``.

    Two kinds of files are written relative to the current working directory:

    * ``data/<slug>/index.md`` for each row of ``typecho_metas``;
    * ``data/_posts/<title>.md`` for each post, where ``<title>`` is the post
      title with special characters replaced by spaces. Posts whose cleaned
      titles are equal overwrite each other.

    Args:
        db: connection object exposing ``query(sql, *params)`` that returns
            rows with attribute access to their columns.
    """
    _write_meta_pages(db)
    _write_posts(db)


def main():
    """Open the database connection and run the export."""
    # Fill in the database connection details here.
    connection_settings = dict(
        host="xxxxxxxxxxxx",
        database="xxxxxxxxx",
        user="xxxxxxxxx",
        password="xxxxxxxx",
    )
    create_data(torndb_for_python3.Connection(**connection_settings))

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

参考:
Typecho2Hexo

replace-special-characters-in-a-string-python

评论

暂无

添加新评论