123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- #!/usr/bin/python
- from codecs import open
- import sys
- import os
- from lxml import etree
- COMMENT_DIR = 'comments'
def _child_text(element, tag, nsmap):
    """Return the text of the first ``tag`` child of ``element``, or ''.

    Both a missing child and a child with no text (``.text`` is ``None``)
    yield the empty string, so callers never format the literal "None"
    into a file name or file body.
    """
    child = element.find(tag, namespaces=nsmap)
    if child is None or child.text is None:
        return ''
    return child.text


def wp2comments(xml):
    """Write every approved comment of every published post to its own file.

    ``xml`` is passed straight to ``etree.parse``.  For each ``item``
    element that has a ``title``, a ``wp:status`` of ``publish`` and a
    non-empty ``wp:post_name`` (the slug), each ``wp:comment`` whose
    ``wp:comment_approved`` is ``'1'`` is written to
    ``COMMENT_DIR/<slug>-<n>.md``.  ``n`` is a single counter shared by all
    posts, so it keeps increasing across the whole export.

    Each file consists of ``Key: value`` header lines (post_id, Author,
    Date, Author_Email, Author_IP, Web), a blank line, and the comment
    content.

    ``COMMENT_DIR`` is created if it does not exist.  If it exists but is
    not a directory, a message is printed and the process exits with
    status 1.
    """
    root = etree.parse(xml).getroot()
    # Prefix map of the root element; used to resolve the 'wp:' prefix in
    # every lookup below.
    nsmap = root.nsmap

    if not os.path.exists(COMMENT_DIR):
        os.makedirs(COMMENT_DIR)
    elif not os.path.isdir(COMMENT_DIR):
        print('"%s" exists but is not a directory!' % COMMENT_DIR)
        sys.exit(1)

    n = 0
    for item in root.findall('.//item'):
        title = item.find('title')
        if title is None:
            continue

        # Only fetch comments from published posts.
        if _child_text(item, 'wp:status', nsmap) != 'publish':
            continue

        slug = _child_text(item, 'wp:post_name', nsmap)
        if not slug:
            # An empty slug would otherwise produce files named "None-N.md".
            print('WARNING: skipping "%s" with no post_name' % (title.text, ))
            continue

        for comment in item.findall('wp:comment', namespaces=nsmap):
            if _child_text(comment, 'wp:comment_approved', nsmap) != '1':
                continue

            author = _child_text(comment, 'wp:comment_author', nsmap)
            ip = _child_text(comment, 'wp:comment_author_IP', nsmap)
            date = _child_text(comment, 'wp:comment_date_gmt', nsmap)
            email = _child_text(comment, 'wp:comment_author_email', nsmap)
            url = _child_text(comment, 'wp:comment_author_url', nsmap)
            content = _child_text(comment, 'wp:comment_content', nsmap)

            n += 1
            path = os.path.join(COMMENT_DIR, '%s-%d.md' % (slug, n))
            # `open` is the file-level import from `codecs`; keyword
            # arguments are used because its positional order differs from
            # the builtin.  The `with` block closes the file even if a
            # write fails.
            with open(path, encoding='utf-8', mode='w') as f:
                f.write(u'post_id: %s\n' % (slug, ))
                f.write(u'Author: %s\n' % (author, ))
                f.write(u'Date: %s\n' % (date, ))
                f.write(u'Author_Email: %s\n' % (email, ))
                f.write(u'Author_IP: %s\n' % (ip, ))
                f.write(u'Web: %s\n' % (url, ))
                f.write(u'\n%s\n' % (content, ))
if __name__ == '__main__':
    # The export file path is the first command-line argument.  Without it,
    # print a usage hint and exit non-zero instead of dying with an
    # IndexError traceback.  Any further arguments are ignored.
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s WORDPRESS_EXPORT.xml\n' % sys.argv[0])
        sys.exit(2)
    wp2comments(sys.argv[1])
|