wp2comments 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. #!/usr/bin/python
  2. from codecs import open
  3. import sys
  4. import os
  5. from lxml import etree
# Directory (relative to the current working directory) that receives one
# generated ``.md`` file per exported comment.
COMMENT_DIR = 'comments'
  7. def wp2comments(xml):
  8. # xmlfile = open(xml, encoding='utf-8', mode='r').read()
  9. tree = etree.parse(xml)
  10. root = tree.getroot()
  11. items = root.findall('.//item')
  12. n = 0
  13. if not os.path.exists(COMMENT_DIR):
  14. os.makedirs(COMMENT_DIR)
  15. elif not os.path.isdir(COMMENT_DIR):
  16. print('"%s" exists but is not a directory!' % COMMENT_DIR)
  17. sys.exit(1)
  18. for item in items:
  19. title = item.find('title')
  20. if title is None:
  21. continue
  22. # Only fetch comments from published posts.
  23. status = item.find('wp:status', namespaces=root.nsmap)
  24. if status is None or status.text != 'publish':
  25. continue
  26. slug = item.find('wp:post_name', namespaces=root.nsmap)
  27. if slug is None:
  28. print('WARNING: skipping "%s" with no post_name')
  29. continue
  30. slug = slug.text
  31. comments = item.findall('wp:comment', namespaces=root.nsmap)
  32. if not comments:
  33. # No comments found for this post.
  34. continue
  35. for comment in comments:
  36. def comment_tag(tag):
  37. result = comment.find(tag, namespaces=root.nsmap)
  38. if result is None:
  39. return ''
  40. else:
  41. return result.text
  42. status = comment_tag('wp:comment_approved')
  43. if status != '1':
  44. continue
  45. author = comment_tag('wp:comment_author')
  46. ip = comment_tag('wp:comment_author_IP')
  47. date = comment_tag('wp:comment_date_gmt')
  48. email = comment_tag('wp:comment_author_email')
  49. url = comment_tag('wp:comment_author_url')
  50. content = comment_tag('wp:comment_content')
  51. n += 1
  52. f = open(os.path.join(COMMENT_DIR, '%s-%d.md' % (slug, n)),
  53. encoding='utf-8', mode='w')
  54. f.write(u'post_id: %s\n' % (slug, ))
  55. f.write(u'Author: %s\n' % (author, ))
  56. f.write(u'Date: %s\n' % (date, ))
  57. f.write(u'Author_Email: %s\n' % (email, ))
  58. f.write(u'Author_IP: %s\n' % (ip, ))
  59. f.write(u'Web: %s\n' % (url, ))
  60. f.write(u'\n%s\n' % (content, ))
  61. f.close()
  62. if __name__ == '__main__':
  63. wp2comments(sys.argv[1])