textutils.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. #!/usr/bin/env python
  2. # vim:set et ts=4 sw=4:
  3. """Text utility functions
  4. @contact: Debian FTP Master <ftpmaster@debian.org>
  5. @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
  6. @license: GNU General Public License version 2 or later
  7. """
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 2 of the License, or
  11. # (at your option) any later version.
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program; if not, write to the Free Software
  18. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. import email.header
  20. import six
  21. from .dak_exceptions import *
  22. from .regexes import re_parse_maintainer
  23. ################################################################################
  24. def force_to_utf8(s):
  25. """
  26. Forces a string to UTF-8. If the string isn't already UTF-8,
  27. it's assumed to be ISO-8859-1.
  28. """
  29. if isinstance(s, six.text_type):
  30. return s
  31. try:
  32. six.text_type(s, 'utf-8')
  33. return s
  34. except UnicodeError:
  35. latin1_s = six.text_type(s, 'iso8859-1')
  36. return latin1_s.encode('utf-8')
  37. def rfc2047_encode(s):
  38. """
  39. Encodes a (header) string per RFC2047 if necessary. If the
  40. string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.
  41. """
  42. for enc in ('ascii', 'utf-8', 'iso-8859-1'):
  43. try:
  44. return email.header.Header(s, enc, 998).encode()
  45. except UnicodeError:
  46. pass
  47. # If we get here, we're boned beyond belief
  48. return ''
  49. ################################################################################
  50. # <Culus> 'The standard sucks, but my tool is supposed to interoperate
  51. # with it. I know - I'll fix the suckage and make things
  52. # incompatible!'
  53. def fix_maintainer(maintainer):
  54. """
  55. Parses a Maintainer or Changed-By field and returns:
  56. 1. an RFC822 compatible version,
  57. 2. an RFC2047 compatible version,
  58. 3. the name
  59. 4. the email
  60. The name is forced to UTF-8 for both 1. and 3.. If the name field
  61. contains '.' or ',' (as allowed by Debian policy), 1. and 2. are
  62. switched to 'email (name)' format.
  63. """
  64. maintainer = maintainer.strip()
  65. if not maintainer:
  66. return ('', '', '', '')
  67. if maintainer.find("<") == -1:
  68. email = maintainer
  69. name = ""
  70. elif (maintainer[0] == "<" and maintainer[-1:] == ">"):
  71. email = maintainer[1:-1]
  72. name = ""
  73. else:
  74. m = re_parse_maintainer.match(maintainer)
  75. if not m:
  76. raise ParseMaintError("Doesn't parse as a valid Maintainer field.")
  77. name = m.group(1)
  78. email = m.group(2)
  79. # Get an RFC2047 compliant version of the name
  80. rfc2047_name = rfc2047_encode(name)
  81. # Force the name to be UTF-8
  82. name = force_to_utf8(name)
  83. if name.find(',') != -1 or name.find('.') != -1:
  84. rfc822_maint = "%s (%s)" % (email, name)
  85. rfc2047_maint = "%s (%s)" % (email, rfc2047_name)
  86. else:
  87. rfc822_maint = "%s <%s>" % (name, email)
  88. rfc2047_maint = "%s <%s>" % (rfc2047_name, email)
  89. if email.find("@") == -1 and email.find("buildd_") != 0:
  90. raise ParseMaintError("No @ found in email address part.")
  91. return (rfc822_maint, rfc2047_maint, name, email)
  92. ################################################################################
  93. def split_uploaders(field):
  94. import re
  95. for u in re.sub(">[ ]*,", ">\t", field).split("\t"):
  96. u = u.strip()
  97. # Trailing commas will give an empty final uploader
  98. if u:
  99. yield u