textutils.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # vim:set et ts=4 sw=4:
  2. """Text utility functions
  3. @contact: Debian FTP Master <ftpmaster@debian.org>
  4. @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
  5. @license: GNU General Public License version 2 or later
  6. """
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 2 of the License, or
  10. # (at your option) any later version.
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. import email.header
  19. import six
  20. from .dak_exceptions import *
  21. from .regexes import re_parse_maintainer
  22. ################################################################################
  23. def force_to_utf8(s):
  24. """
  25. Forces a string to UTF-8. If the string isn't already UTF-8,
  26. it's assumed to be ISO-8859-1.
  27. """
  28. if isinstance(s, six.text_type):
  29. return s
  30. try:
  31. six.text_type(s, 'utf-8')
  32. return s
  33. except UnicodeError:
  34. latin1_s = six.text_type(s, 'iso8859-1')
  35. return latin1_s.encode('utf-8')
  36. def rfc2047_encode(s):
  37. """
  38. Encodes a (header) string per RFC2047 if necessary. If the
  39. string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.
  40. """
  41. for enc in ('ascii', 'utf-8', 'iso-8859-1'):
  42. try:
  43. return email.header.Header(s, enc, 998).encode()
  44. except UnicodeError:
  45. pass
  46. # If we get here, we're boned beyond belief
  47. return ''
  48. ################################################################################
  49. # <Culus> 'The standard sucks, but my tool is supposed to interoperate
  50. # with it. I know - I'll fix the suckage and make things
  51. # incompatible!'
  52. def fix_maintainer(maintainer):
  53. """
  54. Parses a Maintainer or Changed-By field and returns:
  55. 1. an RFC822 compatible version,
  56. 2. an RFC2047 compatible version,
  57. 3. the name
  58. 4. the email
  59. The name is forced to UTF-8 for both 1. and 3.. If the name field
  60. contains '.' or ',' (as allowed by Debian policy), 1. and 2. are
  61. switched to 'email (name)' format.
  62. """
  63. maintainer = maintainer.strip()
  64. if not maintainer:
  65. return ('', '', '', '')
  66. if maintainer.find("<") == -1:
  67. email = maintainer
  68. name = ""
  69. elif (maintainer[0] == "<" and maintainer[-1:] == ">"):
  70. email = maintainer[1:-1]
  71. name = ""
  72. else:
  73. m = re_parse_maintainer.match(maintainer)
  74. if not m:
  75. raise ParseMaintError("Doesn't parse as a valid Maintainer field.")
  76. name = m.group(1)
  77. email = m.group(2)
  78. # Get an RFC2047 compliant version of the name
  79. rfc2047_name = rfc2047_encode(name)
  80. # Force the name to be UTF-8
  81. name = force_to_utf8(name)
  82. if name.find(',') != -1 or name.find('.') != -1:
  83. rfc822_maint = "%s (%s)" % (email, name)
  84. rfc2047_maint = "%s (%s)" % (email, rfc2047_name)
  85. else:
  86. rfc822_maint = "%s <%s>" % (name, email)
  87. rfc2047_maint = "%s <%s>" % (rfc2047_name, email)
  88. if email.find("@") == -1 and email.find("buildd_") != 0:
  89. raise ParseMaintError("No @ found in email address part.")
  90. return (rfc822_maint, rfc2047_maint, name, email)
  91. ################################################################################
  92. def split_uploaders(field):
  93. import re
  94. for u in re.sub(">[ ]*,", ">\t", field).split("\t"):
  95. u = u.strip()
  96. # Trailing commas will give an empty final uploader
  97. if u:
  98. yield u