textutils.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. # vim:set et ts=4 sw=4:
  2. """Text utility functions
  3. @contact: Debian FTP Master <ftpmaster@debian.org>
  4. @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
  5. @license: GNU General Public License version 2 or later
  6. """
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 2 of the License, or
  10. # (at your option) any later version.
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. import email.header
  19. from .dak_exceptions import *
  20. from .regexes import re_parse_maintainer
  21. ################################################################################
  22. def rfc2047_encode(s):
  23. """
  24. Encodes a (header) string per RFC2047 if necessary. If the
  25. string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.
  26. """
  27. for enc in ('ascii', 'utf-8', 'iso-8859-1'):
  28. try:
  29. return email.header.Header(s, enc, 998).encode()
  30. except UnicodeEncodeError:
  31. pass
  32. # If we get here, we're boned beyond belief
  33. raise RuntimeError("Failed to encode string")
  34. ################################################################################
  35. # <Culus> 'The standard sucks, but my tool is supposed to interoperate
  36. # with it. I know - I'll fix the suckage and make things
  37. # incompatible!'
  38. def fix_maintainer(maintainer: str) -> tuple[str, str, str, str]:
  39. """
  40. Parses a Maintainer or Changed-By field and returns:
  41. 1. an RFC822 compatible version,
  42. 2. an RFC2047 compatible version,
  43. 3. the name
  44. 4. the email
  45. The name is forced to UTF-8 for both 1. and 3.. If the name field
  46. contains '.' or ',' (as allowed by Debian policy), 1. and 2. are
  47. switched to 'email (name)' format.
  48. """
  49. maintainer = maintainer.strip()
  50. if not maintainer:
  51. return ('', '', '', '')
  52. if maintainer.find("<") == -1:
  53. email = maintainer
  54. name = ""
  55. elif (maintainer[0] == "<" and maintainer[-1:] == ">"):
  56. email = maintainer[1:-1]
  57. name = ""
  58. else:
  59. m = re_parse_maintainer.match(maintainer)
  60. if not m:
  61. raise ParseMaintError("Doesn't parse as a valid Maintainer field.")
  62. name = m.group(1)
  63. email = m.group(2)
  64. # Get an RFC2047 compliant version of the name
  65. rfc2047_name = rfc2047_encode(name)
  66. if name.find(',') != -1 or name.find('.') != -1:
  67. rfc822_maint = "%s (%s)" % (email, name)
  68. rfc2047_maint = "%s (%s)" % (email, rfc2047_name)
  69. else:
  70. rfc822_maint = "%s <%s>" % (name, email)
  71. rfc2047_maint = "%s <%s>" % (rfc2047_name, email)
  72. if email.find("@") == -1 and email.find("buildd_") != 0:
  73. raise ParseMaintError("No @ found in email address part.")
  74. return (rfc822_maint, rfc2047_maint, name, email)
  75. ################################################################################
  76. def split_uploaders(field):
  77. import re
  78. for u in re.sub(">[ ]*,", ">\t", field).split("\t"):
  79. u = u.strip()
  80. # Trailing commas will give an empty final uploader
  81. if u:
  82. yield u