fix_date.sh 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. #!/bin/sh
  2. # * Copyright 2004 Tristan Chabredier <wwp@claws-mail.org>
  3. # *
  4. # * This file is free software; you can redistribute it and/or modify it
  5. # * under the terms of the GNU General Public License as published by
  6. # * the Free Software Foundation; either version 3 of the License, or
  7. # * (at your option) any later version.
  8. # *
  9. # * This program is distributed in the hope that it will be useful, but
  10. # * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # * General Public License for more details.
  13. # *
  14. # * You should have received a copy of the GNU General Public License
  15. # * along with this program; if not, write to the Free Software
  16. # * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17. #
  # fix_date.sh: helper script that fixes a non-standard Date: header in
  # emails, or adds the header when it is missing.
  # usage: fix_date.sh [<switches>] <filename> [<filename> ..]
  # The Date: value is replaced with the first one available among: the
  # Fetchinfo time header (X-Fetch-Time:), the first dated Received: field,
  # and finally the file modification time (in an RFC 2822-compliant form).
  # An already existing X-Original-Date: is kept; if it is missing, it is
  # added whenever a Date: was set (even to a non-conforming value).
  # TODO: fall back to X-OriginalArrivalTime: ?
  # Release number reported by the -v / --version switch.
  VERSION="0.1.4"

  # version
  # Write the release number to stdout, then end the script successfully.
  version() {
    printf '%s\n' "$VERSION"
    exit 0
  }
  # usage EXIT_CODE
  # Print the command-line help on stdout, then terminate the script with
  # EXIT_CODE (0 when no argument is given).
  usage()
  {
    # One argument per output line; the text is fixed, only the script
    # name is expanded.
    printf '%s\n' \
      "usage:" \
      " ${0##*/} [<switches>] <filename> [<filename> ..]" \
      "switches:" \
      " -h --help display this help then exit" \
      " -v --version display version information then exit" \
      " -d --debug turn on debug information (be more verbose)" \
      " -f --force always force (re-)writing of Date: header" \
      " -r --rfc force re-writing of Date: header when it's not RFC-compliant" \
      " -s --strict use RFC-strict matching patterns for dates" \
      " -- end of switches (in case a filename starts with a -)" \
      "this script requires coreutils (cat, cut, head, tr), dos2unix, grep and sed" \
      "in PATH to work"
    exit "${1:-0}"
  }
  # date_valid DATE_STRING
  # Check DATE_STRING against the date pattern selected by $STRICT
  # ($DATE_REGEXP_STRICT when STRICT is 1, $DATE_REGEXP otherwise).
  # The result is stored in the global DATE_VALID: 0 when the string
  # contains a matching date, non-zero (grep's exit status) otherwise.
  # The global REGEXP is set to the pattern that was used.
  date_valid()
  {
    if [ "$STRICT" -eq 1 ]
    then
      REGEXP="$DATE_REGEXP_STRICT"
    else
      REGEXP="$DATE_REGEXP"
    fi
    # printf rather than echo: the value comes from a mail header and must
    # reach grep untouched even if it looks like an echo option or contains
    # backslashes.
    printf '%s\n' "$1" | grep -q -E -i -m 1 -e "$REGEXP"
    DATE_VALID=$?
  }
  # dump_date_fields
  # Append the date headers of the rewritten mail to the file "$TMP":
  #  - "X-Original-Date:$DATE", only when the mail had a Date: value ($DATE)
  #    but no X-Original-Date: of its own ($X_ORIGINAL_DATE is empty);
  #  - "Date:$REPLACEMENT_DATE", always.
  # The values are expected to carry their own leading whitespace.
  dump_date_fields()
  {
    # Two separate tests: a single test with -a and five operands is
    # unspecified by POSIX and can misparse unusual header values.
    if [ -z "$X_ORIGINAL_DATE" ] && [ -n "$DATE" ]
    then
      # printf keeps header text verbatim (echo may interpret backslashes)
      printf '%s\n' "X-Original-Date:$DATE" >> "$TMP"
    fi
    printf '%s\n' "Date:$REPLACEMENT_DATE" >> "$TMP"
  }
  # Run-time settings, all off by default:
  #   FORCE  (--force)  always (re-)write the Date header; without it the
  #                     header is only written when it doesn't exist
  #   RFC    (--rfc)    (re-)write the Date header when it's not RFC-compliant
  #   DEBUG  (--debug)  display more information about what's performed
  #   STRICT (--strict) use strict matching patterns for date validation
  FORCE=0
  RFC=0
  DEBUG=0
  STRICT=0
  # 0 = valid; stays 0 until date_valid overrides it
  DATE_VALID=0
  # how many leading lines are searched for the end of the headers
  # (300 is a reasonable minimum, 600 has been seen in the wild, 1000 is
  # a safety margin)
  MAX_HEADER_LINES=1000

  # Consume the leading switches. Parsing stops at "--" (which is dropped),
  # at the first argument that is not a switch, or at an empty argument.
  until [ -z "$1" ]; do
    case "$1" in
      --help | -h)
        usage 0
        ;;
      --version | -v)
        version
        ;;
      --force | -f)
        FORCE=1
        ;;
      --debug | -d)
        DEBUG=1
        ;;
      --rfc | -r)
        RFC=1
        ;;
      --strict | -s)
        STRICT=1
        ;;
      --)
        shift
        break
        ;;
      -*)
        echo "error: unrecognized switch '$1'"
        usage 1
        ;;
      *)
        break
        ;;
    esac
    shift
  done

  # --force and --rfc are mutually exclusive
  if [ "$FORCE" -eq 1 ] && [ "$RFC" -eq 1 ]; then
    echo "error: use either --force or --rfc, but not both at the same time"
    usage 1
  fi

  # at least one file name must remain after the switches
  if [ $# -lt 1 ]; then
    usage 1
  fi

  # give up early when an external tool the script relies on is missing
  for PROG in dos2unix grep sed; do
    if ! type "$PROG" >/dev/null 2>&1; then
      echo "error: $PROG not found in PATH"
      exit 1
    fi
  done
  # Scratch files.
  # They live in a private directory created by mktemp (unpredictable name,
  # accessible to the current user only) instead of under guessable names
  # directly in /tmp, so another local user cannot pre-create them as
  # symlinks. /tmp is tried first, then $HOME.
  TMPDIR="/tmp"
  WORKDIR=`mktemp -d "$TMPDIR/${0##*/}.XXXXXX" 2>/dev/null`
  if [ -z "$WORKDIR" ]
  then
    TMPDIR="$HOME"
    WORKDIR=`mktemp -d "$TMPDIR/${0##*/}.XXXXXX"` || \
      { echo "error: cannot create a temporary directory in /tmp or $HOME"; exit 1; }
  fi
  # remove the scratch directory however the script ends
  trap 'rm -rf "$WORKDIR"' EXIT
  trap 'exit 1' HUP INT TERM

  # TMP: work file, and the rewritten mail before it replaces the original
  TMP="$WORKDIR/${0##*/}.$$.tmp"
  # HEADERS / BODY: the two halves of the mail being processed
  HEADERS="$WORKDIR/${0##*/}.$$.headers.tmp"
  BODY="$WORKDIR/${0##*/}.$$.body.tmp"

  # Date patterns (extended regular expressions).
  # Loose form: the day of week is optional, and the match starts with the
  # blank that precedes the date.
  DATE_REGEXP='( (Mon|Tue|Wed|Thu|Fri|Sat|Sun),)? [0-9]+ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]+ [0-9]+:[0-9]+:[0-9]+ [+-]?[0-9]+'
  # Strict form: the day of week is mandatory.
  DATE_REGEXP_STRICT='(Mon|Tue|Wed|Thu|Fri|Sat|Sun), [0-9]+ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]+ [0-9]+:[0-9]+:[0-9]+ [+-]?[0-9]+'
  # header_value NAME FILE
  # Print what follows NAME on every line of FILE that starts with NAME
  # (NAME includes the colon, e.g. "Date:"), followed by the folded
  # continuation lines (starting with a space or a tab) that belong to it.
  # Prints nothing when FILE has no such header. Matching is case-sensitive.
  header_value()
  {
    awk -v name="$1" '
      index($0, name) == 1 { folded = 1; print substr($0, length(name) + 1); next }
      folded && /^[ \t]/ { print; next }
      { folded = 0 }
    ' < "$2"
  }

  # header_strip NAME FILE
  # Print FILE without the lines starting with NAME and without their
  # folded continuation lines; every other line is printed unchanged.
  header_strip()
  {
    awk -v name="$1" '
      index($0, name) == 1 { folded = 1; next }
      folded && /^[ \t]/ { next }
      { folded = 0; print }
    ' < "$2"
  }

  # Process every remaining argument as a mail file. The loop is driven by
  # the argument count so that an empty argument is skipped like any other
  # unusable file instead of silently ending the run.
  while [ $# -gt 0 ]
  do
    # skip if file is empty or doesn't exist
    if [ ! -s "$1" ]
    then
      test "$DEBUG" -eq 1 && \
        echo "$1: no found or empty, skipping"
      shift
      continue
    fi
    SKIP=0

    # Split headers and body at the first empty line (if any) found within
    # the first MAX_HEADER_LINES lines. The search runs on a dos2unix'ed
    # stream because the mail might use DOS CRLF, which '^$' won't match.
    SEP=`head -n "$MAX_HEADER_LINES" < "$1" | dos2unix | grep -n -m 1 '^$' | cut -d ':' -f 1`
    if [ -z "$SEP" ]
    then
      cp -f -- "$1" "$HEADERS"
      : > "$BODY"
      test "$DEBUG" -eq 1 && \
        echo "$1: no body part could be found before line $MAX_HEADER_LINES"
    else
      # headers: everything before the separator line;
      # body: the separator line and everything after it
      sed "$SEP"',$d' < "$1" > "$HEADERS"
      sed -n "$SEP"',$p' < "$1" > "$BODY"
    fi

    # work on headers only
    # get the Date and X-Original-Date values (leading blank included)
    X_ORIGINAL_DATE=`header_value "X-Original-Date:" "$HEADERS"`
    DATE=`header_value "Date:" "$HEADERS"`

    # Work on headers, minus Date and X-Original-Date. Only these headers
    # and their continuation lines are removed: a sed range such as
    # '/^Date:/,/^[^\t]/d' would also delete the header line that follows.
    for NAME in "X-Original-Date:" "Date:"
    do
      header_strip "$NAME" "$HEADERS" > "$TMP" && \
        mv -f "$TMP" "$HEADERS"
    done

    # find a replacement date in Fetchinfo: header
    FETCH_DATE=`grep -im1 'X-FETCH-TIME: ' "$HEADERS" | cut -d ' ' -f 2-`

    # or in Received: headers, using the same pattern choice as date_valid
    # (strict pattern only when --strict was given) ..
    if [ "$STRICT" -eq 1 ]
    then
      REGEXP="$DATE_REGEXP_STRICT"
    else
      REGEXP="$DATE_REGEXP"
    fi
    # first match only; the loose pattern captures the blank before the
    # date, so leading whitespace is removed here and re-added below
    RECEIVED_DATE=`header_value "Received:" "$HEADERS" | \
      grep -E -o -i -m 1 -e "$REGEXP" | head -n 1 | sed 's/^[[:space:]]*//'`

    # .. or from file properties (modification time, RFC 2822 form)
    FILE_DATE=`LC_ALL=POSIX LANG=POSIX date -R -r "$1"`

    # we could also use the system date as a possible replacement
    # (only needed by the disabled alternative below)
    SYSTEM_DATE="`date -R`"

    # determine which replacement date to use
    # (don't forget the leading whitespace in REPLACEMENT_DATE)
    if [ -n "$FETCH_DATE" ]
    then
      REPLACEMENT_DATE=" $FETCH_DATE"
      REPLACEMENT="Fetchinfo time header"
    elif [ -n "$RECEIVED_DATE" ]
    then
      REPLACEMENT_DATE=" $RECEIVED_DATE"
      REPLACEMENT="received date"
    else
      REPLACEMENT_DATE=" $FILE_DATE"
      REPLACEMENT="file date"
      # REPLACEMENT_DATE=" $SYSTEM_DATE"
      # REPLACEMENT="system date"
    fi

    # ensure that the original X-Original-Date is kept
    : > "$TMP"
    if [ -n "$X_ORIGINAL_DATE" ]
    then
      printf '%s\n' "X-Original-Date:$X_ORIGINAL_DATE" >> "$TMP"
    fi

    # replace/set the date and write all lines
    test "$RFC" -eq 1 && \
      date_valid "$DATE"
    if [ -z "$DATE" ]
    then
      test "$DEBUG" -eq 1 && \
        echo "$1: date not found, using $REPLACEMENT now"
      dump_date_fields
    elif [ "$FORCE" -eq 1 ]
    then
      test "$DEBUG" -eq 1 && \
        echo "$1: date already found, replacing with $REPLACEMENT"
      dump_date_fields
    elif [ "$RFC" -eq 1 ]
    then
      if [ "$DATE_VALID" -ne 0 ]
      then
        test "$DEBUG" -eq 1 && \
          echo "$1: date already found but not RFC-compliant, replacing with $REPLACEMENT"
        dump_date_fields
      else
        test "$DEBUG" -eq 1 && \
          echo "$1: date already found and RFC-compliant, skipping"
        SKIP=1
      fi
    else
      test "$DEBUG" -eq 1 && \
        echo "$1: date already found, skipping"
      SKIP=1
    fi

    if [ "$SKIP" -eq 0 ]
    then
      # uncomment the following line to backup the original file
      #mv -f "$1" "$1.bak"
      # the original is only replaced once the new content is complete
      if ! { cat "$HEADERS" "$BODY" >> "$TMP" && mv -f -- "$TMP" "$1"; }
      then
        echo "error while moving '$TMP' to '$1'"
        rm -f "$HEADERS" "$BODY" "$TMP" >/dev/null 2>&1
        exit 1
      fi
    fi
    rm -f "$HEADERS" "$BODY" "$TMP" >/dev/null 2>&1
    shift
  done
  # every file was either processed or skipped without a fatal error
  exit 0