ddtp-i18n-check.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. #!/bin/bash
  2. #
  3. # $Id: ddtp_i18n_check.sh 2535 2011-02-19 14:20:52Z nekral-guest $
  4. #
  5. # Copyright (C) 2008, 2011 Felipe Augusto van de Wiel <faw@funlabs.org>
  6. # Copyright (C) 2008, 2009 Nicolas François <nicolas.francois@centraliens.net>
  7. #
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 2 of the License, or
  11. # (at your option) any later version.
  12. #
  13. # On Debian systems, you can find the full text of the license in
  14. # /usr/share/common-licenses/GPL-2
  # Stop at the first failing command and treat unset variables as errors.
  set -o errexit -o nounset

  # Run every tool started from here in the C locale.
  LC_ALL=C
  export LC_ALL

  # DEBUG must be defined to either 0 or 1.
  #   0: fail after the first error.
  #   1: list all the errors.
  DEBUG=0

  # DRY_RUN=0: generate the compressed version of the Translation-* files.
  DRY_RUN=0

  # Positional arguments; both start out empty.
  # If no argument indicates the PACKAGES_LISTS_DIR then '.' is used.
  dists_parent_dir=
  PACKAGES_LISTS_DIR=
  # Print the command-line synopsis on stderr and terminate the script
  # with exit status 1.
  usage () {
    printf '%s\n' \
      "Usage: $0 [options] <dists_parent_dir> [<packages_lists_directory>]" \
      "" \
      " --debug Debug mode: do not stop after the first error" \
      " --dry-run Do not generate the compressed version of the " \
      " Translation files" >&2
    exit 1
  }
  # Parse the command line.
  #
  # Arguments: the script's own arguments ("$@").
  # Globals written:
  #   DEBUG               set to 1 by --debug
  #   DRY_RUN             set to 1 by --dry-run
  #   dists_parent_dir    first non-option argument, one trailing / removed
  #   PACKAGES_LISTS_DIR  second non-option argument, '.' when not given
  # Any other argument starting with '-', an empty argument, a third
  # positional argument, or a directory that does not exist ends the
  # script through usage().
  parse_options () {
    local opt
    for opt; do
      case "$opt" in
        --debug)
          DEBUG=1
          ;;
        --dry-run)
          DRY_RUN=1
          ;;
        -*)
          # The pattern must stay unquoted: a quoted "-*" only matches the
          # literal two-character string and lets unknown options through
          # as directory names.
          echo "$0: Invalid option: $opt" >&2
          usage
          ;;
        "")
          echo "Empty parameter" >&2
          echo "" >&2
          usage
          ;;
        *)
          if [ -z "$dists_parent_dir" ]; then
            # Removing trailing /
            dists_parent_dir=${opt%/}
          elif [ -z "$PACKAGES_LISTS_DIR" ]; then
            PACKAGES_LISTS_DIR=$opt
          else
            echo "$0: Invalid option: $opt" >&2
            usage
          fi
          ;;
      esac
    done

    # Default to the current directory only when no packages lists
    # directory was given; never derive it from the last argument seen.
    PACKAGES_LISTS_DIR=${PACKAGES_LISTS_DIR:-.}

    if [ ! -d "$dists_parent_dir" ]; then
      echo "missing dists_parent_dir, or not a directory" >&2
      echo "" >&2
      usage
    elif [ ! -d "$PACKAGES_LISTS_DIR" ]; then
      echo "missing packages_lists_directory, or not a directory" >&2
      echo "" >&2
      usage
    fi
  }

  parse_options "$@"
  #STABLE="squeeze"
  # Suites that may receive translations: unstable has a fixed codename,
  # the codename of testing is read from the dak configuration.
  UNSTABLE="sid"
  TESTING=$(dak admin s-cfg get-value testing codename)

  # Original SHA256SUMS, generated by i18n.debian.net
  CHECKSUMS="SHA256SUMS"

  # DAK Timestamp
  TIMESTAMP="timestamp"

  # These special files must exist on the top of dists_parent_dir
  SPECIAL_FILES="${CHECKSUMS} ${TIMESTAMP} ${TIMESTAMP}.gpg"

  # Temporary working directory. A full path is needed to reduce the
  # complexity of checking CHECKSUMS and cleaning/removing TMPDIR, so step
  # into the new directory, record where we are, and step back out.
  TEMP_WORK_DIR=$(mktemp -d -t ddtp_dinstall_tmpdir.XXXXXX)
  cd "$TEMP_WORK_DIR"
  TMP_WORK_DIR=$PWD
  cd - > /dev/null
  unset TEMP_WORK_DIR
  # Cleanup handler: if it runs, something bad happened.
  # Removes the temporary working directory, then the Translation-* and
  # Index files found in dists/*/main/i18n below $dists_parent_dir, and
  # ends the script with exit status 1.
  trap_exit () {
    rm -rf "$TMP_WORK_DIR"
    rm -f "$dists_parent_dir"/dists/*/main/i18n/Translation-* \
          "$dists_parent_dir"/dists/*/main/i18n/Index
    exit 1
  }

  # Run the handler on normal exit as well as on the usual termination signals.
  trap trap_exit EXIT HUP INT QUIT TERM
  # Check that a path names a translation file in the expected place.
  #
  # Arguments: $1 - path of the file to check
  # Returns:   0 when the file sits directly in a directory called "i18n"
  #            and is named Translation-<lang>, where <lang> is two or
  #            three lower-case letters optionally followed by
  #            _<two upper-case letters>; 1 otherwise.
  is_filename_okay () {
    local ifo_file="$1"
    local ifo_d ifo_f

    # Check that the file is in an "i18n" directory.
    # This ensures that the Translation-$lang files are not e.g. in
    # dists/etch/ or dists/etch/main/
    # The inner substitution is quoted so that a path containing
    # whitespace is not split into several basename arguments.
    ifo_d=$(basename -- "$(dirname -- "$ifo_file")")
    if [ "x$ifo_d" != "xi18n" ]; then
      return 1
    fi

    # Check that the file is named Translation-$lang
    ifo_f=$(basename -- "$ifo_file")
    case "$ifo_f" in
      Translation-[a-z][a-z][a-z]_[A-Z][A-Z]) return 0 ;;
      Translation-[a-z][a-z]_[A-Z][A-Z]) return 0 ;;
      Translation-[a-z][a-z][a-z]) return 0 ;;
      Translation-[a-z][a-z]) return 0 ;;
    esac
    return 1
  }
  # Check a directory name against a directory whitelist.
  #
  # Arguments: $1 - directory path to check
  # Globals:   dists_parent_dir, TESTING, UNSTABLE (read)
  # Returns:   0 when the path is $dists_parent_dir, its dists/ child, a
  #            whitelisted suite below dists/, or one of that suite's
  #            main, contrib, non-free-firmware and non-free directories
  #            or their i18n subdirectory; 1 otherwise.
  is_dirname_okay () {
    local ido_dir="$1"
    local ido_root="$dists_parent_dir/dists"
    local ido_suite ido_area

    case "$ido_dir" in
      "$dists_parent_dir" | "$ido_root") return 0 ;;
    esac

    # TODO/FIXME: It is undecided how to update at stable/point-releases, so we
    #             don't allow files to $STABLE ($STABLE is deliberately
    #             missing from the suite list below).
    for ido_suite in "$TESTING" "$UNSTABLE"; do
      case "$ido_dir" in
        "$ido_root/$ido_suite") return 0 ;;
      esac
      for ido_area in main contrib non-free-firmware non-free; do
        case "$ido_dir" in
          "$ido_root/$ido_suite/$ido_area") return 0 ;;
          "$ido_root/$ido_suite/$ido_area/i18n") return 0 ;;
        esac
      done
    done
    return 1
  }
  # Check the structure of one Translation-<lang> file.
  #
  # Every block must consist of, in this order: one "Package: " line, one
  # "Description-md5: " line, one "Description-<lang>: " line (the short
  # description) and at least one line starting with a space (the long
  # description). Blocks are separated by an empty line. Any other line
  # is an error.
  #
  # Arguments: $1 - path of the file; <lang> is whatever follows the last
  #                 '-' in that path
  # Globals:   DEBUG (read) - 0: stop at the first error,
  #                           otherwise report every error
  # Outputs:   one message plus the counter status per error, on stdout
  # Returns:   0 when the file is well formed, 1 otherwise
  has_valid_fields () {
    local hvf_file="$1"
    local hvf_lang="${hvf_file##*-}"

    # The file name, the language and the debug flag are handed over with
    # -v instead of being pasted into the program text, so that characters
    # such as % or " in a path cannot alter the awk program or its printf
    # formats.
    awk -v file="$hvf_file" -v lang="$hvf_lang" -v debug="$DEBUG" '
      function print_status() {
        printf("p: %d, m: %d, s: %d, l: %d\n", package, md5, s_description, l_description)
      }

      # Report one failure for the current block. Outside debug mode this
      # ends the run at once with status 1.
      function fail(what) {
        printf("%s in %s (line %d)\n", what, file, NR)
        print_status()
        failed = 1
        if (debug) { failures++ } else { exit 1 }
      }

      BEGIN {
        package       = 0 # Indicates if a Package field was found
        md5           = 0 # Indicates if a Description-md5 field was found
        s_description = 0 # Indicates if a short description was found
        l_description = 0 # Indicates if a long description was found
        failures      = 0 # Number of failures (debug only)
        failed        = 0 # Failure already reported for the block
        description_re = "^Description-" lang ": "
      }

      /^Package: / {
        if (0 == failed) {
          if (0 != package || 0 != md5 || 0 != s_description || 0 != l_description) {
            fail("Package field unexpected")
          }
          package++
        }
        next
      }

      /^Description-md5: / {
        if (0 == failed) {
          if (1 != package || 0 != md5 || 0 != s_description || 0 != l_description) {
            fail("Description-md5 field unexpected")
          }
          md5++
        }
        next
      }

      $0 ~ description_re {
        if (0 == failed) {
          if (1 != package || 1 != md5 || 0 != s_description || 0 != l_description) {
            fail("Description-" lang " field unexpected")
          }
          s_description++
        }
        next
      }

      /^ / {
        if (0 == failed) {
          if (1 != package || 1 != md5 || 1 != s_description) {
            fail("Long description unexpected")
          }
          # There can be any number of long description lines.
          # Do not count.
          l_description = 1
        }
        next
      }

      /^$/ {
        if (0 == failed) {
          if (1 != package || 1 != md5 || 1 != s_description || 1 != l_description) {
            fail("End of block unexpected")
          }
        }
        # Next package
        package = 0; md5 = 0; s_description = 0; l_description = 0
        failed = 0
        next
      }

      # Anything else: fail, quoting the offending line (\047 is the
      # single quote character).
      {
        fail("Unexpected line \047" $0 "\047")
      }

      END {
        if (0 == failed) {
          # They must be all set to 0 or all set to 1
          if ( (0 == package || 0 == md5 || 0 == s_description || 0 == l_description) \
            && (0 != package || 0 != md5 || 0 != s_description || 0 != l_description)) {
            printf("End of file unexpected in %s (line %d)\n", file, NR)
            print_status()
            exit 1
          }
        }
        if (failures > 0) {
          exit 1
        }
      }
    ' "$hvf_file" || return 1
    return 0
  }
  # $SPECIAL_FILES must exist
  for sf in $SPECIAL_FILES; do
    if [ ! -f "$dists_parent_dir/$sf" ]; then
      echo "Special file ($sf) doesn't exist" >&2
      exit 1
    fi
  done

  # Comparing CHECKSUMS
  # We don't use -c because a file could exist in the directory tree and not in
  # the CHECKSUMS, so we sort the existing CHECKSUMS and we create a new one
  # already sorted, if cmp fails then files are different and we don't want to
  # continue.
  cd "$dists_parent_dir"
  find dists -type f -print0 | xargs --null sha256sum > "$TMP_WORK_DIR/$CHECKSUMS.new"
  sort "$CHECKSUMS" > "$TMP_WORK_DIR/$CHECKSUMS.sorted"
  sort "$TMP_WORK_DIR/$CHECKSUMS.new" > "$TMP_WORK_DIR/$CHECKSUMS.new.sorted"
  if ! cmp --quiet "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted"; then
    echo "Failed to compare the $CHECKSUMS, they are not identical!" >&2
    diff -au "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted" >&2
    exit 1
  fi
  cd "$OLDPWD"

  # Get the list of valid packages (sorted, uniq)
  for t in "$TESTING" "$UNSTABLE"; do
    if [ ! -f "$PACKAGES_LISTS_DIR/$t" ]; then
      echo "Missing $PACKAGES_LISTS_DIR/$t" >&2
      exit 1
    fi
    cut -d' ' -f 1 "$PACKAGES_LISTS_DIR/$t" | sort -u > "$TMP_WORK_DIR/$t.pkgs"
  done

  # Snapshot the tree before touching it: bzip2 below replaces files inside
  # the directories being listed, which must not happen while find is still
  # walking them. The list is NUL-delimited so that any file name survives.
  /usr/bin/find "$dists_parent_dir" -print0 > "$TMP_WORK_DIR/tree.list"

  # The list is read on file descriptor 3 so that no command in the loop
  # body can consume it by reading stdin. The loop runs in the main shell,
  # so every "exit 1" below ends the script (and fires the EXIT trap).
  while IFS= read -r -d '' f <&3; do
    if [ -d "$f" ]; then
      if ! is_dirname_okay "$f"; then
        echo "Wrong directory name: $f" >&2
        exit 1
      fi
    elif [ -f "$f" ]; then
      # If $f is in $SPECIAL_FILES, we skip to the next loop because
      # we won't check it for format, fields and encoding.
      for sf in $SPECIAL_FILES; do
        if [ "$f" = "$dists_parent_dir/$sf" ]; then
          continue 2
        fi
      done

      if ! is_filename_okay "$f"; then
        echo "Wrong file: $f" >&2
        exit 1
      fi

      # Check that all entries contain the right fields
      if ! has_valid_fields "$f"; then
        echo "File $f has an invalid format" >&2
        exit 1
      fi

      # Check that every package in Translation-$lang exists
      TPKGS=$(basename "$f").pkgs
      grep "^Package: " "$f" | cut -d' ' -f 2 | sort -u > "$TMP_WORK_DIR/$TPKGS"
      case "$f" in
        */$TESTING/*) t="$TESTING" ;;
        */$UNSTABLE/*) t="$UNSTABLE" ;;
        *)
          # Never fall back on whatever $t happened to hold before.
          echo "Cannot determine the suite of $f" >&2
          exit 1
          ;;
      esac
      if diff "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" | grep -q "^>"; then
        diff -au "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" | grep "^+"
        echo "$f contains packages which are not in $t" >&2
        exit 1
      fi

      # Check encoding
      iconv -f utf-8 -t utf-8 < "$f" > /dev/null 2>&1 || {
        echo "$f is not an UTF-8 file" >&2
        exit 1
      }

      # We do not check if the md5 in Translation-$lang are
      # correct.

      if [ "$DRY_RUN" = "0" ]; then
        # Now generate the compressed files
        bzip2 "$f"
      fi
    else
      echo "Neither a file or directory: $f" >&2
      exit 1
    fi
  done 3< "$TMP_WORK_DIR/tree.list"

  echo "$dists_parent_dir structure validated successfully ($(date +%c))"

  # If we reach this point, everything went fine.
  trap - EXIT
  rm -rf "$TMP_WORK_DIR"