ddtp-i18n-check.sh 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #!/bin/bash
  2. #
  3. # $Id: ddtp_i18n_check.sh 2535 2011-02-19 14:20:52Z nekral-guest $
  4. #
  5. # Copyright (C) 2008, 2011 Felipe Augusto van de Wiel <faw@funlabs.org>
  6. # Copyright (C) 2008, 2009 Nicolas François <nicolas.francois@centraliens.net>
  7. #
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 2 of the License, or
  11. # (at your option) any later version.
  12. #
  13. # On Debian systems, you can find the full text of the license in
  14. # /usr/share/common-licenses/GPL-2
  15. set -eu
  16. export LC_ALL=C
  17. # This must be defined to either 0 or 1
  18. # When DEBUG=0, fail after the first error.
  19. # Otherwise, list all the errors.
  20. DEBUG=0
  21. # When DRY_RUN=0, generate the compressed version of the Translation-*
  22. # files.
  23. DRY_RUN=0
  24. dists_parent_dir=""
  25. # If no argument indicates the PACKAGES_LISTS_DIR then use '.'
  26. PACKAGES_LISTS_DIR=""
  27. usage () {
  28. echo "Usage: $0 [options] <dists_parent_dir> [<packages_lists_directory>]" >&2
  29. echo "" >&2
  30. echo " --debug Debug mode: do not stop after the first error" >&2
  31. echo " --dry-run Do not generate the compressed version of the " >&2
  32. echo " Translation files">&2
  33. exit 1
  34. }
  35. # Parse options
  36. for opt; do
  37. case "$opt" in
  38. "--debug")
  39. DEBUG=1
  40. ;;
  41. "--dry-run")
  42. DRY_RUN=1
  43. ;;
  44. "-*")
  45. usage
  46. ;;
  47. "")
  48. echo "Empty parameter" >&2
  49. echo "" >&2
  50. usage
  51. ;;
  52. *)
  53. if [ -z "$dists_parent_dir" ]; then
  54. # Removing trailing /
  55. dists_parent_dir=${opt%/}
  56. elif [ -z "$PACKAGES_LISTS_DIR" ]; then
  57. PACKAGES_LISTS_DIR=$opt
  58. else
  59. echo "$0: Invalid option: $opt" >&2
  60. usage
  61. fi
  62. ;;
  63. esac
  64. done
  65. PACKAGES_LISTS_DIR=${opt:-.}
  66. if [ ! -d "$dists_parent_dir" ]; then
  67. echo "missing dists_parent_dir, or not a directory" >&2
  68. echo "" >&2
  69. usage
  70. elif [ ! -d "$PACKAGES_LISTS_DIR" ]; then
  71. echo "missing packages_lists_directory, or not a directory" >&2
  72. echo "" >&2
  73. usage
  74. fi
  75. #STABLE="squeeze"
  76. TESTING="buster"
  77. UNSTABLE="sid"
  78. # Original SHA256SUMS, generated by i18n.debian.net
  79. CHECKSUMS="SHA256SUMS"
  80. # DAK Timestamp
  81. TIMESTAMP="timestamp"
  82. # These special files must exist on the top of dists_parent_dir
  83. SPECIAL_FILES="$CHECKSUMS $TIMESTAMP $TIMESTAMP.gpg"
  84. # Temporary working directory. We need a full path to reduce the
  85. # complexity of checking CHECKSUMS and cleaning/removing TMPDIR
  86. TEMP_WORK_DIR=$(mktemp -d -t ddtp_dinstall_tmpdir.XXXXXX)
  87. cd "$TEMP_WORK_DIR"
  88. TMP_WORK_DIR=$(pwd)
  89. cd "$OLDPWD"
  90. unset TEMP_WORK_DIR
  91. # If it's trapped, something bad happened.
  92. trap_exit () {
  93. rm -rf "$TMP_WORK_DIR"
  94. rm -f "$dists_parent_dir"/dists/*/main/i18n/Translation-*.bz2
  95. rm -f "$dists_parent_dir"/dists/*/main/i18n/Index
  96. exit 1
  97. }
  98. trap trap_exit EXIT HUP INT QUIT TERM
  99. is_filename_okay () {
  100. ifo_file="$1"
  101. # Check that the file in on an "i18n" directory
  102. # This ensures that the Translation-$lang files are not e.g. in
  103. # dists/etch/ or dists/etch/main/
  104. ifo_d=$(basename $(dirname "$ifo_file"))
  105. if [ "x$ifo_d" = "xi18n" ]; then
  106. # Check that the file is named Translation-$lang
  107. ifo_f=$(basename "$ifo_file")
  108. case "$ifo_f" in
  109. Translation-[a-z][a-z][a-z]_[A-Z][A-Z]) return 0;;
  110. Translation-[a-z][a-z]_[A-Z][A-Z]) return 0;;
  111. Translation-[a-z][a-z][a-z]) return 0;;
  112. Translation-[a-z][a-z]) return 0;;
  113. esac
  114. fi
  115. return 1
  116. }
  117. # Check a directory name against a directory whitelist
  118. is_dirname_okay () {
  119. ido_dir="$1"
  120. case "$ido_dir" in
  121. "$dists_parent_dir") return 0;;
  122. "$dists_parent_dir/dists") return 0;;
  123. # TODO/FIXME: It is undecided how to update at stable/point-releases, so we
  124. # don't allow files to $STABLE.
  125. # "$dists_parent_dir/dists/$STABLE") return 0;;
  126. # "$dists_parent_dir/dists/$STABLE/main") return 0;;
  127. # "$dists_parent_dir/dists/$STABLE/main/i18n") return 0;;
  128. # "$dists_parent_dir/dists/$STABLE/contrib") return 0;;
  129. # "$dists_parent_dir/dists/$STABLE/contrib/i18n") return 0;;
  130. # "$dists_parent_dir/dists/$STABLE/non-free") return 0;;
  131. # "$dists_parent_dir/dists/$STABLE/non-free/i18n") return 0;;
  132. "$dists_parent_dir/dists/$TESTING") return 0;;
  133. "$dists_parent_dir/dists/$TESTING/main") return 0;;
  134. "$dists_parent_dir/dists/$TESTING/main/i18n") return 0;;
  135. "$dists_parent_dir/dists/$TESTING/contrib") return 0;;
  136. "$dists_parent_dir/dists/$TESTING/contrib/i18n") return 0;;
  137. "$dists_parent_dir/dists/$TESTING/non-free") return 0;;
  138. "$dists_parent_dir/dists/$TESTING/non-free/i18n") return 0;;
  139. "$dists_parent_dir/dists/$UNSTABLE") return 0;;
  140. "$dists_parent_dir/dists/$UNSTABLE/main") return 0;;
  141. "$dists_parent_dir/dists/$UNSTABLE/main/i18n") return 0;;
  142. "$dists_parent_dir/dists/$UNSTABLE/contrib") return 0;;
  143. "$dists_parent_dir/dists/$UNSTABLE/contrib/i18n") return 0;;
  144. "$dists_parent_dir/dists/$UNSTABLE/non-free") return 0;;
  145. "$dists_parent_dir/dists/$UNSTABLE/non-free/i18n") return 0;;
  146. esac
  147. return 1
  148. }
  149. has_valid_fields () {
  150. hvf_file="$1"
  151. hvf_lang=${hvf_file/*-}
  152. awk "
  153. function print_status () {
  154. printf (\"p: %d, m: %d, s: %d, l: %d\n\", package, md5, s_description, l_description)
  155. }
  156. BEGIN {
  157. package = 0 # Indicates if a Package field was found
  158. md5 = 0 # Indicates if a Description-md5 field was found
  159. s_description = 0 # Indicates if a short description was found
  160. l_description = 0 # Indicates if a long description was found
  161. failures = 0 # Number of failures (debug only)
  162. failed = 0 # Failure already reported for the block
  163. }
  164. /^Package: / {
  165. if (0 == failed) {
  166. if ( (0 != package) \
  167. || (0 != md5) \
  168. || (0 != s_description) \
  169. || (0 != l_description)) {
  170. printf (\"Package field unexpected in $hvf_file (line %d)\n\", NR)
  171. print_status()
  172. failed = 1
  173. if ($DEBUG) { failures++ } else { exit 1 }
  174. }
  175. package++
  176. }
  177. # Next input line
  178. next
  179. }
  180. /^Description-md5: / {
  181. if (0 == failed) {
  182. if ( (1 != package) \
  183. || (0 != md5) \
  184. || (0 != s_description) \
  185. || (0 != l_description)) {
  186. printf (\"Description-md5 field unexpected in $hvf_file (line %d)\n\", NR)
  187. print_status()
  188. failed = 1
  189. if ($DEBUG) { failures++ } else { exit 1 }
  190. }
  191. md5++
  192. }
  193. # Next input line
  194. next
  195. }
  196. /^Description-$hvf_lang: / {
  197. if (0 == failed) {
  198. if ( (1 != package) \
  199. || (1 != md5) \
  200. || (0 != s_description) \
  201. || (0 != l_description)) {
  202. printf (\"Description-$hvf_lang field unexpected in $hvf_file (line %d)\n\", NR)
  203. print_status()
  204. failed = 1
  205. if ($DEBUG) { failures++ } else { exit 1 }
  206. }
  207. s_description++
  208. }
  209. # Next input line
  210. next
  211. }
  212. /^ / {
  213. if (0 == failed) {
  214. if ( (1 != package) \
  215. || (1 != md5) \
  216. || (1 != s_description)) {
  217. printf (\"Long description unexpected in $hvf_file (line %d)\n\", NR)
  218. print_status()
  219. failed = 1
  220. if ($DEBUG) { failures++ } else { exit 1 }
  221. }
  222. l_description = 1 # There can be any number of long description
  223. # lines. Do not count.
  224. }
  225. # Next line
  226. next
  227. }
  228. /^$/ {
  229. if (0 == failed) {
  230. if ( (1 != package) \
  231. || (1 != md5) \
  232. || (1 != s_description) \
  233. || (1 != l_description)) {
  234. printf (\"End of block unexpected in $hvf_file (line %d)\n\", NR)
  235. print_status()
  236. failed = 1
  237. if ($DEBUG) { failures++ } else { exit 1 }
  238. }
  239. }
  240. # Next package
  241. package = 0; md5 = 0; s_description = 0; l_description = 0
  242. failed = 0
  243. # Next input line
  244. next
  245. }
  246. # Anything else: fail
  247. {
  248. printf (\"Unexpected line '\$0' in $hvf_file (line %d)\n\", NR)
  249. print_status()
  250. failed = 1
  251. if ($DEBUG) { failures++ } else { exit 1 }
  252. }
  253. END {
  254. if (0 == failed) {
  255. # They must be all set to 0 or all set to 1
  256. if ( ( (0 == package) \
  257. || (0 == md5) \
  258. || (0 == s_description) \
  259. || (0 == l_description)) \
  260. && ( (0 != package) \
  261. || (0 != md5) \
  262. || (0 != s_description) \
  263. || (0 != l_description))) {
  264. printf (\"End of file unexpected in $hvf_file (line %d)\n\", NR)
  265. print_status()
  266. exit 1
  267. }
  268. }
  269. if (failures > 0) {
  270. exit 1
  271. }
  272. }
  273. " "$hvf_file" || return 1
  274. return 0
  275. }
  276. # $SPECIAL_FILES must exist
  277. for sf in $SPECIAL_FILES; do
  278. if [ ! -f "$dists_parent_dir/$sf" ]; then
  279. echo "Special file ($sf) doesn't exist"
  280. exit 1;
  281. fi
  282. done
  283. # Comparing CHECKSUMS
  284. # We don't use -c because a file could exist in the directory tree and not in
  285. # the CHECKSUMS, so we sort the existing CHECKSUMS and we create a new one
  286. # already sorted, if cmp fails then files are different and we don't want to
  287. # continue.
  288. cd "$dists_parent_dir"
  289. find dists -type f -print0 |xargs --null sha256sum > "$TMP_WORK_DIR/$CHECKSUMS.new"
  290. sort "$CHECKSUMS" > "$TMP_WORK_DIR/$CHECKSUMS.sorted"
  291. sort "$TMP_WORK_DIR/$CHECKSUMS.new" > "$TMP_WORK_DIR/$CHECKSUMS.new.sorted"
  292. if ! cmp --quiet "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted"; then
  293. echo "Failed to compare the $CHECKSUMS, they are not identical!" >&2
  294. diff -au "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted" >&2
  295. exit 1
  296. fi
  297. cd "$OLDPWD"
  298. # Get the list of valid packages (sorted, uniq)
  299. for t in "$TESTING" "$UNSTABLE"; do
  300. if [ ! -f "$PACKAGES_LISTS_DIR/$t" ]; then
  301. echo "Missing $PACKAGES_LISTS_DIR/$t" >&2
  302. exit 1
  303. fi
  304. cut -d' ' -f 1 "$PACKAGES_LISTS_DIR/$t" | sort -u > "$TMP_WORK_DIR/$t.pkgs"
  305. done
  306. /usr/bin/find "$dists_parent_dir" |
  307. while read f; do
  308. if [ -d "$f" ]; then
  309. if ! is_dirname_okay "$f"; then
  310. echo "Wrong directory name: $f" >&2
  311. exit 1
  312. fi
  313. elif [ -f "$f" ]; then
  314. # If $f is in $SPECIAL_FILES, we skip to the next loop because
  315. # we won't check it for format, fields and encoding.
  316. for sf in $SPECIAL_FILES; do
  317. if [ "$f" = "$dists_parent_dir/$sf" ]; then
  318. continue 2
  319. fi
  320. done
  321. if ! is_filename_okay "$f"; then
  322. echo "Wrong file: $f" >&2
  323. exit 1
  324. fi
  325. # Check that all entries contains the right fields
  326. if ! has_valid_fields "$f"; then
  327. echo "File $f has an invalid format" >&2
  328. exit 1
  329. fi
  330. # Check that every packages in Translation-$lang exists
  331. TPKGS=$(basename "$f").pkgs
  332. grep "^Package: " "$f" | cut -d' ' -f 2 | sort -u > "$TMP_WORK_DIR/$TPKGS"
  333. case "$f" in
  334. */$TESTING/*) t="$TESTING";;
  335. */$UNSTABLE/*) t="$UNSTABLE";;
  336. esac
  337. if diff "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" | grep -q "^>"; then
  338. diff -au "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" |grep "^+"
  339. echo "$f contains packages which are not in $t" >&2
  340. exit 1
  341. fi
  342. # Check encoding
  343. iconv -f utf-8 -t utf-8 < "$f" > /dev/null 2>&1 || {
  344. echo "$f is not an UTF-8 file" >&2
  345. exit 1
  346. }
  347. # We do not check if the md5 in Translation-$lang are
  348. # correct.
  349. if [ "$DRY_RUN" = "0" ]; then
  350. # Now generate the compressed files
  351. bzip2 "$f"
  352. fi
  353. else
  354. echo "Neither a file or directory: $f" >&2
  355. exit 1
  356. fi
  357. done || false
  358. # The while will just fail if an internal check "exit 1", but the script
  359. # is not exited. "|| false" makes the script fail (and exit) in that case.
  360. echo "$dists_parent_dir structure validated successfully ($(date +%c))"
  361. # If we reach this point, everything went fine.
  362. trap - EXIT
  363. rm -rf "$TMP_WORK_DIR"