Util.pm 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023
  1. # -*- Mode: perl; indent-tabs-mode: nil -*-
  2. #
  3. # The contents of this file are subject to the Mozilla Public
  4. # License Version 1.1 (the "License"); you may not use this file
  5. # except in compliance with the License. You may obtain a copy of
  6. # the License at http://www.mozilla.org/MPL/
  7. #
  8. # Software distributed under the License is distributed on an "AS
  9. # IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  10. # implied. See the License for the specific language governing
  11. # rights and limitations under the License.
  12. #
  13. # The Original Code is the Bugzilla Bug Tracking System.
  14. #
  15. # The Initial Developer of the Original Code is Netscape Communications
  16. # Corporation. Portions created by Netscape are
  17. # Copyright (C) 1998 Netscape Communications Corporation. All
  18. # Rights Reserved.
  19. #
  20. # Contributor(s): Terry Weissman <terry@mozilla.org>
  21. # Dan Mosedale <dmose@mozilla.org>
  22. # Jacob Steenhagen <jake@bugzilla.org>
  23. # Bradley Baetz <bbaetz@student.usyd.edu.au>
  24. # Christopher Aillon <christopher@aillon.com>
  25. # Max Kanat-Alexander <mkanat@bugzilla.org>
  26. # Frédéric Buclin <LpSolit@gmail.com>
  27. # Marc Schumann <wurblzap@gmail.com>
  28. package Bugzilla::Util;
  29. use strict;
  30. use base qw(Exporter);
  31. @Bugzilla::Util::EXPORT = qw(is_tainted trick_taint detaint_natural
  32. detaint_signed
  33. html_quote url_quote xml_quote
  34. css_class_quote html_light_quote url_decode
  35. i_am_cgi get_netaddr correct_urlbase
  36. lsearch ssl_require_redirect use_attachbase
  37. diff_arrays diff_strings
  38. trim wrap_hard wrap_comment find_wrap_point
  39. format_time format_time_decimal validate_date
  40. validate_time
  41. file_mod_time is_7bit_clean
  42. bz_crypt generate_random_password
  43. validate_email_syntax clean_text
  44. get_text disable_utf8);
  45. use Bugzilla::Constants;
  46. use Date::Parse;
  47. use Date::Format;
  48. use Text::Wrap;
  49. # This is from the perlsec page, slightly modified to remove a warning
  50. # From that page:
  51. # This function makes use of the fact that the presence of
  52. # tainted data anywhere within an expression renders the
  53. # entire expression tainted.
  54. # Don't ask me how it works...
  # Returns true if any of the arguments is tainted, false otherwise.
  #
  # The probe joins all arguments into one expression that also contains a
  # harmless "kill 0"; perl dies inside the eval when tainted data reaches
  # kill.  $@ is localized so that the probe never clobbers an error the
  # caller has not inspected yet.
  sub is_tainted {
      local $@;
      my $probe_survived = eval { my $foo = join('', @_), kill 0; 1; };
      return not $probe_survived;
  }
  # Untaints the caller's variable in place (it is modified through $_[0])
  # by re-assigning it from a regex capture that matches the whole string.
  # Dies with a stack trace when passed undef.  Returns true when the
  # variable is defined afterwards.
  sub trick_taint {
      require Carp;
      if (!defined $_[0]) {
          Carp::confess("Undef to trick_taint");
      }
      if ($_[0] =~ /^(.*)$/s) {
          $_[0] = $1;
      }
      else {
          $_[0] = undef;
      }
      return (defined($_[0]));
  }
  # Detaints a natural number in place (the caller's variable is modified
  # through $_[0]).
  #
  # The variable is replaced by the captured digits when it consists only of
  # ASCII digits, and by undef otherwise.  Only [0-9] is accepted: \d would
  # also match non-ASCII Unicode digits, which are not usable as numbers.
  # An undefined argument is rejected without an "uninitialized" warning.
  #
  # Returns true if the value was a valid natural number, false otherwise.
  sub detaint_natural {
      my $match = defined($_[0]) && $_[0] =~ /^([0-9]+)$/;
      $_[0] = $match ? $1 : undef;
      return (defined($_[0]));
  }
  # Detaints a signed integer in place (the caller's variable is modified
  # through $_[0]).
  #
  # An optional leading "+" or "-" followed by ASCII digits is accepted; a
  # leading "+" is removed from the stored value.  Anything else, including
  # undef and non-ASCII Unicode digits (which \d would match), turns the
  # variable into undef.
  #
  # Returns true if the value was a valid signed integer, false otherwise.
  sub detaint_signed {
      my $match = defined($_[0]) && $_[0] =~ /^([-+]?[0-9]+)$/;
      $_[0] = $match ? $1 : undef;
      # Remove any leading plus sign.
      if (defined($_[0]) && $_[0] =~ /^\+([0-9]+)$/) {
          $_[0] = $1;
      }
      return (defined($_[0]));
  }
  # Returns a copy of the argument with &, <, > and " replaced by their
  # HTML entities.  The ampersand is handled first so that the entities
  # produced by the later substitutions are not escaped a second time.
  sub html_quote {
      my ($html) = (@_);
      for ($html) {
          s/\&/\&amp;/g;
          s/</\&lt;/g;
          s/>/\&gt;/g;
          s/\"/\&quot;/g;
      }
      return $html;
  }
  # Returns $text with only an explicit whitelist of HTML left intact.
  #
  # When HTML::Scrubber and HTML::Parser can be loaded, the text is passed
  # through a scrubber configured with the whitelist below.  Otherwise a
  # regex fallback keeps the attribute-less whitelisted tags and escapes
  # every other < and >.
  sub html_light_quote {
      my ($text) = @_;

      # List of allowed HTML elements having no attributes.
      my @allow = qw(b strong em i u p br abbr acronym ins del cite code var
                     dfn samp kbd big small sub sup tt dd dt dl ul li ol
                     fieldset legend);

      # Are HTML::Scrubber and HTML::Parser installed?
      my $have_scrubber = eval {
          require HTML::Scrubber;
          require HTML::Parser;
          1;
      };

      if (!$have_scrubber) {
          # Package(s) not installed: fall back to plain substitutions.
          my $safe = join('|', @allow);
          my $chr = chr(1);
          for ($text) {
              # First, hide the safe elements behind a marker character.
              s#<($safe)>#$chr$1$chr#go;
              s#</($safe)>#$chr/$1$chr#go;
              # Now filter < and >.
              s#<#&lt;#g;
              s#>#&gt;#g;
              # Restore safe elements.
              s#$chr/($safe)$chr#</$1>#go;
              s#$chr($safe)$chr#<$1>#go;
          }
          return $text;
      }

      # Packages installed: we can be less restrictive and accept some
      # elements with attributes.
      push(@allow, qw(a blockquote q span));

      # Allowed protocols.
      my $safe_protocols = join('|', SAFE_PROTOCOLS);
      my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i;

      # Attributes accepted on every element; '*' => 0 rejects all others.
      my %common = (id => 1, name => 1, class => 1, '*' => 0);

      # Deny all elements and attributes unless explicitly authorized.
      my @default = (0 => { %common });

      # Specific rules for allowed elements. If no specific rule is set
      # for a given element, then the default is used.
      my @rules = (
          a          => { %common, href => $protocol_regexp, title => 1 },
          blockquote => { %common, cite => $protocol_regexp },
          'q'        => { %common, cite => $protocol_regexp },
      );

      my $scrubber = HTML::Scrubber->new(default => \@default,
                                         allow   => \@allow,
                                         rules   => \@rules,
                                         comment => 0,
                                         process => 0);
      return $scrubber->scrub($text);
  }
  158. # This originally came from CGI.pm, by Lincoln D. Stein
  # Returns a copy of the argument in which every character other than
  # ASCII letters, digits, "_", "-" and "." is replaced by an upper-case
  # %XX escape.
  sub url_quote {
      my ($encoded) = (@_);
      if (Bugzilla->params->{'utf8'} && utf8::is_utf8($encoded)) {
          # The regex below works only on bytes.
          utf8::encode($encoded);
      }
      $encoded =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
      return $encoded;
  }
  # Returns a copy of the argument made usable as a CSS class name: spaces
  # become underscores and every other character outside ASCII letters,
  # digits, "_", "-" and "." becomes a hexadecimal character reference.
  #
  # Only the hex digits are upper-cased.  The "x" marker stays lower-case
  # because "&#X..;" is not a valid character reference in XML/XHTML.
  sub css_class_quote {
      my ($toencode) = (@_);
      $toencode =~ s/ /_/g;
      $toencode =~ s/([^a-zA-Z0-9_\-.])/sprintf("&#x%X;",ord($1))/eg;
      return $toencode;
  }
  # Returns a copy of the argument with &, <, >, " and ' replaced by their
  # XML entities and with characters that XML 1.0 forbids removed.
  sub xml_quote {
      my ($var) = (@_);
      $var =~ s/\&/\&amp;/g;
      $var =~ s/</\&lt;/g;
      $var =~ s/>/\&gt;/g;
      $var =~ s/\"/\&quot;/g;
      $var =~ s/\'/\&apos;/g;

      # The following nukes characters disallowed by the XML 1.0
      # spec, Production 2.2. 1.0 declares that only the following
      # are valid:
      # (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF])
      # The first range starts at NUL, which is outside that production too.
      $var =~ s/([\x{0000}-\x{0008}]|
                 [\x{000B}-\x{000C}]|
                 [\x{000E}-\x{001F}]|
                 [\x{D800}-\x{DFFF}]|
                 [\x{FFFE}-\x{FFFF}])//gx;
      return $var;
  }
  190. # This function must not be relied upon to return a valid string to pass to
  191. # the DB or the user in UTF-8 situations. The only thing you can rely upon
  192. # it for is that if you url_decode a string, it will url_encode back to the
  193. # exact same thing.
  # Returns a copy of the argument with "+" turned into a space and every
  # %XX escape replaced by the character with that code.
  #
  # chr() is used for the conversion: pack("c", ...) packs a *signed* char,
  # so every escape above %7F wrapped around and raised a "Character in 'c'
  # format wrapped" warning.
  sub url_decode {
      my ($todecode) = (@_);
      $todecode =~ tr/+/ /;       # pluses become spaces
      $todecode =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
      return $todecode;
  }
  # Returns 1 when the SERVER_SOFTWARE environment variable exists, else 0.
  # SERVER_SOFTWARE is used because the CGI spec requires it to be defined
  # for all requests.
  sub i_am_cgi {
      return 1 if exists $ENV{'SERVER_SOFTWARE'};
      return 0;
  }
  # Decides whether the current request has to be redirected to SSL.
  #
  # Params:  $method - (optional) name of the XML-RPC method being called.
  # Returns: 1 if a redirect is needed, 0 otherwise.
  #
  # HTTPS and SERVER_PORT may be absent from the environment; they are
  # given defaults so that no "uninitialized value" warning is raised.
  sub ssl_require_redirect {
      my $method = shift;

      my $https = uc(defined $ENV{'HTTPS'} ? $ENV{'HTTPS'} : '');
      my $port  = $ENV{'SERVER_PORT'} || 0;

      # If we are already in a protected connection or
      # sslbase is not set then no action is required.
      return 0 if $https eq 'ON' || $port == 443;
      return 0 if Bugzilla->params->{'sslbase'} eq '';

      # System is configured to never require SSL
      # so no redirection is needed.
      return 0
          if Bugzilla->params->{'ssl'} eq 'never';

      # System is configured to always require a SSL
      # connection so we need to redirect.
      return 1
          if Bugzilla->params->{'ssl'} eq 'always';

      # System is configured such that if we are inside
      # of an authenticated session, then we need to make
      # sure that all of the connections are over SSL. For
      # non-authenticated sessions SSL is not mandatory.
      # For XMLRPC requests, if the method is User.login
      # then we always want the connection to be over SSL
      # if the system is configured for authenticated
      # sessions since the user's username and password
      # will be passed before the user is logged in.
      return 1
          if Bugzilla->params->{'ssl'} eq 'authenticated sessions'
              && (Bugzilla->user->id
                  || (defined $method && $method eq 'User.login'));

      return 0;
  }
  # Returns the sslbase parameter when the ssl parameter is not 'never',
  # sslbase is set, and either ssl is 'always' or a user is logged in.
  # Returns the urlbase parameter in every other case.
  sub correct_urlbase {
      my $ssl = Bugzilla->params->{'ssl'};
      if ($ssl ne 'never') {
          my $sslbase = Bugzilla->params->{'sslbase'};
          # The user is only looked up when ssl is not 'always'
          # (i.e. "authenticated sessions").
          if ($sslbase && ($ssl eq 'always' || Bugzilla->user->id)) {
              return $sslbase;
          }
      }
      # ssl is 'never', or sslbase isn't set, or we are set to
      # "authenticated sessions" but nobody's logged in.
      return Bugzilla->params->{'urlbase'};
  }
  # Returns 1 when the attachment_base parameter is non-empty and differs
  # from both the urlbase and the sslbase parameters, else 0.
  sub use_attachbase {
      my $attachbase = Bugzilla->params->{'attachment_base'};
      return 0 if $attachbase eq '';
      return 0 if $attachbase eq Bugzilla->params->{'urlbase'};
      return 0 if $attachbase eq Bugzilla->params->{'sslbase'};
      return 1;
  }
  # Returns the index of the first element of the array referenced by
  # $list that is string-equal to $item, or -1 if there is none.
  sub lsearch {
      my ($list, $item) = (@_);
      my $position = -1;
      foreach my $candidate (@$list) {
          $position++;
          return $position if $candidate eq $item;
      }
      return -1;
  }
  # Compares two lists, given as array references, as strings.
  #
  # Returns a list of two array references: the entries of the old list
  # without a counterpart in the new list, and the entries of the new list
  # without a counterpart in the old list.  The inputs are not modified.
  sub diff_arrays {
      my ($old_ref, $new_ref) = @_;
      my @old = @$old_ref;
      my @new = @$new_ref;

      # Walk every (old, new) pair; equal entries are blanked out in both
      # working copies, so that only unmatched entries stay non-empty.
      for my $o (0 .. $#old) {
          for my $n (0 .. $#new) {
              next if ($new[$n] eq '');
              if ($old[$o] eq $new[$n]) {
                  $new[$n] = $old[$o] = '';
              }
          }
      }

      my @removed = grep { $_ ne '' } @old;
      my @added = grep { $_ ne '' } @new;
      return (\@removed, \@added);
  }
  # Returns a copy of the argument without leading and trailing whitespace.
  # False values (undef, '' and '0') are returned unchanged.
  sub trim {
      my ($text) = @_;
      return $text unless $text;
      for ($text) {
          s/^\s+//g;
          s/\s+$//g;
      }
      return $text;
  }
  # Compares two strings holding comma- or whitespace-separated items.
  #
  # Returns a list of two strings: the removed items and the added items,
  # each joined with ", ".
  sub diff_strings {
      my ($oldstr, $newstr) = @_;

      # Normalize every run of separators to one space before splitting.
      for ($oldstr, $newstr) {
          s/[\s,]+/ /g;
      }

      my ($rem, $add) = diff_arrays([split(" ", $oldstr)],
                                    [split(" ", $newstr)]);

      return (join(", ", @$rem), join(", ", @$add));
  }
  # Wraps a comment to $cols columns (COMMENT_COLS_WRAP when $cols is not
  # given or false) and returns the wrapped text.  Lines starting with ">"
  # are kept as they are.
  sub wrap_comment {
      my ($comment, $cols) = @_;

      # Use 'local', as recommended by Text::Wrap's perldoc.
      local $Text::Wrap::columns = $cols || COMMENT_COLS_WRAP;
      # Make words that are longer than COMMENT_COLS_WRAP not wrap.
      local $Text::Wrap::huge = 'overflow';
      # Don't mess with tabs.
      local $Text::Wrap::unexpand = 0;

      my @pieces;
      foreach my $line (split(/\r\n|\r|\n/, $comment)) {
          # If the line starts with ">", don't wrap it. Otherwise, wrap.
          unless ($line =~ qr/^>/) {
              # Due to a segfault in Text::Tabs::expand() when processing tabs with
              # Unicode (see http://rt.perl.org/rt3/Public/Bug/Display.html?id=52104),
              # we have to remove tabs before processing the comment. This restriction
              # can go away when we require Perl 5.8.9 or newer.
              $line =~ s/\t/ /g;
              $line = wrap('', '', $line);
          }
          push(@pieces, $line . "\n");
      }

      my $wrappedcomment = join('', @pieces);
      chomp($wrappedcomment); # Drop the newline appended after the last line.
      return $wrappedcomment;
  }
  # Returns the position at which $string should be split so that the
  # first part is about $maxpos characters long.
  #
  # A false $string gives 0 and a string shorter than $maxpos gives its
  # length.  Otherwise the last comma at or before $maxpos is preferred,
  # then the last space, then the position just after the last hyphen; if
  # none is found the result is $maxpos itself.
  sub find_wrap_point {
      my ($string, $maxpos) = @_;
      return 0 if !$string;
      return length($string) if length($string) < $maxpos;

      # Comma first, then space: split right on the separator.
      foreach my $separator (",", " ") {
          my $found = rindex($string, $separator, $maxpos);
          return $found if $found >= 0;
      }

      # A hyphen stays on the left side of the split; with no hyphen
      # either, just truncate.
      my $hyphen = rindex($string, "-", $maxpos);
      return $hyphen < 0 ? $maxpos : $hyphen + 1;
  }
  # Wraps $string with Text::Wrap using $columns as the column setting;
  # words too long for a line are broken ('wrap' mode).  Returns the
  # wrapped string after a chomp.
  sub wrap_hard {
      my ($string, $columns) = @_;
      local ($Text::Wrap::columns, $Text::Wrap::unexpand, $Text::Wrap::huge)
          = ($columns, 0, 'wrap');
      my $result = wrap('', '', $string);
      chomp($result);
      return $result;
  }
  # Formats a date/time string.
  #
  # Params:  $date   - a date string understood by str2time().
  #          $format - (optional) a time2str() format.  When undefined,
  #                    the format is guessed from $date and the timezone
  #                    is appended.
  # Returns: the formatted date, or '' if $date cannot be parsed.
  sub format_time {
      my ($date, $format) = @_;

      my $show_timezone;
      if (defined($format)) {
          # Search for %Z or %z, meaning we want the timezone to be displayed.
          # Till bug 182238 gets fixed, we assume Bugzilla->params->{'timezone'}
          # is used.
          $show_timezone = ($format =~ s/\s?%Z$//i);
      }
      else {
          # No format given: guess it. Seconds are only shown when the
          # input had them. See Date::Format for other formats available.
          my $has_seconds =
              $date =~ m/^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/
              && defined $7;
          $format = $has_seconds ? "%Y-%m-%d %T" : "%Y-%m-%d %R";
          # By default, we want the timezone to be displayed.
          $show_timezone = 1;
      }

      # str2time($date) is undefined if $date has an invalid date format;
      # never pass invalid (time) strings on to templates.
      my $formatted = '';
      my $time = str2time($date);
      if (defined $time) {
          $formatted = time2str($format, $time);
          $formatted .= " " . Bugzilla->params->{'timezone'} if $show_timezone;
      }
      return trim($formatted);
  }
  # Returns $time formatted with two decimals, or with one decimal when the
  # two-decimal form ends in 0.
  sub format_time_decimal {
      my ($time) = (@_);
      my $two_places = sprintf("%.2f", $time);
      return $two_places unless $two_places =~ /0\Z/;
      return sprintf("%.1f", $time);
  }
  # Returns the modification time of $filename as reported by stat()
  # (element 9 of its result), or undef when stat() fails.
  sub file_mod_time {
      my ($filename) = (@_);
      my $mtime = (stat($filename))[9];
      return $mtime;
  }
  # Returns crypt($password, $salt) for a freshly generated random salt.
  sub bz_crypt {
      my ($password) = @_;

      # The list of characters that can appear in a salt.  Salts and hashes
      # are both encoded as a sequence of characters from a set containing
      # 64 characters, each one of which represents 6 bits of the salt/hash.
      # The encoding is similar to BASE64, the difference being that the
      # BASE64 plus sign (+) is replaced with a forward slash (/).
      my @saltchars = (0..9, 'A'..'Z', 'a'..'z', '.', '/');

      # Generate the salt.  We use an 8 character (48 bit) salt for maximum
      # security on systems whose crypt uses MD5.  Systems with older
      # versions of crypt will just use the first two characters of the salt.
      my $salt = join('', map { $saltchars[rand(64)] } (1 .. 8));

      # Wide characters cause crypt to die.
      if (Bugzilla->params->{'utf8'} && utf8::is_utf8($password)) {
          utf8::encode($password);
      }

      return crypt($password, $salt);
  }
  438. # If you want to understand the security of strings generated by this
  439. # function, here's a quick formula that will help you estimate:
  440. # We pick from 62 characters, which is close to 64, which is 2^6.
  441. # So 8 characters is (2^6)^8 == 2^48 combinations. Just multiply 6
  442. # by the number of characters you generate, and that gets you the equivalent
  443. # strength of the string in bits.
  # Returns a random string of $size characters (10 when no size, or a
  # false one, is given) drawn from the digits and the lower- and
  # upper-case ASCII letters.
  sub generate_random_password {
      my $size = shift || 10; # default to 10 chars if nothing specified
      my @alphabet = ('0'..'9','a'..'z','A'..'Z');

      my $rand;
      if (eval { require Math::Random::Secure; 1; }) {
          $rand = \&Math::Random::Secure::irand;
      }
      else {
          # For details on why this block works the way it does, see bug 619594.
          # (Note that we don't do this if Math::Random::Secure is installed,
          # because we don't need to.)
          my $counter = 0;
          $rand = sub {
              # If we regenerate the seed every 5 characters, our seed is roughly
              # as strong (in terms of bit size) as our randomly-generated
              # string itself.
              _do_srand() if ($counter % 5) == 0;
              $counter++;
              return int(rand $_[0]);
          };
      }

      my $password = '';
      $password .= $alphabet[$rand->(62)] for (1..$size);
      return $password;
  }
  # Re-seeds perl's random number generator.
  sub _do_srand {
      # On *nix-like platforms, this uses /dev/urandom, so the seed changes
      # enough on every invocation.
      return srand() if !ON_WINDOWS;

      # On Windows, calling srand over and over in the same process produces
      # very bad results. We need a stronger seed.
      require Win32;
      # GuidGen generates random data via Windows's CryptGenRandom
      # interface, which is documented as being cryptographically secure.
      # GUIDs look like:
      # {09531CF1-D0C7-4860-840C-1C8C8735E2AD}
      my $guid = Win32::GuidGen();
      $guid =~ s/[-{}]+//g;
      # Get a 32-bit integer using the first eight hex digits.
      srand(hex(substr($guid, 0, 8)));
      return;
  }
  # Checks an address against the 'emailregexp' parameter and against a
  # fixed set of forbidden characters.  On success the caller's variable is
  # untainted in place (through $_[0]) and 1 is returned; otherwise 0 is
  # returned and the variable is left alone.
  sub validate_email_syntax {
      my ($addr) = @_;
      my $match = Bugzilla->params->{'emailregexp'};
      return 0
          unless ($addr =~ /$match/ && $addr !~ /[\\\(\)<>&,;:"\[\] \t\r\n]/);
      # We assume these checks to suffice to consider the address untainted.
      trick_taint($_[0]);
      return 1;
  }
  # Checks that $date is a real date written as year-month-day.
  #
  # The date is parsed with str2time(), formatted back with time2str(), and
  # both forms are compared after leading zeros in month and day have been
  # dropped, so that impossible dates which the parser "rolls over" are
  # rejected.
  #
  # Returns 1 if the date is valid, 0 otherwise.
  sub validate_date {
      my ($date) = @_;

      # $ts is undefined if the parser fails.  It has to be tested with
      # defined(): a timestamp of 0 (the epoch) is a valid parse result.
      my $ts = str2time($date);
      return 0 unless defined $ts;

      my $date2 = time2str("%Y-%m-%d", $ts);
      $date =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
      $date2 =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;

      return ($date eq $date2) ? 1 : 0;
  }
  # Checks that $time is a valid time of day written as H:MM or H:MM:SS.
  #
  # The time is parsed with str2time(), formatted back with time2str(), and
  # compared with the zero-padded form of the input.
  #
  # Returns 1 if the time is valid, 0 otherwise.
  sub validate_time {
      my ($time) = @_;

      # $ts is undefined if the parser fails.  Test it with defined() so
      # that a timestamp of 0 is not mistaken for a parse failure.
      my $ts = str2time($time);
      return 0 unless defined $ts;

      my $time2 = time2str("%H:%M:%S", $ts);
      if ($time =~ /^(\d{1,2}):(\d\d)(?::(\d\d))?$/) {
          # Normalize to HH:MM:SS; missing seconds count as 00.
          $time = sprintf("%02d:%02d:%02d", $1, $2, $3 || 0);
      }

      return ($time eq $time2) ? 1 : 0;
  }
  # Returns true when the argument contains nothing but the characters
  # \x20 through \x7E, line feed and carriage return.
  sub is_7bit_clean {
      my ($candidate) = @_;
      return $candidate !~ /[^\x20-\x7E\x0A\x0D]/;
  }
  # Returns the argument with every run of control characters (\x00-\x1F
  # and \x7F) replaced by a single space, then passed through trim().
  sub clean_text {
      my $cleaned = shift;
      for ($cleaned) {
          s/[\x00-\x1F\x7F]+/ /g; # change control characters into a space
      }
      return trim($cleaned);
  }
  # Renders the message called $name through the global/message.txt.tmpl
  # template and returns the resulting string.
  #
  # Params:  $name - identifier of the message.
  #          $vars - (optional) hashref of template variables.  Its
  #                  'message' key is set to $name.
  sub get_text {
      my ($name, $vars) = @_;
      my $template = Bugzilla->template_inner;
      $vars ||= {};
      $vars->{'message'} = $name;

      my $message;
      unless ($template->process('global/message.txt.tmpl', $vars, \$message)) {
          ThrowTemplateError($template->error());
      }

      # Remove the indenting that exists in the message template.
      $message =~ s/^ //gm;
      return $message;
  }
  # Returns the network address of an IPv4 address, using the
  # 'loginnetmask' parameter as the number of significant bits.
  #
  # Params:  $ipaddr - a dotted-quad IPv4 address.
  # Returns: the network address as a dotted quad, "0.0.0.0" when
  #          loginnetmask is 0, or undef when $ipaddr is not a valid
  #          IPv4 address.
  sub get_netaddr {
      my $ipaddr = shift;

      # Check for a valid IPv4 addr which we know how to parse.  Only ASCII
      # digits are accepted, and the octets are taken from the captures.
      return undef
          unless $ipaddr
              && $ipaddr =~ /^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$/;
      my @octets = ($1, $2, $3, $4);

      # Three digits can express values up to 999, which pack("C") would
      # silently wrap around; such addresses are invalid.
      return undef if grep { $_ > 255 } @octets;

      my $addr = unpack("N", pack("CCCC", @octets));

      my $maskbits = Bugzilla->params->{'loginnetmask'};

      # Make Bugzilla ignore the IP address if loginnetmask is set to 0
      return "0.0.0.0" if ($maskbits == 0);

      # Clear the host bits by shifting them out and back in.
      $addr >>= (32-$maskbits);
      $addr <<= (32-$maskbits);
      return join(".", unpack("CCCC", pack("N", $addr)));
  }
  # Switches STDOUT to the ':bytes' layer when the 'utf8' parameter is on.
  sub disable_utf8 {
      # Turn off UTF8 encoding.
      binmode STDOUT, ':bytes' if Bugzilla->params->{'utf8'};
  }
  563. 1;
  564. __END__
  565. =head1 NAME
  566. Bugzilla::Util - Generic utility functions for bugzilla
  567. =head1 SYNOPSIS
  568. use Bugzilla::Util;
  569. # Functions for dealing with variable tainting
  570. $rv = is_tainted($var);
  571. trick_taint($var);
  572. detaint_natural($var);
  573. detaint_signed($var);
  574. # Functions for quoting
  575. html_quote($var);
  576. url_quote($var);
  577. xml_quote($var);
  578. # Functions for decoding
  579. $rv = url_decode($var);
  580. # Functions that tell you about your environment
  581. my $is_cgi = i_am_cgi();
  582. my $net_addr = get_netaddr($ip_addr);
  583. my $urlbase = correct_urlbase();
  584. # Functions for searching
  585. $loc = lsearch(\@arr, $val);
  586. # Data manipulation
  587. ($removed, $added) = diff_arrays(\@old, \@new);
  588. # Functions for manipulating strings
  589. $val = trim(" abc ");
  590. ($removed, $added) = diff_strings($old, $new);
  591. $wrapped = wrap_comment($comment);
  592. # Functions for formatting time
  593. format_time($time);
  594. # Functions for dealing with files
  595. $time = file_mod_time($filename);
  596. # Cryptographic Functions
  597. $crypted_password = bz_crypt($password);
  598. $new_password = generate_random_password($password_length);
  599. # Validation Functions
  600. validate_email_syntax($email);
  601. validate_date($date);
  602. =head1 DESCRIPTION
  603. This package contains various utility functions which do not belong anywhere
  604. else.
  605. B<It is not intended as a general dumping ground for something which
  606. people feel might be useful somewhere, someday>. Do not add methods to this
  607. package unless it is intended to be used for a significant number of files,
  608. and it does not belong anywhere else.
  609. =head1 FUNCTIONS
  610. This package provides several types of routines:
  611. =head2 Tainting
  612. Several functions are available to deal with tainted variables. B<Use these
  613. with care> to avoid security holes.
  614. =over 4
  615. =item C<is_tainted>
  616. Determines whether a particular variable is tainted
  617. =item C<trick_taint($val)>
  618. Tricks perl into untainting a particular variable.
  619. Use trick_taint() when you know that there is no way that the data
  620. in a scalar can be tainted, but taint mode still bails on it.
  621. B<WARNING!! Using this routine on data that really could be tainted defeats
  622. the purpose of taint mode. It should only be used on variables that have been
  623. sanity checked in some way and have been determined to be OK.>
  624. =item C<detaint_natural($num)>
  625. This routine detaints a natural number. It returns a true value if the
  626. value passed in was a valid natural number, else it returns false. You
  627. B<MUST> check the result of this routine to avoid security holes.
  628. =item C<detaint_signed($num)>
  629. This routine detaints a signed integer. It returns a true value if the
  630. value passed in was a valid signed integer, else it returns false. You
  631. B<MUST> check the result of this routine to avoid security holes.
  632. =back
  633. =head2 Quoting
  634. Some values may need to be quoted from perl. However, this should in general
  635. be done in the template where possible.
  636. =over 4
  637. =item C<html_quote($val)>
  638. Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, and E<34> being
  639. replaced with their appropriate HTML entities.
  640. =item C<html_light_quote($val)>
  641. Returns a string where only explicitly allowed HTML elements and attributes
  642. are kept. All HTML elements and attributes not being in the whitelist are either
  643. escaped (if HTML::Scrubber is not installed) or removed.
  644. =item C<url_quote($val)>
  645. Quotes characters so that they may be included as part of a url.
  646. =item C<css_class_quote($val)>
  647. Quotes characters so that they may be used as CSS class names. Spaces
  648. are replaced by underscores.
  649. =item C<xml_quote($val)>
  650. This is similar to C<html_quote>, except that ' is escaped to &apos;. This
  651. is kept separate from html_quote partly for compatibility with previous code
  652. (for &apos;) and partly for future handling of non-ASCII characters.
  653. =item C<url_decode($val)>
  654. Converts the %xx encoding from the given URL back to its original form.
  655. =back
  656. =head2 Environment and Location
  657. Functions returning information about your environment or location.
  658. =over 4
  659. =item C<i_am_cgi()>
  660. Tells you whether or not you are being run as a CGI script in a web
  661. server. For example, it would return false if the caller is running
  662. in a command-line script.
  663. =item C<get_netaddr($ipaddr)>
  664. Given an IP address, this returns the associated network address, using
  665. C<Bugzilla->params->{'loginnetmask'}> as the netmask. This can be used
  666. to obtain data in order to restrict weak authentication methods (such as
  667. cookies) to only some addresses.
  668. =item C<correct_urlbase()>
  669. Returns either the C<sslbase> or C<urlbase> parameter, depending on the
  670. current setting for the C<ssl> parameter.
  671. =item C<use_attachbase()>
  672. Returns true if an alternate host is used to display attachments; false
  673. otherwise.
  674. =back
  675. =head2 Searching
  676. Functions for searching within a set of values.
  677. =over 4
  678. =item C<lsearch($list, $item)>
  679. Returns the position of C<$item> in C<$list>. C<$list> must be a list
  680. reference.
  681. If the item is not in the list, returns -1.
  682. =back
  683. =head2 Data Manipulation
  684. =over 4
  685. =item C<diff_arrays(\@old, \@new)>
  686. Description: Takes two arrayrefs, and will tell you what it takes to
  687. get from @old to @new.
  688. Params: @old = array that you are changing from
  689. @new = array that you are changing to
  690. Returns: A list of two arrayrefs. The first is a reference to an
  691. array containing items that were removed from @old. The
  692. second is a reference to an array containing items
  693. that were added to @old. If both returned arrays are
  694. empty, @old and @new contain the same values.
  695. =back
  696. =head2 String Manipulation
  697. =over 4
  698. =item C<trim($str)>
  699. Removes any leading or trailing whitespace from a string. This routine does not
  700. modify the existing string.
  701. =item C<diff_strings($oldstr, $newstr)>
  702. Takes two strings containing a list of comma- or space-separated items
  703. and returns what items were removed from or added to the new one,
  704. compared to the old one. Returns a list, where the first entry is a scalar
  705. containing removed items, and the second entry is a scalar containing added
  706. items.
  707. =item C<wrap_hard($string, $size)>
  708. Wraps a string, so that a line is I<never> longer than C<$size>.
  709. Returns the string, wrapped.
  710. =item C<wrap_comment($comment)>
  711. Takes a bug comment, and wraps it to the appropriate length. The length is
  712. currently specified in C<Bugzilla::Constants::COMMENT_COLS_WRAP>. Lines beginning
  713. with ">" are assumed to be quotes, and they will not be wrapped.
  714. The intended use of this function is to wrap comments that are about to be
  715. displayed or emailed. Generally, wrapped text should not be stored in the
  716. database.
  717. =item C<find_wrap_point($string, $maxpos)>
  718. Search for a comma, a whitespace or a hyphen to split $string, within the first
  719. $maxpos characters. If none of them is found, just split $string at $maxpos.
  720. The search starts at $maxpos and goes back to the beginning of the string.
  721. =item C<is_7bit_clean($str)>
  722. Returns true if the string contains only 7-bit characters (ASCII 32 through 126,
  723. ASCII 10 (Line Feed) and ASCII 13 (Carriage Return)).
  724. =item C<disable_utf8()>
  725. Disable utf8 on STDOUT (and display raw data instead).
  726. =item C<clean_text($str)>
  727. Returns the parameter "cleaned" by exchanging non-printable characters with spaces.
  728. Specifically characters (ASCII 0 through 31) and (ASCII 127) will become ASCII 32 (Space).
  729. =item C<get_text>
  730. =over
  731. =item B<Description>
  732. This is a method of getting localized strings within Bugzilla code.
  733. Use this when you don't want to display a whole template, you just
  734. want a particular string.
  735. It uses the F<global/message.txt.tmpl> template to return a string.
  736. =item B<Params>
  737. =over
  738. =item C<$message> - The identifier for the message.
  739. =item C<$vars> - A hashref. Any variables you want to pass to the template.
  740. =back
  741. =item B<Returns>
  742. A string.
  743. =back
  744. =back
  745. =head2 Formatting Time
  746. =over 4
  747. =item C<format_time($time)>
  748. Takes a time, converts it to the desired format and appends the timezone
  749. as defined in editparams.cgi, if desired. This routine will be expanded
  750. in the future to adjust for user preferences regarding what timezone to
  751. display times in.
  752. This routine is mainly called from templates to filter dates, see
  753. "FILTER time" in Templates.pm. In this case, $format is undefined and
  754. the routine has to "guess" the date format that was passed to $dbh->sql_date_format().
  755. =item C<format_time_decimal($time)>
  756. Returns a number with 2 digit precision, unless the last digit is a 0. Then it
  757. returns only 1 digit precision.
  758. =back
  759. =head2 Files
  760. =over 4
  761. =item C<file_mod_time($filename)>
  762. Takes a filename and returns the modification time. It returns it in the format
  763. of the "mtime" parameter of the perl "stat" function.
  764. =back
  765. =head2 Cryptography
  766. =over 4
  767. =item C<bz_crypt($password)>
  768. Takes a string and returns a C<crypt>ed value for it, using a random salt.
  769. Please always use this function instead of the built-in perl "crypt"
  770. when initially encrypting a password.
  771. =begin undocumented
  772. Random salts are generated because the alternative is usually
  773. to use the first two characters of the password itself, and since
  774. the salt appears in plaintext at the beginning of the encrypted
  775. password string this has the effect of revealing the first two
  776. characters of the password to anyone who views the encrypted version.
  777. =end undocumented
  778. =item C<generate_random_password($password_length)>
  779. Returns an alphanumeric string with the specified length
  780. (10 characters by default). Use this function to generate passwords
  781. and tokens.
  782. =back
  783. =head2 Validation
  784. =over 4
  785. =item C<validate_email_syntax($email)>
  786. Checks the syntax of an email address and returns 1 if
  787. the check is successful, else returns 0.
  788. Untaints C<$email> if successful.
  789. =item C<validate_date($date)>
  790. Checks that the date has the correct format and returns 1 if
  791. the check is successful, else returns 0.
  792. =back