example.pl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. package config;
  2. use strict;
  3. # To enable use of unicode characters
  4. use utf8;
  # Minimum length a word must have before it is added to the list.
  # Change this value to allow shorter or require longer words.
  $dicelister::minimum_word_length = 4;

  # Sources that words are extracted from. List as many as you like and
  # edit the selection later if you need to.
  @dicelister::data_sources = (

      # A local file with literature text, article body text and so on.
      # Double quotes are needed here so that the variable interpolates.
      #"file://${dicelister::data_dir}/data.txt",

      # Any article or text URL can be listed; words are extracted from
      # each of them too. The bird-related Wikipedia articles below are
      # only an example.
      'https://en.wikipedia.org/wiki/Bird',
      'https://en.wikipedia.org/wiki/Ring_ouzel',
      'https://en.wikipedia.org/wiki/Common_blackbird',
      'https://en.wikipedia.org/wiki/True_thrush',
      'https://en.wikipedia.org/wiki/Song_thrush',
      'https://en.wikipedia.org/wiki/Olive_thrush',
      'https://en.wikipedia.org/wiki/Black-billed_thrush',
      'https://en.wikipedia.org/wiki/Black-headed_bulbul',
      'https://en.wikipedia.org/wiki/Passerine',
      'https://en.wikipedia.org/wiki/Common_cuckoo',
      'https://en.wikipedia.org/wiki/Eurasian_sparrowhawk',
      'https://en.wikipedia.org/wiki/Bird_of_prey',
      'https://en.wikipedia.org/wiki/Eurasian_eagle-owl',
      'https://en.wikipedia.org/wiki/Golden_eagle',
      'https://en.wikipedia.org/wiki/Peregrine_falcon',
      'https://en.wikipedia.org/wiki/Galliformes',
      'https://en.wikipedia.org/wiki/Wild_turkey',
      'https://en.wikipedia.org/wiki/Prairie_chicken',
      'https://en.wikipedia.org/wiki/Northern_goshawk',
      'https://en.wikipedia.org/wiki/Domestic_turkey',
      'https://en.wikipedia.org/wiki/Bald_eagle',
      'https://en.wikipedia.org/wiki/Kingbird',
      'https://en.wikipedia.org/wiki/Old_World_quail',
      'https://en.wikipedia.org/wiki/Malleefowl',
      'https://en.wikipedia.org/wiki/Anseriformes',
      'https://en.wikipedia.org/wiki/Anhimidae',
      'https://en.wikipedia.org/wiki/Magpie_goose',
      'https://en.wikipedia.org/wiki/Duck',
      'https://en.wikipedia.org/wiki/Heritage_turkey',
      'https://en.wikipedia.org/wiki/Ocellated_turkey',
      'https://en.wikipedia.org/wiki/Wattle_(anatomy)',
      'https://en.wikipedia.org/wiki/American_white_pelican',
      'https://en.wikipedia.org/wiki/California_condor',

      # A plain string can serve as a source as well. Putting all 7000+
      # words here is probably not ideal; a file (shown above) is more
      # appropriate. The line below is only an example.
      #'The quick brown fox jumps over the lazy dog.'
  );
  # Symbols, characters or words that must not appear in the word list.
  # The entries are appended to whatever @dicelister::exclude_strings
  # already holds, i.e. they add to the default list from dicelister.pl.
  push @dicelister::exclude_strings, (
      '`',

      # Characters spotted while looking through the rough word list that
      # are not easy for everyone to type.
      'â',
      'å',
      'ã',
      'Ã',
      'ä',
      'ª',
      '±',
      '¨',
      'Ÿ',
      '–',
      '€',
      '“',
      'ˆ',
      '’',
      '”',
      'œ',
      '†',
      '¾',
      '²',
      '«',
      '¶',
      '¡',
      '¼',

      # Not a word, but it showed up in the output somehow, so it is
      # excluded explicitly.
      's42815',
  );
  # You can override almost any function used in the dicelister.pl file
  # to fine-tune the output as you like it. This override is an example
  # that also recognises Bangla digits; feel free to comment it out.
  #
  # dicelister::is_numeric($val)
  #   $val - string to test. Assumed to be a decoded character string
  #          (not raw UTF-8 bytes); verify against the caller.
  #   Returns 1 when $val is non-empty and consists solely of decimal
  #   digits (anything \d matches, plus Bangla ০-৯ spelled out
  #   explicitly), and 0 otherwise, including for undef and ''.
  sub dicelister::is_numeric {
      my ($val) = @_;

      # Always hand back an explicit 0/1 instead of falling off the end.
      return 0 unless defined $val;

      # The digits live in one character class so that the anchors and
      # the quantifier apply to every alternative. In the previous
      # pattern /^\d|...|০+$/ the bare alternation made any string that
      # merely started with a digit, or contained a Bangla digit
      # anywhere, count as numeric.
      # \x{09E6}-\x{09EF} is BENGALI DIGIT ZERO through NINE; escapes are
      # used so the pattern does not depend on the source encoding.
      return $val =~ /\A[\d\x{09E6}-\x{09EF}]+\z/ ? 1 : 0;
  }