crap5.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. # -*- coding: utf8 -*-
  2. # CRAP5 - 5-bit Compact Representation of Alphabetical Patterns
  3. # Copyright © 2023 Nichlas Severinsen
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <https://www.gnu.org/licenses/>.
import io
import sys
from typing import BinaryIO, Iterator, Union
  18. BYTE_LENGTH = 0x8
  19. BASE_BINARY = 0x2
  20. CRAP_LENGTH = 0x5
  21. characterset = {
  22. b' ' : '00000',
  23. b'A' : '00001',
  24. b'B' : '00010',
  25. b'C' : '00011',
  26. b'D' : '00100',
  27. b'E' : '00101',
  28. b'F' : '00110',
  29. b'G' : '00111',
  30. b'H' : '01000',
  31. b'I' : '01001',
  32. b'J' : '01010',
  33. b'K' : '01011',
  34. b'L' : '01100',
  35. b'M' : '01101',
  36. b'N' : '01110',
  37. b'O' : '01111',
  38. b'P' : '10000',
  39. b'Q' : '10001',
  40. b'R' : '10010',
  41. b'S' : '10011',
  42. b'T' : '10100',
  43. b'U' : '10101',
  44. b'V' : '10110',
  45. b'W' : '10111',
  46. b'X' : '11000',
  47. b'Y' : '11001',
  48. b'Z' : '11010',
  49. b'.' : '11011',
  50. b',' : '11100',
  51. b'!' : '11101',
  52. b'?' : '11110',
  53. b'\n': '11111',
  54. }
  55. def encode(data: bytes) -> bytes:
  56. # After testing, it seems making a dict with the bitstring for every byte, and then accessing those
  57. # directly is faster than converting from bitstring to integer and then to bytes. So, this is what we do.
  58. bytedict = {'{0:08b}'.format(x): x.to_bytes(1, 'big') for x in range(0,BASE_BINARY**BYTE_LENGTH)}
  59. buffer = ''
  60. while (byte := data.read(1)):
  61. buffer += characterset[byte.upper()]
  62. # Continuously yield completed bytes
  63. if len(buffer) >= BYTE_LENGTH:
  64. yield bytedict[buffer[:BYTE_LENGTH]]
  65. buffer = buffer[BYTE_LENGTH:]
  66. # Yield remaining bits with padding
  67. if buffer:
  68. buffer += '0' * (BYTE_LENGTH - len(buffer))
  69. yield bytedict[buffer]
  70. def decode(data: bytes) -> bytes:
  71. # Invert charset for easy use
  72. charset = dict(zip(characterset.values(),characterset.keys()))
  73. # Create bitdict
  74. bitdict = {x.to_bytes(1, 'big'): '{0:08b}'.format(x) for x in range(0,BASE_BINARY**BYTE_LENGTH)}
  75. # Convert 5 and 5 bits back to their former ASCII representation.
  76. remainder = ''
  77. while (byte := data.read(1)):
  78. remainder += bitdict[byte]
  79. while len(remainder) > 5:
  80. yield charset[remainder[:CRAP_LENGTH]]
  81. remainder = remainder[CRAP_LENGTH:]