# -*- coding: utf8 -*-
# CRAP5 - 5-bit Compact Representation of Alphabetical Patterns
# Copyright © 2023 Nichlas Severinsen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import sys
from typing import BinaryIO, Iterator
# Width in bits of one output byte.
BYTE_LENGTH = 8
# Radix used when sizing the byte <-> bitstring lookup tables.
BASE_BINARY = 2
# Width in bits of one encoded symbol.
CRAP_LENGTH = 5

# The 32 supported symbols, ordered by code: a symbol's position in this
# sequence is its 5-bit value (space = 0, 'A' = 1, ..., 'Z' = 26, then
# '.', ',', '!', '?' and newline = 31).
_ALPHABET = b' ABCDEFGHIJKLMNOPQRSTUVWXYZ.,!?\n'

# Maps each supported symbol (a length-1 bytes object) to its code, written
# as a zero-padded bitstring of CRAP_LENGTH characters. Deriving the codes
# from the position keeps the table free of transcription mistakes.
characterset = {
    _ALPHABET[index:index + 1]: format(index, f'0{CRAP_LENGTH}b')
    for index in range(len(_ALPHABET))
}
def encode(data: BinaryIO) -> Iterator[bytes]:
    """Pack a stream of text bytes into 5-bit codes, yielding whole bytes.

    Args:
        data: Binary file-like object. It is consumed one byte at a time
            through ``data.read(1)`` until that call returns an empty value.

    Yields:
        One-byte ``bytes`` objects holding the packed bit stream. If the
        total number of bits is not a multiple of ``BYTE_LENGTH``, the last
        byte is right-padded with zero bits.

    Raises:
        KeyError: If an input byte, after upper-casing, has no entry in
            ``characterset``.
    """
    # After testing, it seems making a dict with the bitstring for every byte,
    # and then accessing those directly, is faster than converting from
    # bitstring to integer and then to bytes. So, this is what we do.
    bytedict = {
        format(value, '08b'): value.to_bytes(1, 'big')
        for value in range(BASE_BINARY ** BYTE_LENGTH)
    }

    pending = ''
    while (byte := data.read(1)):
        # Lower-case letters share the code of their upper-case form.
        pending += characterset[byte.upper()]

        # Continuously yield completed bytes. At most one byte can complete
        # per symbol, because a symbol adds fewer bits than a byte holds.
        if len(pending) >= BYTE_LENGTH:
            yield bytedict[pending[:BYTE_LENGTH]]
            pending = pending[BYTE_LENGTH:]

    # Yield the remaining bits, padded with zeros up to a full byte.
    if pending:
        pending += '0' * (BYTE_LENGTH - len(pending))
        yield bytedict[pending]
def decode(data: BinaryIO) -> Iterator[bytes]:
    """Unpack a stream of packed 5-bit codes back into their symbols.

    Args:
        data: Binary file-like object. It is consumed one byte at a time
            through ``data.read(1)`` until that call returns an empty value.

    Yields:
        Length-1 ``bytes`` objects, one per complete ``CRAP_LENGTH``-bit
        group, taken from the keys of ``characterset``.

    Note:
        Fewer than ``CRAP_LENGTH`` bits left over at the end of the stream
        are dropped. A full group of zero bits is always emitted as
        ``b' '``, because the all-zero code is the space character; the
        decoder cannot tell such a group apart from zero padding, so the
        output may end with one extra space.
    """
    # Invert charset for easy use: bitstring -> symbol.
    charset = {bits: symbol for symbol, bits in characterset.items()}

    # Create bitdict: every possible byte -> its 8-character bitstring.
    bitdict = {
        value.to_bytes(1, 'big'): format(value, '08b')
        for value in range(BASE_BINARY ** BYTE_LENGTH)
    }

    # Convert 5 and 5 bits back to their former ASCII representation.
    remainder = ''
    while (byte := data.read(1)):
        remainder += bitdict[byte]
        # Use `>=` so that a group of exactly CRAP_LENGTH bits is emitted
        # too; with a strict `>` the final symbol was lost whenever the
        # stream ended on a group boundary (every 5th input byte).
        while len(remainder) >= CRAP_LENGTH:
            yield charset[remainder[:CRAP_LENGTH]]
            remainder = remainder[CRAP_LENGTH:]