# -*- coding: utf8 -*-
# Repository: necklace/CRAP5

# CRAP5 - 5-bit Compact Representation of Alphabetical Patterns
# Copyright © 2023 Nichlas Severinsen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


import io
import sys
from typing import BinaryIO, Iterator, Union


# Number of bits in one output byte.
BYTE_LENGTH = 8
# Radix used when enumerating every possible byte value.
BASE_BINARY = 2
# Number of bits in one CRAP5 symbol.
CRAP_LENGTH = 5


# Symbol table: every supported character (as a length-1 bytes object) maps
# to its 5-character bitstring. A symbol's code is its position in the
# alphabet below, so b' ' is '00000', b'A' is '00001', ... and b'\n' is
# '11111'.
characterset = {
    bytes([symbol]): format(code, '05b')
    for code, symbol in enumerate(b' ABCDEFGHIJKLMNOPQRSTUVWXYZ.,!?\n')
}


def encode(data: Union[bytes, BinaryIO]) -> Iterator[bytes]:
    """Pack text into CRAP5, yielding the encoded output one byte at a time.

    Each input character is upper-cased, looked up in ``characterset`` and
    appended to a bit buffer as a 5-bit group; whenever the buffer holds a
    full byte, that byte is yielded. Bits left over at the end of the input
    are right-padded with zero bits to a whole byte.

    Args:
        data: Either a bytes-like object or a binary stream exposing
            ``read(n)``. A stream is consumed one byte at a time.

    Yields:
        Length-1 ``bytes`` objects, in output order.

    Raises:
        KeyError: If an input character (after upper-casing) is not a key
            of ``characterset``. Because this is a generator, the error
            surfaces while iterating, not when ``encode`` is called.
    """

    # The signature used to advertise plain ``bytes`` while the body needs
    # ``.read()``; wrapping bytes-like input makes both call styles work.
    if isinstance(data, (bytes, bytearray, memoryview)):
        data = io.BytesIO(data)

    # After testing, it seems making a dict with the bitstring for every byte, and then accessing
    # those directly is faster than converting from bitstring to integer and then to bytes.
    bytedict = {
        format(x, f'0{BYTE_LENGTH}b'): x.to_bytes(1, 'big')
        for x in range(BASE_BINARY**BYTE_LENGTH)
    }
    buffer = ''

    while (byte := data.read(1)):

        buffer += characterset[byte.upper()]

        # Continuously yield completed bytes. The buffer holds fewer than
        # BYTE_LENGTH bits before each append, so one yield per character
        # is enough to drain it.
        if len(buffer) >= BYTE_LENGTH:
            yield bytedict[buffer[:BYTE_LENGTH]]
            buffer = buffer[BYTE_LENGTH:]

    # Yield remaining bits, zero-padded on the right to a whole byte.
    if buffer:
        yield bytedict[buffer.ljust(BYTE_LENGTH, '0')]


def decode(data: Union[bytes, BinaryIO]) -> Iterator[bytes]:
    """Unpack CRAP5 data, yielding the decoded text one character at a time.

    Every input byte is expanded to its bitstring and appended to a bit
    buffer; each complete 5-bit group in the buffer is mapped back to its
    character through the inverse of ``characterset``. Fewer than
    ``CRAP_LENGTH`` bits left at the end of the input are discarded.

    Note: ``encode`` pads with zero bits and ``b' '`` is encoded as
    ``'00000'``, so when the padding is at least five bits long it is
    indistinguishable from a trailing space and is yielded as one.

    Args:
        data: Either a bytes-like object or a binary stream exposing
            ``read(n)``. A stream is consumed one byte at a time.

    Yields:
        Length-1 ``bytes`` objects, one per decoded character.
    """

    # The signature used to advertise plain ``bytes`` while the body needs
    # ``.read()``; wrapping bytes-like input makes both call styles work.
    if isinstance(data, (bytes, bytearray, memoryview)):
        data = io.BytesIO(data)

    # Invert charset for easy use: bitstring -> character.
    charset = {bits: symbol for symbol, bits in characterset.items()}

    # Create bitdict: byte -> bitstring, for every possible byte value.
    bitdict = {
        x.to_bytes(1, 'big'): format(x, f'0{BYTE_LENGTH}b')
        for x in range(BASE_BINARY**BYTE_LENGTH)
    }

    # Convert 5 and 5 bits back to their former ASCII representation.
    remainder = ''

    while (byte := data.read(1)):

        remainder += bitdict[byte]

        # Use ">=": a group that ends exactly on a byte boundary leaves
        # precisely CRAP_LENGTH bits here. The previous "> 5" test never
        # emitted it at end of input, dropping the last character of any
        # message whose length is a multiple of eight symbols.
        while len(remainder) >= CRAP_LENGTH:
            yield charset[remainder[:CRAP_LENGTH]]
            remainder = remainder[CRAP_LENGTH:]