Problem with creating a Huffman code tree

Code: (Select All)

'Huffman Encoding



' One row of the frequency table: a byte value and how often it was counted.
TYPE assignment

  CHAR AS _UNSIGNED _BYTE '<-- ASCII character (byte value)

  COUNT AS _UNSIGNED LONG '<-- Frequency of the character (occurrence counter)

END TYPE



' Main program: load the input file into memory, build the frequency table,
' then show the table on screen and write it to "test_TABLE.txt".
DIM File AS STRING
DIM FileNum AS LONG ' file handle obtained from FREEFILE
DIM i AS LONG ' loop counter over the table rows

File = "test.txt"

' Stop with a readable message instead of failing later on a missing file.
IF _FILEEXISTS(File) = 0 THEN
  PRINT "File not found: "; File
  END
END IF

FileNum = FREEFILE
OPEN File FOR BINARY ACCESS READ AS #FileNum

' An empty file would ask for an upper bound of -1 below, and the table
' builder reads element 0 unconditionally, so refuse empty input up front.
IF LOF(FileNum) = 0 THEN
  CLOSE #FileNum
  PRINT "File is empty: "; File
  END
END IF

' One array element per byte of the file (indices 0 .. LOF - 1).
REDIM MEM(LOF(FileNum) - 1) AS _UNSIGNED _BYTE
GET #FileNum, , MEM()
CLOSE #FileNum

' Step 1 - Calculate the ASCII character frequencies
REDIM Table(0) AS assignment
CALC_Table Table(), MEM()

COLOR 11: PRINT " STEP 1 *** Calc ASCII Frequenzy ***"
COLOR 7
FOR i = 0 TO UBOUND(Table)
  PRINT Table(i).CHAR; " - "; Table(i).COUNT
NEXT i

' Same table as text: hexadecimal byte value, then the count without padding.
FileNum = FREEFILE
OPEN "test_TABLE.txt" FOR OUTPUT AS #FileNum
FOR i = 0 TO UBOUND(Table)
  PRINT #FileNum, HEX$(Table(i).CHAR) + " - " + LTRIM$(STR$((Table(i).COUNT)))
NEXT i
CLOSE #FileNum



'SLEEP



' Step 2 - Create the Huffman tree







SUB InsertElement (Array() AS assignment, Index AS _UNSIGNED LONG)
  ' Grows Array() by one element and opens a zeroed slot at position Index.
  ' Elements from Index upwards move one position towards the end.
  '   Array() - dynamic array to grow (it is resized with REDIM _PRESERVE)
  '   Index   - position of the new slot; UBOUND(Array) + 1 appends
  ' Does nothing when Index lies outside LBOUND(Array) .. UBOUND(Array) + 1.
  DIM I AS LONG ' signed on purpose, see the loop below
  DIM Target AS LONG ' signed copy of Index for the bound checks
  DIM Empty AS assignment ' never assigned, so it holds the default (zero) values

  Target = Index
  IF Target < LBOUND(Array) THEN EXIT SUB
  IF Target > (UBOUND(Array) + 1) THEN EXIT SUB

  REDIM _PRESERVE Array(UBOUND(Array) + 1) AS assignment

  ' Shift from the back so that no element is overwritten before it is moved.
  ' The counter has to be signed: when Target is 0 the loop only ends once
  ' the counter drops to -1, a value an unsigned counter wraps around past.
  FOR I = UBOUND(Array) - 1 TO Target STEP -1
    Array(I + 1) = Array(I)
  NEXT I

  Array(Target) = Empty
END SUB



SUB RemoveElement (Array() AS assignment, Index AS _UNSIGNED LONG)
  ' Deletes the element at position Index and shrinks Array() by one.
  ' Elements behind Index move one position towards the front.
  '   Array() - dynamic array to shrink (it is resized with REDIM _PRESERVE)
  '   Index   - position of the element to delete
  ' Does nothing when Index lies outside LBOUND(Array) .. UBOUND(Array).
  ' A one-element array is not shrunk; its only element is reset to the
  ' default (zero) values instead, which avoids a REDIM to an upper bound
  ' below the lower bound.
  DIM I AS LONG
  DIM Target AS LONG ' signed copy of Index for the bound checks
  DIM Empty AS assignment ' never assigned, so it holds the default (zero) values

  Target = Index
  IF Target < LBOUND(Array) OR Target > UBOUND(Array) THEN EXIT SUB

  IF UBOUND(Array) = LBOUND(Array) THEN
    Array(Target) = Empty
    EXIT SUB
  END IF

  ' Close the gap by pulling every later element one position forward.
  FOR I = Target TO UBOUND(Array) - 1
    Array(I) = Array(I + 1)
  NEXT I

  REDIM _PRESERVE Array(UBOUND(Array) - 1) AS assignment
END SUB



SUB CALC_Table (Table() AS assignment, Array() AS _UNSIGNED _BYTE)
  ' Step 1 - Calculate the character frequencies.
  ' Counts how often each byte value occurs in Array() and returns one row
  ' per distinct value in Table(), sorted ascending by count.
  '   Table() - dynamic array receiving the result; previous content is
  '             discarded, so the caller does not have to clear it first
  '   Array() - the bytes to analyse
  DIM i AS LONG ' <- Counter for Array
  DIM TableIDX AS LONG ' <- Index of the last used row in Table, -1 = none yet
  DIM Value AS _UNSIGNED _BYTE ' <- Byte currently being counted
  DIM Slot(0 TO 255) AS LONG ' <- Slot(v) = row of byte value v in Table, -1 = no row yet

  FOR i = 0 TO 255
    Slot(i) = -1
  NEXT i

  ' Start from a single zeroed row regardless of what was passed in.
  REDIM Table(0) AS assignment
  TableIDX = -1

  FOR i = LBOUND(Array) TO UBOUND(Array)
    Value = Array(i)

    ' If the character is already in the table, increase its counter by 1,
    ' otherwise append a new row. Rows are appended in order of first
    ' appearance; the lookup array replaces a search through the table.
    IF Slot(Value) >= 0 THEN
      Table(Slot(Value)).COUNT = Table(Slot(Value)).COUNT + 1
    ELSE
      TableIDX = TableIDX + 1
      ' Row 0 already exists after the REDIM above; grow only for later rows.
      IF TableIDX > 0 THEN REDIM _PRESERVE Table(TableIDX) AS assignment
      Table(TableIDX).CHAR = Value
      Table(TableIDX).COUNT = 1
      Slot(Value) = TableIDX
    END IF
  NEXT i

  ' Sort table by counter of characters (mode 1 = by .COUNT)
  QUICKSORT Table(), LBOUND(Table), UBOUND(Table), 1
END SUB



SUB QUICKSORT (Array() AS assignment, LB AS _UNSIGNED LONG, UB AS _UNSIGNED LONG, Mode AS _UNSIGNED _BYTE)
  ' Sorts Array(LB .. UB) in place in ascending order using a recursive
  ' quicksort with the middle element of the range as pivot.
  '   Array() - table to sort
  '   LB, UB  - first and last index of the range to sort (both inclusive)
  '   Mode    - 0 = compare by .CHAR, 1 = compare by .COUNT; other values
  '             are not handled by the SELECT CASE, so no key is compared
  '
  ' The scan positions are signed LONGs on purpose. When the right-hand
  ' position stands on index 0 and is decremented it has to become -1 so
  ' that "P1 <= P2" turns false. An unsigned variable wraps around to a huge
  ' value instead, which keeps the loop running and indexes out of range.
  DIM Lo AS LONG ' signed copy of LB
  DIM Hi AS LONG ' signed copy of UB
  DIM P1 AS LONG ' scan position moving up from the left
  DIM P2 AS LONG ' scan position moving down from the right
  DIM SubLB AS _UNSIGNED LONG ' bounds handed to the recursive calls, in the
  DIM SubUB AS _UNSIGNED LONG ' exact type of the LB / UB parameters
  DIM REF AS assignment ' copy of the pivot element
  DIM temp AS assignment ' scratch element for swapping

  Lo = LB
  Hi = UB
  P1 = Lo
  P2 = Hi
  REF = Array(Lo + (Hi - Lo) \ 2)

  DO

    ' Skip elements that are already on the correct side of the pivot.
    SELECT CASE Mode
      CASE 0
        DO WHILE Array(P1).CHAR < REF.CHAR
          P1 = P1 + 1
        LOOP

        DO WHILE Array(P2).CHAR > REF.CHAR
          P2 = P2 - 1
        LOOP
      CASE 1
        DO WHILE Array(P1).COUNT < REF.COUNT
          P1 = P1 + 1
        LOOP

        DO WHILE Array(P2).COUNT > REF.COUNT
          P2 = P2 - 1
        LOOP
    END SELECT

    ' Both positions now point at elements on the wrong side: exchange them.
    IF P1 <= P2 THEN
      temp = Array(P1)
      Array(P1) = Array(P2)
      Array(P2) = temp

      P1 = P1 + 1
      P2 = P2 - 1
    END IF

  LOOP WHILE P1 <= P2

  ' Sort the two partitions; a partition of fewer than two elements is done.
  IF Lo < P2 THEN
    SubUB = P2
    CALL QUICKSORT(Array(), LB, SubUB, Mode)
  END IF

  IF P1 < Hi THEN
    SubLB = P1
    CALL QUICKSORT(Array(), SubLB, UB, Mode)
  END IF
END SUB

' I think this is all you need:
'
'   Type HuffmanNode
'     Char As _Unsigned _Byte
'     Count As _Unsigned Long
'     Left As Long ' 0-bit path
'     Right As Long ' 1-bit path
'   End Type
'
'   ReDim tree(20) As HuffmanNode ' I just picked a random size; in practice you'll resize this as you create more nodes
'
'   tree(2).Char = ASC("A") ' Leaf node
'   tree(2).Count = 10
'   tree(2).Left = -1 ' -1 for the indexes indicates a leaf node
'   tree(2).Right = -1
'
'   tree(3).Char = ASC("B")
'   tree(3).Count = 15
'   tree(3).Left = -1
'   tree(3).Right = -1
'
'   ' This is an internal node created when creating the Huffman tree
'   tree(1).Char = 0 ' doesn't actually have a char
'   tree(1).Count = 25 ' Addition of the counts of left/right
'   tree(1).Left = 2 ' The "A" leaf node
'   tree(1).Right = 3 ' The "B" leaf node
'
'   ' When traversing the tree, you index the array with the .Left and .Right values
'   ' (a whole record cannot be printed, so one of its fields is shown here)
'   Print "0-bit path of internal node 1: "; tree(tree(1).Left).Char
'   Print "1-bit path of internal node 1: "; tree(tree(1).Right).Char

ReDim HuffmanQueue(256) As Long ' Assuming the starting leaf nodes are entries 1 to 256 in tree()
'
' The rest of this fragment is sample code kept as comments.
' NOTE(review): x, y, z and nextNode are not defined anywhere in the fragment.
'
'   For i = 1 To 256
'     HuffmanQueue(i) = i
'   Next
'
'   ' When sorting you do this kind of thing
'   If tree(HuffmanQueue(x)).Count > tree(HuffmanQueue(y)).Count Then
'     ' Swapping two entries in the queue
'     z = HuffmanQueue(y)
'     HuffmanQueue(y) = HuffmanQueue(x)
'     HuffmanQueue(x) = z
'   End If
'
'   ' Create a new tree() node and assign it into HuffmanQueue()
'   tree(nextNode).Left = HuffmanQueue(1)
'   tree(nextNode).Right = HuffmanQueue(2)
'   tree(nextNode).Count = tree(HuffmanQueue(1)).Count + tree(HuffmanQueue(2)).Count
'
'   HuffmanQueue(1) = nextNode
'   HuffmanQueue(2) = -1
'   nextNode = nextNode + 1
'
' The above requires your sort to ignore the -1 entries.
' You could also just shift all the entries in the HuffmanQueue() array up one
' so that the HuffmanQueue(2) entry is gone and the array is one less in size.