QB64 Phoenix Edition
Alphabetical sort of characters within a string. - Printable Version

+- QB64 Phoenix Edition (https://qb64phoenix.com/forum)
+-- Forum: QB64 Rising (https://qb64phoenix.com/forum/forumdisplay.php?fid=1)
+--- Forum: Code and Stuff (https://qb64phoenix.com/forum/forumdisplay.php?fid=3)
+---- Forum: Help Me! (https://qb64phoenix.com/forum/forumdisplay.php?fid=10)
+---- Thread: Alphabetical sort of characters within a string. (/showthread.php?tid=2620)

Pages: 1 2 3 4 5 6 7


RE: Alphabetical sort of characters within a string. - bplus - 04-26-2024

@euklides yes very short but if you knew the word doesn't repeat letters or didn't care, it could be useful

+1 @Circlotron thanks for bringing up this topic, my anacode$ has been improved by it i am getting you out of 0 ;-)) yes every little improvement to code is the right mindset IMHO. you can be obsessed by worse things Smile


RE: Alphabetical sort of characters within a string. - bplus - 04-26-2024

looks like peteCode2$() is fastest sorter of letters in a word. anacode2$ was worse using mid$ = trick to save time, did improve orig 26 digit version of anaCode$ to be used in apps that look for words in the letters of bigger word.

Code: (Select All)
Option _Explicit
_Title "anaCode2$ versus peteCode2$ with dictionary 2" ' b+ 2024-04-25 speed test

' anaCode$ returned to original format of len = 26 letter counts but improved?
' this is most useful to use with games using letters from words

' anacode2$ is to compare apples to apples in the function return format

' looks like peteCode2$ has the fastest letter sorter for words

Dim a$(25000), p$(25000)

'''' check i made modifications correctly
'Dim test$(9)
'Dim As Integer i
'test$(0) = "grmaana"
'test$(1) = "angiogram"
'test$(2) = "naagrma"
'test$(3) = "telgram"
'test$(4) = "gramana"
'test$(5) = "gram"
'test$(6) = "nag"
'test$(7) = "tag"
'test$(8) = "am"
'test$(9) = "grip"
'For i = 0 To 9
'    Print test$(i), AnaCode2$(test$(i)), petecode2$(test$(i))
'Next
'End
''''  ok looks right

Dim i As Integer, j As Integer
Dim start#, w$, ana2Time#, peteTime#, anatime#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        p$(j) = petecode2$(w$)
    Wend
    Close
Next
peteTime# = Timer(.01) - start#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        a$(j) = AnaCode$(w$)
    Wend
    Close
Next
anatime# = Timer(.01) - start#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        a$(j) = AnaCode2$(w$)
    Wend
    Close
Next
ana2Time# = Timer(.01) - start#


Print "Anacode2 Time ="; ana2Time#, "PeteCode2 Time ="; peteTime#
Print "Original anaCode$ time ="; anatime#

Print "DIFF CHECK anaCcode2 versus PeteCode2"
For i = 1 To j ' checking for differences between array values
    If a$(i) <> p$(i) Then Print a$(i), p$(i)
Next
Print "END OF DIFF CHECK"

Print: Print " Check tail end of arrays:"
For i = j - 15 To j
    Print a$(i), p$(i)
Next

' back to original form of anacode using counts of each letter 1 to 26
' this limits numbers of repeated letters to 9 max
' this code also assumes all letters but need not be assumed capitals
Function AnaCode$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    ' are assuming wrd$ is all letters
    Dim s$, w$
    Dim As Integer i, p
    s$ = String$(26, "0") ' number of A's in first, number of B's in 2nd, number of C's in third
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(wrd$)
        p = Asc(w$, i) - 64 ' A=1, B=2...
        Mid$(s$, p, 1) = _Trim$(Str$(Asc(s$, p) - 47)) ' finally!! better
    Next
    AnaCode$ = s$
End Function

' return sorted anagram code string for any word, call ucase$(wrd$) for all caps
Function AnaCode2$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    ' wrd$ is assumed to be in all capitals!!!
    ' number of A's in first, number of B's in 2nd, number of C's in third
    Dim As Integer L(65 To 90), i, p
    Dim rtn$, w$
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(w$)
        p = Asc(w$, i) ' A=1, B=2...
        L(p) = L(p) + 1
    Next
    For i = 65 To 90
        If L(i) Then rtn$ = rtn$ + String$(L(i), i)
    Next
    AnaCode2$ = rtn$
End Function

Function petecode2$ (la$)
    Dim rtn$, i%, seed%, j%, a$
    a$ = UCase$(la$)
    rtn$ = Space$(Len(a$))
    For i% = 65 To 90
        seed% = InStr(a$, Chr$(i%))
        While seed%
            j% = j% + 1
            Mid$(rtn$, j%) = Chr$(i%)
            seed% = InStr(seed% + 1, a$, Chr$(i%))
        Wend
    Next
    petecode2$ = _Trim$(rtn$)
End Function

   


RE: Alphabetical sort of characters within a string. - bplus - 04-27-2024

studying steves string addition, i came up with a way to crack peteCode2

i generalized steves string add function called bind$ that works for string arrays not shared
Code: (Select All)
'  Function bind$ (arr$()) is my mod of steves code
Const limit = 200000
Dim Shared NumStr(limit) As String

MakeNumsStrings
t# = Timer(0.001)
o$ = AddStrings 'time how long it takes to add those strings together
t1# = Timer(0.001)
o1$ = bind$(NumStr()) 'and time how long it takes to just mid$ those strings, if you know the size
t2# = Timer(0.001)
o2$ = MidStrings$(Len(o$))
t3# = Timer(0.001)
Print "Results:"
Print "First 50: "; Left$(o$, 50)
Print "First 50: "; Left$(o1$, 50)
Print "First 50: "; Left$(o2$, 50)
Print "Last  50: "; Right$(o$, 50)
Print "Last  50: "; Right$(o1$, 50)
Print "Last  50: "; Right$(o2$, 50)
Print
Print
Print Using "It took ###.### seconds to AddStrings"; t1# - t#
Print Using "It took ###.### seconds to  testbind$"; t2# - t1#
Print Using "It took ###.### seconds to midstrings"; t3# - t2#


Sub MakeNumsStrings
    For i = 1 To limit
        NumStr(i) = _Trim$(Str$(i))
    Next
End Sub

Function AddStrings$
    For i = 1 To limit
        temp$ = temp$ + NumStr(i)
    Next
    AddStrings = temp$
End Function

Function MidStrings$ (size)
    temp$ = Space$(size)
    p = 1 'position in full string
    For i = 1 To limit
        Mid$(temp$, p) = NumStr(i)
        p = p + Len(NumStr(i))
    Next
    MidStrings = temp$
End Function

Function bind$ (arr$())
    Dim As Long lb, ub, i, size, p
    Dim rtn$
    lb = LBound(arr$)
    ub = UBound(arr$)
    For i = lb To ub
        size = size + Len(arr$(i))
    Next
    rtn$ = Space$(size)
    p = 1
    For i = lb To ub
        Mid$(rtn$, p) = arr$(i)
        p = p + Len(arr$(i))
    Next
    bind$ = rtn$
End Function

so using bind$() i modified anacode3$
Code: (Select All)
Option _Explicit
_Title "anaCode3$ versus peteCode2$ with WORDS.txt" ' b+ 2024-04-26 speed test

' anaCode$ returned to original format of len = 26 letter counts but improved?
' this is most useful to use with games using letters from words

' anacode3$ is to compare apples to apples in the function return format

' aha, i cracked petecode2() time!

Dim a$(25000), p$(25000)

'''' check i made modifications correctly
'Dim test$(9)
'Dim As Integer i
'test$(0) = "grmaana"
'test$(1) = "angiogram"
'test$(2) = "naagrma"
'test$(3) = "telgram"
'test$(4) = "gramana"
'test$(5) = "gram"
'test$(6) = "nag"
'test$(7) = "tag"
'test$(8) = "am"
'test$(9) = "grip"
'For i = 0 To 9
'    Print test$(i), AnaCode2$(test$(i)), petecode2$(test$(i))
'Next
'End
''''  ok looks right

Dim i As Integer, j As Integer
Dim start#, w$, ana2Time#, peteTime#, anatime#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        p$(j) = petecode2$(w$)
    Wend
    Close
Next
peteTime# = Timer(.01) - start#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        a$(j) = AnaCode$(w$)
    Wend
    Close
Next
anatime# = Timer(.01) - start#

start# = Timer(.01)
For i = 1 To 5
    j = 0
    Open "WORDS.txt" For Input As #1
    While Not EOF(1)
        Input #1, w$
        j = j + 1
        a$(j) = AnaCode3$(w$)
    Wend
    Close
Next
ana2Time# = Timer(.01) - start#


Print "Anacode3 Time ="; ana2Time#, "PeteCode2 Time ="; peteTime#
Print "Original anaCode$ time ="; anatime#

Print "DIFF CHECK anaCcode3 versus PeteCode2"
For i = 1 To j ' checking for differences between array values
    If a$(i) <> p$(i) Then Print a$(i), p$(i)
Next
Print "END OF DIFF CHECK"

Print: Print " Check tail end of arrays:"
For i = j - 15 To j
    Print a$(i), p$(i)
Next

' back to original form of anacode using counts of each letter 1 to 26
' this limits numbers of repeated letters to 9 max
' this code also assumes all letters but need not be assumed capitals
Function AnaCode$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    ' are assuming wrd$ is all letters
    Dim s$, w$
    Dim As Integer i, p
    s$ = String$(26, "0") ' number of A's in first, number of B's in 2nd, number of C's in third
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(wrd$)
        p = Asc(w$, i) - 64 ' A=1, B=2...
        Mid$(s$, p, 1) = _Trim$(Str$(Asc(s$, p) - 47)) ' finally!! better
    Next
    AnaCode$ = s$
End Function

' return sorted anagram code string for any word, call ucase$(wrd$) for all caps
Function AnaCode2$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    ' wrd$ is assumed to be in all capitals!!!
    ' number of A's in first, number of B's in 2nd, number of C's in third
    Dim As Integer L(65 To 90), i, p
    Dim rtn$, w$
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(w$)
        p = Asc(w$, i) ' A=1, B=2...
        L(p) = L(p) + 1
    Next
    For i = 65 To 90
        If L(i) Then rtn$ = rtn$ + String$(L(i), i)
    Next
    AnaCode2$ = rtn$
End Function

Function AnaCode3$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    ' wrd$ is assumed to be in all capitals!!!
    ' number of A's in first, number of B's in 2nd, number of C's in third
    Dim As Integer L(65 To 90), i, p
    Dim rtn$, w$
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(w$)
        p = Asc(w$, i) ' A=1, B=2...
        L(p) = L(p) + 1
    Next
    Dim t$(65 To 90)
    For i = 65 To 90
        If L(i) Then t$(i) = String$(L(i), i)
    Next
    AnaCode3$ = bind$(t$())
End Function

Function petecode2$ (la$)
    Dim rtn$, i%, seed%, j%, a$
    a$ = UCase$(la$)
    rtn$ = Space$(Len(a$))
    For i% = 65 To 90
        seed% = InStr(a$, Chr$(i%))
        While seed%
            j% = j% + 1
            Mid$(rtn$, j%) = Chr$(i%)
            seed% = InStr(seed% + 1, a$, Chr$(i%))
        Wend
    Next
    petecode2$ = _Trim$(rtn$)
End Function

Function bind$ (arr$())
    Dim As Long lb, ub, i, size, p
    Dim rtn$
    lb = LBound(arr$)
    ub = UBound(arr$)
    For i = lb To ub
        size = size + Len(arr$(i))
    Next
    rtn$ = Space$(size)
    p = 1
    For i = lb To ub
        Mid$(rtn$, p) = arr$(i)
        p = p + Len(arr$(i))
    Next
    bind$ = rtn$
End Function

   

i honestly thought it wouldn't work but it did.


RE: Alphabetical sort of characters within a string. - SMcNeill - 04-27-2024

(04-27-2024, 01:44 AM)bplus Wrote: studying steves string addition, i came up with a way to crack peteCode2

...

i honestly thought it wouldn't work but it did.

Bada Bing!! You're welcome, oh ye of little faith. Big Grin

Now your code is beginning to look more like the one which I shared from the beginning for you. https://qb64phoenix.com/forum/showthread.php?tid=2620&pid=24733#pid24733

Big Grin Big Grin Wink


RE: Alphabetical sort of characters within a string. - Circlotron - 04-27-2024

What I have managed to do for my anagram finder to speed things up a whole lot is to have a the big word list, also a number of smaller files, each one containing words of equal length, also matching files containing L-R sorted versions of the same words in the same order. These files are named 3,4,5 etc and 3S,4S,5S etc. When a word from the main file is read, the length of that word opens the corresponding word length file and the matching sorted word file. These are a lot shorter than the big file, so a full scan of these is way quicker than a full length file. What's more, both the equal length word file and it's corresponding sorted file are read together, so when a sorted match is found, the corresponding anagram is read directly from the equal length file. The point is, the words being tested don't have to be sorted repeatedly on the fly, this is already done and stored in the sorted file. Edit -> These changes have sped things up by a factor of 9.2 times.


RE: Alphabetical sort of characters within a string. - bplus - 04-27-2024

(04-27-2024, 02:42 AM)SMcNeill Wrote:
(04-27-2024, 01:44 AM)bplus Wrote: studying steves string addition, i came up with a way to crack peteCode2

...

i honestly thought it wouldn't work but it did.

Bada Bing!! You're welcome, oh ye of little faith. Big Grin

Now your code is beginning to look more like the one which I shared from the beginning for you. it would also complain of the same "invalid value"

Big Grin Big Grin Wink

+1 ok this has been quite an adventure past couple days

i would probably have been onto it quicker had you shared the file you were using with code in zip but still a hard won lesson will be likely remembered better/longer.

i wonder if that trick will help the other version of anaCode with all the letter counts or PeteCode ?


RE: Alphabetical sort of characters within a string. - bplus - 04-27-2024

(04-27-2024, 06:29 AM)Circlotron Wrote: What I have managed to do for my anagram finder to speed things up a whole lot is to have a the big word list, also a number of smaller files, each one containing words of equal length, also matching files containing L-R sorted versions of the same words in the same order. These files are named 3,4,5 etc and 3S,4S,5S etc. When a word from the main file is read, the length of that word opens the corresponding word length file and the matching sorted word file. These are a lot shorter than the big file, so a full scan of these is way quicker than a full length file. What's more, both the equal length word file and it's corresponding sorted file are read together, so when a sorted match is found, the corresponding anagram is read directly from the equal length file. The point is, the words being tested don't have to be sorted repeatedly on the fly, this is already done and stored in the sorted file. Edit -> These changes have sped things up by a factor of 9.2 times.

i did the Rosetta Code challenge for anagrams about 7 years ago, steve too so don't remember exactly what was done then... i'd be inclinded to write an 2 word array with the anaCode for the first word then the word for second, then sort it by first word key, so all like anacodes are bunched together and just read down the array looking for the biggest bunch.


RE: Alphabetical sort of characters within a string. - bplus - 04-27-2024

(04-27-2024, 01:46 PM)bplus Wrote:
(04-27-2024, 02:42 AM)SMcNeill Wrote:
(04-27-2024, 01:44 AM)bplus Wrote: studying steves string addition, i came up with a way to crack peteCode2

...

i honestly thought it wouldn't work but it did.

Bada Bing!! You're welcome, oh ye of little faith. Big Grin

Now your code is beginning to look more like the one which I shared from the beginning for you. it would also complain of the same "invalid value"

Big Grin Big Grin Wink

+1 ok this has been quite an adventure past couple days

i would probably have been onto it quicker had you shared the file you were using with code in zip but still a hard won lesson will be likely remembered better/longer.

i wonder if that trick will help the other version of anaCode with all the letter counts or PeteCode ?

ah altAnaCode$ now edges out PeteCode2$ barely
Code: (Select All)
Function altAnaCode$ (wrd$) ' anaCode$ converts word to an Anagram pattern
    Dim As Integer L(65 To 90), i, p
    Dim t$(65 To 90)
    Dim rtn$, w$
    w$ = UCase$(wrd$) ' don't assume all caps  as expected
    For i = 1 To Len(w$)
        p = Asc(w$, i) ' A=1, B=2...
        L(p) = L(p) + 1
    Next
    For i = 65 To 90
        t$(i) = LTrim$(Str$(L(i)))
    Next
    altAnaCode$ = bind$(t$())
End Function

can petecode2$ also benefit from bind$ trick? it seems like it will be going backwards?


RE: Alphabetical sort of characters within a string. - SMcNeill - 04-27-2024

(04-27-2024, 01:46 PM)bplus Wrote:
(04-27-2024, 02:42 AM)SMcNeill Wrote:
(04-27-2024, 01:44 AM)bplus Wrote: studying steves string addition, i came up with a way to crack peteCode2

...

i honestly thought it wouldn't work but it did.

Bada Bing!!  You're welcome, oh ye of little faith.  Big Grin

Now your code is beginning to look more like the one which I shared from the beginning for you.  it would also complain of the same "invalid value"

Big Grin  Big Grin Wink

+1 ok this has been quite an adventure past couple days

i would probably have been onto it quicker had you shared the file you were using with code in zip but still a hard won lesson will be likely remembered better/longer.

i wonder if that trick will help the other version of anaCode with all the letter counts or PeteCode ?

I didn't have your file, but I figured you'd just swap dictionary names for testing purposes.  Big Grin

Guess that'll show me!   Big Grin


RE: Alphabetical sort of characters within a string. - bplus - 04-27-2024

(04-27-2024, 03:31 PM)SMcNeill Wrote:
(04-27-2024, 01:46 PM)bplus Wrote:
(04-27-2024, 02:42 AM)SMcNeill Wrote: Bada Bing!!  You're welcome, oh ye of little faith.  Big Grin

Now your code is beginning to look more like the one which I shared from the beginning for you.  it would also complain of the same "invalid value"

Big Grin  Big Grin Wink

+1 ok this has been quite an adventure past couple days

i would probably have been onto it quicker had you shared the file you were using with code in zip but still a hard won lesson will be likely remembered better/longer.

i wonder if that trick will help the other version of anaCode with all the letter counts or PeteCode ?

I didn't have your file, but I figured you'd just swap dictionary names for testing purposes.  Big Grin

Guess that'll show me!   Big Grin

coulda, shoulda, must of been feel'n lazy...

holy cow! Bind$() puts PeteCode3$ back on top!
compare
Code: (Select All)
Function petecode2$ (la$)
    Dim rtn$, i%, seed%, j%, a$
    a$ = UCase$(la$)
    rtn$ = Space$(Len(a$))
    For i% = 65 To 90
        seed% = InStr(a$, Chr$(i%))
        While seed%
            j% = j% + 1
            Mid$(rtn$, j%) = Chr$(i%)
            seed% = InStr(seed% + 1, a$, Chr$(i%))
        Wend
    Next
    petecode2$ = _Trim$(rtn$)
End Function

Function petecode3$ (la$)
    Dim rtn$, i%, seed%, j%, a$
    Dim t$(1 To Len(la$))
    a$ = UCase$(la$)
    For i% = 65 To 90
        seed% = InStr(a$, Chr$(i%))
        While seed%
            j% = j% + 1
            t$(j%) = Chr$(i%)
            seed% = InStr(seed% + 1, a$, Chr$(i%))
        Wend
    Next
    petecode3$ = bind$(t$())
End Function
petecode2$ was already using mid$ trick just not en mass as bind$() does it. you'd think the extra steps to save all the strings into an array to bind and extra call to bind would take longer, but no!

oh got to clean code and run tests with browser off...