Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
How to format a While... Wend correctly?
#21
(02-29-2024, 12:28 PM)bplus Wrote: For me the main concern was not taking anytime to load the word file let alone the word and definitions file at the start of the game.

The Binary search does not take a noticable amount of time for a word lookup, the human player is way too slow to need a slew of words searched at once. Maybe different with a spellchecker.

Load time is even less than search time:

Code: (Select All)
dict$ = "Collins.txt"

Screen _NewImage(800, 600, 32)

Type Dict_Type
    As String Word, Definition
End Type

ReDim Shared Dict(1000000) As Dict_Type

load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
Do
    ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
    temp1$ = Mid$(temp$, sp, ep - sp)
    'Print temp1$
    l = InStr(temp1$, Chr$(9)) 'tab separated data file

    count = count + 1
    Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
    Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
    ' Print Dict(count).Word
    sp = ep + 2
    '  Sleep
Loop Until sp >= Len(temp$)
load_time2# = Timer

ReDim _Preserve Dict(count) As Dict_Type

Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
    Read Junk(i)
Next


t# = Timer
For k = 1 To 10000
    For i = 1 To 10
        f = FindWord(Junk(i))
        If k = 1 Then 'no need to scroll the screen and print the words repeatedly
            If f = 0 Then
                Print Junk(i), "Word not found"
            Else
                Print Junk(i), Dict(FindWord(Junk(i))).Definition
            End If
        End If
    Next
Next
t1# = Timer

Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1

Function FindWord (word$)
    Dim As Long low, hi, test
    low = 1: hi = UBound(Dict)
    While low <= hi
        test = Int((low + hi) / 2)
        Select Case _StriCmp(Dict(test).Word, word$)
            Case 0
                'Print "found"; test
                FindWord = test: Exit Function
            Case -1
                'Print "low"; Dict(test).Word
                low = test + 1
            Case 1
                'Print "high"; Dict(test).Word
                hi = test - 1
        End Select
    Wend
End Function

   

0 seconds to load.
0.05 seconds to look up 100,000 entries and determine if they're in the dictionary, or not.
Reply
#22
Hey @PhilOfPerth -- Are you certain that this dictionary is 100% alphabetic, and that these words don't have any typos in them? Particularly a typo which might cause a word to be out of order?

From the screenshot above, "zebra" isn't found.

I edited my code to add in a simple little CombSort routine, to make certain our lists were alphabetical, and then PRESTO -- "zebra" exists once again and gives me a definition.

Code: (Select All)
dict$ = "Collins.txt"

Screen _NewImage(800, 600, 32)

Type Dict_Type
As String Word, Definition
End Type

ReDim Shared Dict(1000000) As Dict_Type

load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
count = -1
Do
ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
If count > 0 Then 'first two lines in the data file are garbage.
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l))
End If
sp = ep + 2
Loop Until sp >= Len(temp$)
ReDim _Preserve Dict(count) As Dict_Type
combsort
load_time2# = Timer

Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,mouse,zebra
For i = 1 To 10
Read Junk(i)
Next


t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer

Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1

Function FindWord (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function

Sub combsort
'This is the routine I tend to use personally and promote.
'It's short, simple, and easy to implement into code.

gap = UBound(Dict)

Do
gap = 10 * gap \ 13
If gap < 1 Then gap = 1
i = 0
swapped = 0
Do
If Dict(i).Word > Dict(i + gap).Word Then
Swap Dict(i).Word, Dict(i + gap).Word
Swap Dict(i).Definition, Dict(i + gap).Definition
swapped = -1
End If
i = i + 1
Loop Until i + gap > UBound(Dict)
Loop Until gap = 1 And swapped = 0
End Sub


Also need to point out that after looking at the actual file, we need to skip the first two lines of this file as they're header information and not part of the wordlist itself. Code above also accounts for that change. Wink
Reply
#23
cat = to vomit??? that's what a cat is?

I gotta check my Collin's now ;-))
b = b + ...
Reply
#24
CAT - crap!
   

ZEBRA looks OK
   

If takes 0 time to load, I will try your method next time I go to do something with dictionary @Steve.
b = b + ...
Reply
#25
Zebra is in there, and it's an okay definition.  The problem is it doesn't get found in an unsorted binary search.   My guess is that one of the words that is in that chain of searches is spelt wrong, and that's throwing the search off.

say we have a list of 10 things.  A B C D E F H G I J, and we want to find H

Search starts at E...  less than H, so we look half-way higher
G is the mid-point of the remainder... it's less than H, so we look half-way higher
I is the mid-point of what remains...  it's greater than H, so we look half-way lower
but there is no mid-point between G and I!

"Letter not found!"

It's in there, but since it's out of order or typoed or whatever, it throws the binary search method off.

Doing a sort, "zebra" is found and exists once again.  Without sorting, it's not, for whatever odd reason.
Reply
#26
Here is my RA method tested with zebra and some of your words, no rootbeer here either but does find zebra OK
   

Possible Plil's list was reassembled from his letter lists???

Mine came straight from Collins 2019 Scrabble which came from you Steve, I think?
b = b + ...
Reply
#27
That's what the first two lines of the file tells us: 

"Collins Scrabble Words (2019). 279,496 words with definitions."
(blank spacer)

Does yours have those 2 lines at the top of it?  They might've been the difference in the "zebra" search being off somehow.
Reply
#28
I had removed the first two lines, my Collins.txt starts with AA word...

Steve I tested your code, but do not have version 3.12 installed yet so made my own _ReadFile function.

Your code seems to wrap around and I had to put a count = 279,496 limit to stop the thing from going around over and over WTH???
Code: (Select All)
dict$ = "Collins.txt"

Screen _NewImage(800, 600, 32)

Type Dict_Type
    As String Word, Definition
End Type

ReDim Shared Dict(3000000) As Dict_Type

load_time1# = Timer
temp$ = readfile$(dict$) ' <<<< made my own

'Print Mid$(temp$, 1, 500) ' test load
'Sleep
Dim As Long l, count, sp, ep
sp = 1
Do
    ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
    temp1$ = Mid$(temp$, sp, ep - sp)
    'Print temp1$
    l = InStr(temp1$, Chr$(9)) 'tab separated data file

    count = count + 1


    Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
    Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
    'Print count, Dict(count).Word, sp, Len(temp$) ' <<<< 3,000,000 not enough WTH??? it's wrapping around and starting over??

    sp = ep + 2

Loop Until sp >= Len(temp$) Or count > 279496 ' <<<<<<  stop the insane wrap around!!!
load_time2# = Timer

ReDim _Preserve Dict(count) As Dict_Type

Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
    Read Junk(i)
Next


t# = Timer
For k = 1 To 10000
    For i = 1 To 10
        f = FindWord(Junk(i))
        If k = 1 Then 'no need to scroll the screen and print the words repeatedly
            If f = 0 Then
                Print Junk(i), "Word not found"
            Else
                Print Junk(i), Dict(FindWord(Junk(i))).Definition
            End If
        End If
    Next
Next
t1# = Timer

Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1

Function FindWord (word$)
    Dim As Long low, hi, test
    low = 1: hi = UBound(Dict)
    While low <= hi
        test = Int((low + hi) / 2)
        Select Case _StriCmp(Dict(test).Word, word$)
            Case 0
                'Print "found"; test
                FindWord = test: Exit Function
            Case -1
                'Print "low"; Dict(test).Word
                low = test + 1
            Case 1
                'Print "high"; Dict(test).Word
                hi = test - 1
        End Select
    Wend
End Function

Function readfile$ (f$)
    If _FileExists(f$) Then
        Open f$ For Binary As #1
        buff$ = Space$(LOF(1))
        Get #1, , buff$
        readfile$ = buff$
        Close #1
    End If
End Function

Zebra is found by your code and it does work well once the wrap around was fixed???
b = b + ...
Reply
#29
It's an overflow error.  /sigh

Add this one line to the top of the code and try it:

Code: (Select All)
DefLng A-Z

s is a SINGLE by default.  The overall dictionary is larger than a single can hold, so it reads the first 255000 or so words and definition and then says, "Blah!!!"

The sort isn't needed.  It simple needs to not overflow so that it'll load and parse that list properly for us.

Code: (Select All)
dict$ = "Collins.txt"
DefLng A-Z
Screen _NewImage(800, 600, 32)

Type Dict_Type
    As String Word, Definition
End Type

ReDim Shared Dict(1000000) As Dict_Type

load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
Do
    ep = InStr(sp, temp$, Chr$(10))
    temp1$ = Mid$(temp$, sp, ep - sp)
    If temp1$ = "" Then Exit Do
    l = InStr(temp1$, Chr$(9)) 'tab separated data file
    count = count + 1
    Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
    Dict(count).Definition = _Trim$(Mid$(temp1$, l))
    sp = ep + 1
Loop Until sp >= Len(temp$)
ReDim _Preserve Dict(count) As Dict_Type
load_time2# = Timer

Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,mouse,zebra
For i = 1 To 10
    Read Junk(i)
Next


t# = Timer
For k = 1 To 10000
    For i = 1 To 10
        f = FindWord(Junk(i))
        If k = 1 Then 'no need to scroll the screen and print the words repeatedly
            If f = 0 Then
                Print Junk(i), "Word not found"
            Else
                Print Junk(i), Dict(FindWord(Junk(i))).Definition
            End If
        End If
    Next
Next
t1# = Timer

Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1

Function FindWord (word$)
    Dim As Long low, hi, test
    low = 1: hi = UBound(Dict)
    While low <= hi
        test = Int((low + hi) / 2)
        Select Case _StriCmp(Dict(test).Word, word$)
            Case 0
                'Print "found"; test
                FindWord = test: Exit Function
            Case -1
                'Print "low"; Dict(test).Word
                low = test + 1
            Case 1
                'Print "high"; Dict(test).Word
                hi = test - 1
        End Select
    Wend
End Function


.txt   Collins.txt (Size: 17.37 MB / Downloads: 17)

Use the dictionary file above with the code above us.  I wanted to simplify things for testing, so this strips out those header lines, and the blank line at the end of the file, and sets it to using CHR$(10) line endings rather than CHR$(13) + CHR$(10).  (Just to reduce overall filesize by a few hundred thousand bytes.)

Things appear to work as advertised now that our sp (starting position) can fully hold values greater than the length of the string without losing precision.  Wink
Reply
#30
Quote:It's an overflow error. /sigh

Add this one line to the top of the code and try it:

Code: (Select All)
DefLng A-Z

s is a SINGLE by default. The overall dictionary is larger than a single can hold, so it reads the first 255000 or so words and definition and then says, "Blah!!!"

I anticipated that and had this line in code:
Dim As Long l, count, sp, ep

No s in there???

I am trying again with all varaibles DIM'd...


BTW you are using two different varaibles for Load_Time#1 and LoadTimer#1 no wonder it's 0.


yep! it is still erroring out with subscript out of range for dictionary at 3000000???
Code: (Select All)
Option _Explicit
Dim dict$, temp$, temp1$
dict$ = "Collins.txt"

Screen _NewImage(800, 600, 32)

Type Dict_Type
    As String Word, Definition
End Type

ReDim Shared Dict(3000000) As Dict_Type
Dim load_time1#, load_time2#, t#, t1#

load_time1# = Timer
temp$ = readfile$(dict$) ' <<<< made my own

'Print Mid$(temp$, 1, 500) ' test load
'Sleep
Dim i As Integer, k As Integer
Dim As Long l, count, sp, ep, f
sp = 1
Do
    ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
    temp1$ = Mid$(temp$, sp, ep - sp)
    'Print temp1$
    l = InStr(temp1$, Chr$(9)) 'tab separated data file

    count = count + 1


    Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
    Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
    'Print count, Dict(count).Word, sp, Len(temp$) ' <<<< 3,000,000 not enough WTH??? it's wrapping around and starting over??

    sp = ep + 2

Loop Until sp >= Len(temp$) ' Or count > 279496 ' <<<<<<  stop the insane wrap around!!!
load_time2# = Timer

ReDim _Preserve Dict(count) As Dict_Type

Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
    Read Junk(i)
Next


t# = Timer
For k = 1 To 10000
    For i = 1 To 10
        f = FindWord&(Junk(i))
        If k = 1 Then 'no need to scroll the screen and print the words repeatedly
            If f = 0 Then
                Print Junk(i), "Word not found"
            Else
                Print Junk(i), Dict(FindWord(Junk(i))).Definition
            End If
        End If
    Next
Next
t1# = Timer

Print Using "###.######## seconds to load dictionary with ###,###,### words."; load_time2# - load_time1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1

Function FindWord& (word$)
    Dim As Long low, hi, test
    low = 1: hi = UBound(Dict)
    While low <= hi
        test = Int((low + hi) / 2)
        Select Case _StriCmp(Dict(test).Word, word$)
            Case 0
                'Print "found"; test
                FindWord = test: Exit Function
            Case -1
                'Print "low"; Dict(test).Word
                low = test + 1
            Case 1
                'Print "high"; Dict(test).Word
                hi = test - 1
        End Select
    Wend
End Function

Function readfile$ (f$)
    Dim buff$
    If _FileExists(f$) Then
        Open f$ For Binary As #1
        buff$ = Space$(LOF(1))
        Get #1, , buff$
        readfile$ = buff$
        Close #1
    End If
End Function

So something else is going on that using count to control stop fixes.

BTW .39 +/- secs to load dictionary well within acceptable range!
b = b + ...
Reply




Users browsing this thread: 3 Guest(s)