Posts: 2,696
Threads: 327
Joined: Apr 2022
Reputation:
217
02-29-2024, 02:16 PM
(This post was last modified: 02-29-2024, 02:17 PM by SMcNeill.)
(02-29-2024, 12:28 PM)bplus Wrote: For me the main concern was not taking anytime to load the word file let alone the word and definitions file at the start of the game.
The Binary search does not take a noticable amount of time for a word lookup, the human player is way too slow to need a slew of words searched at once. Maybe different with a spellchecker.
Load time is even less than search time:
Code: (Select All)
dict$ = "Collins.txt"
Screen _NewImage(800, 600, 32)
Type Dict_Type
As String Word, Definition
End Type
ReDim Shared Dict(1000000) As Dict_Type
load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
Do
ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
'Print temp1$
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
' Print Dict(count).Word
sp = ep + 2
' Sleep
Loop Until sp >= Len(temp$)
load_time2# = Timer
ReDim _Preserve Dict(count) As Dict_Type
Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
Read Junk(i)
Next
t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer
Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1
Function FindWord (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function
0 seconds to load.
0.05 seconds to look up 100,000 entries and determine if they're in the dictionary, or not.
Posts: 2,696
Threads: 327
Joined: Apr 2022
Reputation:
217
Hey @PhilOfPerth -- Are you certain that this dictionary is 100% alphabetic, and that these words don't have any typos in them? Particularly a typo which might cause a word to be out of order?
From the screenshot above, "zebra" isn't found.
I edited my code to add in a simple little CombSort routine, to make certain our lists were alphabetical, and then PRESTO -- "zebra" exists once again and gives me a definition.
Code: (Select All)
dict$ = "Collins.txt"
Screen _NewImage(800, 600, 32)
Type Dict_Type
As String Word, Definition
End Type
ReDim Shared Dict(1000000) As Dict_Type
load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
count = -1
Do
ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
If count > 0 Then 'first two lines in the data file are garbage.
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l))
End If
sp = ep + 2
Loop Until sp >= Len(temp$)
ReDim _Preserve Dict(count) As Dict_Type
combsort
load_time2# = Timer
Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,mouse,zebra
For i = 1 To 10
Read Junk(i)
Next
t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer
Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1
Function FindWord (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function
Sub combsort
'This is the routine I tend to use personally and promote.
'It's short, simple, and easy to implement into code.
gap = UBound(Dict)
Do
gap = 10 * gap \ 13
If gap < 1 Then gap = 1
i = 0
swapped = 0
Do
If Dict(i).Word > Dict(i + gap).Word Then
Swap Dict(i).Word, Dict(i + gap).Word
Swap Dict(i).Definition, Dict(i + gap).Definition
swapped = -1
End If
i = i + 1
Loop Until i + gap > UBound(Dict)
Loop Until gap = 1 And swapped = 0
End Sub
Also need to point out that after looking at the actual file, we need to skip the first two lines of this file as they're header information and not part of the wordlist itself. Code above also accounts for that change.
Posts: 3,964
Threads: 176
Joined: Apr 2022
Reputation:
219
cat = to vomit??? that's what a cat is?
I gotta check my Collin's now ;-))
b = b + ...
Posts: 3,964
Threads: 176
Joined: Apr 2022
Reputation:
219
02-29-2024, 03:03 PM
(This post was last modified: 02-29-2024, 03:05 PM by bplus.)
CAT - crap!
ZEBRA looks OK
If takes 0 time to load, I will try your method next time I go to do something with dictionary @Steve.
b = b + ...
Posts: 2,696
Threads: 327
Joined: Apr 2022
Reputation:
217
Zebra is in there, and it's an okay definition. The problem is it doesn't get found in an unsorted binary search. My guess is that one of the words that is in that chain of searches is spelt wrong, and that's throwing the search off.
say we have a list of 10 things. A B C D E F H G I J, and we want to find H
Search starts at E... less than H, so we look half-way higher
G is the mid-point of the remainder... it's less than H, so we look half-way higher
I is the mid-point of what remains... it's greater than H, so we look half-way lower
but there is no mid-point between G and I!
"Letter not found!"
It's in there, but since it's out of order or typoed or whatever, it throws the binary search method off.
Doing a sort, "zebra" is found and exists once again. Without sorting, it's not, for whatever odd reason.
Posts: 3,964
Threads: 176
Joined: Apr 2022
Reputation:
219
02-29-2024, 03:21 PM
(This post was last modified: 02-29-2024, 03:26 PM by bplus.)
Here is my RA method tested with zebra and some of your words, no rootbeer here either but does find zebra OK
Possible Plil's list was reassembled from his letter lists???
Mine came straight from Collins 2019 Scrabble which came from you Steve, I think?
b = b + ...
Posts: 2,696
Threads: 327
Joined: Apr 2022
Reputation:
217
That's what the first two lines of the file tells us:
"Collins Scrabble Words (2019). 279,496 words with definitions."
(blank spacer)
Does yours have those 2 lines at the top of it? They might've been the difference in the "zebra" search being off somehow.
Posts: 3,964
Threads: 176
Joined: Apr 2022
Reputation:
219
I had removed the first two lines, my Collins.txt starts with AA word...
Steve I tested your code, but do not have version 3.12 installed yet so made my own _ReadFile function.
Your code seems to wrap around and I had to put a count = 279,496 limit to stop the thing from going around over and over WTH???
Code: (Select All) dict$ = "Collins.txt"
Screen _NewImage(800, 600, 32)
Type Dict_Type
As String Word, Definition
End Type
ReDim Shared Dict(3000000) As Dict_Type
load_time1# = Timer
temp$ = readfile$(dict$) ' <<<< made my own
'Print Mid$(temp$, 1, 500) ' test load
'Sleep
Dim As Long l, count, sp, ep
sp = 1
Do
ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
'Print temp1$
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
'Print count, Dict(count).Word, sp, Len(temp$) ' <<<< 3,000,000 not enough WTH??? it's wrapping around and starting over??
sp = ep + 2
Loop Until sp >= Len(temp$) Or count > 279496 ' <<<<<< stop the insane wrap around!!!
load_time2# = Timer
ReDim _Preserve Dict(count) As Dict_Type
Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
Read Junk(i)
Next
t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer
Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1
Function FindWord (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function
Function readfile$ (f$)
If _FileExists(f$) Then
Open f$ For Binary As #1
buff$ = Space$(LOF(1))
Get #1, , buff$
readfile$ = buff$
Close #1
End If
End Function
Zebra is found by your code and it does work well once the wrap around was fixed???
b = b + ...
Posts: 2,696
Threads: 327
Joined: Apr 2022
Reputation:
217
02-29-2024, 04:54 PM
(This post was last modified: 02-29-2024, 04:59 PM by SMcNeill.)
It's an overflow error. /sigh
Add this one line to the top of the code and try it:
Code: (Select All) DefLng A-Z
s is a SINGLE by default. The overall dictionary is larger than a single can hold, so it reads the first 255000 or so words and definition and then says, "Blah!!!"
The sort isn't needed. It simple needs to not overflow so that it'll load and parse that list properly for us.
Code: (Select All)
dict$ = "Collins.txt"
DefLng A-Z
Screen _NewImage(800, 600, 32)
Type Dict_Type
As String Word, Definition
End Type
ReDim Shared Dict(1000000) As Dict_Type
load_time1# = Timer
temp$ = _ReadFile$(dict$)
sp = 1
Do
ep = InStr(sp, temp$, Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
If temp1$ = "" Then Exit Do
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l))
sp = ep + 1
Loop Until sp >= Len(temp$)
ReDim _Preserve Dict(count) As Dict_Type
load_time2# = Timer
Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,mouse,zebra
For i = 1 To 10
Read Junk(i)
Next
t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer
Print Using "###.######## seconds to load dictionary with ###,###,### words."; loadtimer2# - loadtime1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1
Function FindWord (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function
Collins.txt (Size: 17.37 MB / Downloads: 17)
Use the dictionary file above with the code above us. I wanted to simplify things for testing, so this strips out those header lines, and the blank line at the end of the file, and sets it to using CHR$(10) line endings rather than CHR$(13) + CHR$(10). (Just to reduce overall filesize by a few hundred thousand bytes.)
Things appear to work as advertised now that our sp (starting position) can fully hold values greater than the length of the string without losing precision.
Posts: 3,964
Threads: 176
Joined: Apr 2022
Reputation:
219
02-29-2024, 06:33 PM
(This post was last modified: 02-29-2024, 06:41 PM by bplus.)
Quote:It's an overflow error. /sigh
Add this one line to the top of the code and try it:
Code: (Select All)
DefLng A-Z
s is a SINGLE by default. The overall dictionary is larger than a single can hold, so it reads the first 255000 or so words and definition and then says, "Blah!!!"
I anticipated that and had this line in code:
Dim As Long l, count, sp, ep
No s in there???
I am trying again with all varaibles DIM'd...
BTW you are using two different varaibles for Load_Time#1 and LoadTimer#1 no wonder it's 0.
yep! it is still erroring out with subscript out of range for dictionary at 3000000???
Code: (Select All) Option _Explicit
Dim dict$, temp$, temp1$
dict$ = "Collins.txt"
Screen _NewImage(800, 600, 32)
Type Dict_Type
As String Word, Definition
End Type
ReDim Shared Dict(3000000) As Dict_Type
Dim load_time1#, load_time2#, t#, t1#
load_time1# = Timer
temp$ = readfile$(dict$) ' <<<< made my own
'Print Mid$(temp$, 1, 500) ' test load
'Sleep
Dim i As Integer, k As Integer
Dim As Long l, count, sp, ep, f
sp = 1
Do
ep = InStr(sp, temp$, Chr$(13) + Chr$(10))
temp1$ = Mid$(temp$, sp, ep - sp)
'Print temp1$
l = InStr(temp1$, Chr$(9)) 'tab separated data file
count = count + 1
Dict(count).Word = _Trim$(Left$(temp1$, l - 1))
Dict(count).Definition = _Trim$(Mid$(temp1$, l + 1))
'Print count, Dict(count).Word, sp, Len(temp$) ' <<<< 3,000,000 not enough WTH??? it's wrapping around and starting over??
sp = ep + 2
Loop Until sp >= Len(temp$) ' Or count > 279496 ' <<<<<< stop the insane wrap around!!!
load_time2# = Timer
ReDim _Preserve Dict(count) As Dict_Type
Dim Junk(10) As String
Data cheese,dog,cat,elephant,rootbeer,house,food,drink,zebra,mouse
For i = 1 To 10
Read Junk(i)
Next
t# = Timer
For k = 1 To 10000
For i = 1 To 10
f = FindWord&(Junk(i))
If k = 1 Then 'no need to scroll the screen and print the words repeatedly
If f = 0 Then
Print Junk(i), "Word not found"
Else
Print Junk(i), Dict(FindWord(Junk(i))).Definition
End If
End If
Next
Next
t1# = Timer
Print Using "###.######## seconds to load dictionary with ###,###,### words."; load_time2# - load_time1#, count
Print Using "###.######## seconds to find #### words and definitions, ###,###,### repeated times."; t1# - t#, i - 1, k - 1
Function FindWord& (word$)
Dim As Long low, hi, test
low = 1: hi = UBound(Dict)
While low <= hi
test = Int((low + hi) / 2)
Select Case _StriCmp(Dict(test).Word, word$)
Case 0
'Print "found"; test
FindWord = test: Exit Function
Case -1
'Print "low"; Dict(test).Word
low = test + 1
Case 1
'Print "high"; Dict(test).Word
hi = test - 1
End Select
Wend
End Function
Function readfile$ (f$)
Dim buff$
If _FileExists(f$) Then
Open f$ For Binary As #1
buff$ = Space$(LOF(1))
Get #1, , buff$
readfile$ = buff$
Close #1
End If
End Function
So something else is going on that using count to control stop fixes.
BTW .39 +/- secs to load dictionary well within acceptable range!
b = b + ...
|