Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Cautionary tale of open, append, close
#3
BTW, I've also created a standard function that reads huge (>100 GB) files and processes them line by line or field by field extremely fast.

Code: (Select All)
Function CSV.read& (fileName$, eol$) ' 4M lines/sec
  ' Streams fileName$ in fixed-size binary blocks and passes every line
  ' (including its terminating eol$) to CSV.line.
  '
  ' Parameters:
  '   fileName$ - path of the file to read
  '   eol$      - line terminator to split on (e.g. Chr$(10) or Chr$(13) + Chr$(10))
  '
  ' Returns:
  '   0 when the file does not exist or eol$ is empty; otherwise the number of
  '   lines passed to CSV.line minus one (the counter starts at -1, presumably
  '   so that a header row is not counted - confirm with callers).
  '
  ' A final line that is not terminated by eol$ is also passed to CSV.line
  ' (without a terminator) and counted.
  Const BLOCKSIZE = 2 ^ 22 ' 4 MB
  If Not _FileExists(fileName$) Then CSV.read& = 0: Exit Function
  'Print "Reading lines from "; fileName$; " ";: cpos% = Pos(0)
  eoll% = Len(eol$)
  ' An empty terminator can never advance the scan position; refuse it up front.
  If eoll% = 0 Then CSV.read& = 0: Exit Function
  Dim As _Unsigned _Integer64 blocks, fileLen, remaining
  Dim block As String * BLOCKSIZE
  blk& = Len(block)
  ff% = FreeFile
  Open fileName$ For Binary Access Read As #ff%
  fileLen = LOF(ff%)
  ' Ceiling division: one block for every started BLOCKSIZE bytes, never more.
  blocks = (fileLen + blk& - 1) \ blk&
  remaining = fileLen

  carry$ = "" ' unterminated tail of the previous block(s)
  lines& = -1
  $Checking:Off
  For curblock& = 1 To blocks
    Get #ff%, , block
    If remaining >= blk& Then
      buf$ = carry$ + block
      remaining = remaining - blk&
    Else
      ' Last, partly filled block: keep only the bytes that belong to the file
      ' so the unused rest of the fixed-length buffer never reaches the parser.
      buf$ = carry$ + Left$(block, remaining)
      remaining = 0
    End If
    r0& = 1
    r1& = InStr(r0&, buf$, eol$)
    Do While r1& > 0
      lin$ = Mid$(buf$, r0&, r1& - r0& + eoll%)
      ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      ret% = CSV.line(lin$) ' Process lin$
      ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      lines& = lines& + 1
      r0& = r1& + eoll%
      r1& = InStr(r0&, buf$, eol$)
    Loop
    ' Everything after the last terminator is incomplete; prepend it to the
    ' next block. A block without any terminator is carried over whole, and a
    ' terminator split across two blocks is reassembled by the concatenation.
    carry$ = Mid$(buf$, r0&)
    'Locate , cpos%, 0: Print lines&;
  Next curblock&
  $Checking:On
  Close #ff%
  ' Flush a final line that has no trailing terminator.
  If Len(carry$) > 0 Then
    ret% = CSV.line(carry$)
    lines& = lines& + 1
  End If
  buf$ = ""
  carry$ = ""
  'Locate , cpos%, 0
  CSV.read& = lines&
End Function

Function CSV.line% (l$)
  ' Per-line handler invoked by the reader: prints each field of the record
  ' l$ followed by a comma, ends the output line, and always returns 1.
  total% = Val(CSV.field(l$, 0)) ' index 0 yields the field count as text
  idx% = 1
  Do While idx% <= total%
    Print CSV.field(l$, idx%); ",";
    idx% = idx% + 1
  Loop
  Print
  CSV.line% = 1
End Function

Function CSV.field$ (lin$, n%)
  ' Returns one comma-separated field of the record lin$.
  '
  ' Parameters:
  '   lin$ - the record; parsing stops at the first CR or LF character
  '   n%   - 1-based field index; n% <= 0 asks for the number of fields
  '
  ' Returns:
  '   n% <= 0       : the field count as a string
  '   1 <= n% <= cnt: the raw text of field n% (surrounding quotes are kept)
  '   n% > cnt      : ""
  '
  ' The split result is cached in Static storage, so repeated calls with the
  ' same lin$ do not re-parse it. Commas inside double quotes do not split.
  ' At most MAXFIELDS fields are stored; any further fields are ignored.
  Const MAXFIELDS = 99
  Static parsed%, cf%, rec$, f$(1 To MAXFIELDS)
  ' parsed% separates "nothing parsed yet" from "last record was empty", so an
  ' empty record reports one (empty) field even on the very first call.
  If parsed% = 0 Or rec$ <> lin$ Then
    parsed% = -1
    rec$ = lin$
    ' Long (&) positions: records may be longer than 32767 characters.
    cf% = 0: q% = 0: i0& = 1: ll& = Len(rec$)
    For i& = 1 To ll&
      cc% = Asc(rec$, i&) ' character code at position i&, no temporary string
      If cc% = 13 Or cc% = 10 Then
        Exit For
      ElseIf cc% = 34 Then '34 = "
        q% = 1 - q% ' toggle the inside-quotes state
      ElseIf cc% = 44 And q% = 0 Then '44 = ,
        If cf% < MAXFIELDS Then cf% = cf% + 1: f$(cf%) = Mid$(rec$, i0&, i& - i0&)
        i0& = i& + 1
      End If
    Next i&
    ' i& is now the terminator position or ll& + 1: store the last field.
    If cf% < MAXFIELDS Then cf% = cf% + 1: f$(cf%) = Mid$(rec$, i0&, i& - i0&)
  End If
  If n% <= 0 Then
    CSV.field$ = _ToStr$(cf%)
  ElseIf n% <= cf% Then
    CSV.field$ = f$(n%)
  Else
    CSV.field$ = ""
  End If
End Function
45y and 2M lines of MBASIC>BASICA>QBASIC>QBX>QB64 experience
Reply


Messages In This Thread
RE: Cautionary tale of open, append, close - by mdijkens - 01-15-2025, 09:48 PM



Users browsing this thread: 3 Guest(s)