Sunday, 31 August 2014

Reading a closed Excel workbook using ADODB

Here's a rough code sample (check for line breaks!) for reading closed Excel files:
Public Function GetDataFromClosedWorkbook(ByVal SourceFile As String, _
                                          ByVal SourceRange As String, _
                                 Optional ByRef FieldNames As String = "", _
                                 Optional ByVal SkipHeaders As Boolean = False, _
                                 Optional ByVal LocalCopyLifetime As Double = 1#, _
                                 Optional ByVal ForceRecopy As Boolean = False, _
                                 Optional ByVal Asynchronous As Boolean = False) As Variant

Application.Volatile False
On Error GoTo ErrSub

' Read a Range in a closed workbook (which remains closed throughout the 
'  operation - we do not open the file in Excel.exe)
' Returns a TRANSPOSED 2-dimensional variant array, in which the first column will be the headers

' If your range is a worksheet, append "$" to the worksheet name
' If your range is a defined set of cells on a worksheet, use this format: 
'    Sheet_Name$B1:G1024  (spaces are OK in the worksheet name)
' If you're using workbook-level named range, just supply the name
' If you're querying a csv file, don't bother with a sheet or range name. The filename is the 'table' 

' SkipHeaders = TRUE means that the top row of your data range will NOT be 
' treated as part of the data to be returned
' Set SkipHeaders=True if you pass the parameter SourceRange as 
' a SQL query instead of a range or table name


' FieldNames will be populated by a comma-delimited string containing 
' the field names if SkipHeaders is True



' Note that we do not attempt to examine files on network folders: we always copy to a temporary folder

'      - However, we'll only overwrite a pre-existing local copy if the pre-existing
'        file is older than LocalCopyLifetime days
'      - While the copy-to-local-folder operation in running in asynchronous mode,
'        the function will return #WAITING FOR FILE TRANSFER




Dim objFSO      As Object   '  late-binding: imperfect, but it means we can drag-and-drop this sheet without creating references
Dim objConnect  As Object   '  ADODB.Connection
Dim rst         As Object   '  ADODB.Recordset
Dim strConnect  As String

Dim i           As Long
Dim j           As Long
Dim arrData     As Variant
Dim TempFile    As String
Dim strTest     As String
Dim SQL         As String
Dim iColCount   As Long
Dim strPathFull As String


Dim strHeaders As String

If SourceFile = "" Then
    Exit Function
End If


' ****  Parse out web folder paths ' **** **** **** **** **** **** **** **** **** **** **** ****

If Left(SourceFile, 5) = "http:" Then

    SourceFile = Right(SourceFile, Len(SourceFile) - 5)
    SourceFile = Replace(SourceFile, "%20", " ")
    SourceFile = Replace(SourceFile, "%160", " ")
    SourceFile = Replace(SourceFile, "/", "\")

End If


strPathFull = SourceFile


If Len(Dir(SourceFile)) = 0 Then
    ReDim arrTemp(1 To 1, 1 To 1)
    arrTemp(1, 1) = "#ERROR Source file not found"
    GetDataFromClosedWorkbook = arrTemp
    Erase arrTemp
    Exit Function
End If


' **** Copy remote files to the local drive: **** **** **** **** **** **** **** **** **** **** **** 

If objFSO Is Nothing Then
    Set objFSO = CreateObject("Scripting.FileSystemObject") ' New Scripting.FileSystemObject
End If

If objFSO Is Nothing Then
    Shell "Regsvr32.exe /s scrrun.dll", vbHide
    Application.Wait (Now() + 5 / 3600 / 24)
    Set objFSO = CreateObject("Scripting.FileSystemObject")
End If

If objFSO Is Nothing Then
    Exit Function
End If


TempFile = objFSO.GetSpecialFolder(2).Path & "\" & Filename(SourceFile)
    
    
If ForceRecopy Then

    If Len(VBA.FileSystem.Dir(TempFile)) > 0 Then
        VBA.FileSystem.Kill TempFile
    End If
    
End If



If Not (Left(SourceFile, 3) = "C:\" Or Left(SourceFile, 3) = "D:\") Then
  
    If Len(VBA.FileSystem.Dir(TempFile)) > 0 Then
    
        On Error Resume Next
        If VBA.FileSystem.FileDateTime(TempFile) < VBA.FileSystem.FileDateTime(SourceFile) Then
            VBA.FileSystem.Kill TempFile
        ElseIf objFSO.GetFile(TempFile).dateLastAccessed < (Now - LocalCopyLifetime) Then
            VBA.FileSystem.Kill TempFile
        End If
        
    End If
    
    If Len(VBA.FileSystem.Dir(TempFile)) = 0 Then
    
        If Asynchronous Then
            Shell "cmd /c COPY " & Chr(34) & SourceFile & _ 
                   Chr(34) & " " & Chr(34) & TempFile & Chr(34), vbHide
            ReDim arrTemp(1 To 1, 1 To 1)
            arrTemp(1, 1) = "#WAITING FOR FILE TRANSFER. Please try again in a minute."
            GetDataFromClosedWorkbook = arrTemp
            Erase arrTemp
            Exit Function
        Else
     
            VBA.FileSystem.FileCopy SourceFile, TempFile
            
        End If
        
    Else
        SourceFile = TempFile
    End If

End If


' ****  Decide whether we need to read a header row separately from the main body of the data: ' **** **** 

If InStr(1, SourceRange, "SELECT", vbTextCompare) > 0 And _ 
   InStr(7, SourceRange, "FROM", vbTextCompare) > 1 _ 
Then
    strHeaders = "HDR=Yes"
    'SkipHeaders = True
ElseIf SkipHeaders = True Then
    strHeaders = "HDR=Yes"
Else
    strHeaders = "HDR=No"
End If





' **** Connect to the file: ' **** **** **** **** **** **** **** **** **** **** **** ****' **** **** 

        Application.StatusBar = "Connecting to " & SourceFile & "..."
    
    
        If Right(SourceFile, 4) = ".xls" Then    '
        
            'strConnect = "DRIVER={Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)};" & _ 
                            "ReadOnly=1;DBQ=" & Chr(34) & SourceFile & Chr(34) & ";" & _ 
                            ";Extended Properties=" & _ 
                            Chr(34) & "HDR=No;IMEX=1;MaxScanRows=0" & Chr(34) & ";"
            
            strConnect = "Provider=Microsoft.Jet.OLEDB.4.0;_ 
                            Data Source=" & Chr(34) & SourceFile & Chr(34) & ";_ 
                            Extended Properties=" & Chr(34) & "Excel 8.0;" & _ 
                            strHeaders & ";IMEX=1;MaxScanRows=0" & Chr(34) & ";"
          
           ' strConnect = "Provider=Microsoft.ACE.OLEDB.12.0;_ 
                            Data Source=" & Chr(34) & SourceFile & Chr(34) & ";_ 
                            Extended Properties=" & Chr(34) & "Excel 8.0;" & _ 
                            strHeaders & ";IMEX=1;MaxScanRows=0" & Chr(34) & ";"
          
        ElseIf Right(SourceFile, 5) = ".xlsx" Then
        
            strConnect = "Provider=Microsoft.ACE.OLEDB.12.0;_ 
                            Data Source=" & Chr(34) & SourceFile & Chr(34) & ";_ 
                            Extended Properties=" & Chr(34) & "Excel 12.0 Xml;" & _ 
                            strHeaders & ";IMEX=1;MaxScanRows=0" & Chr(34) & ";"
        
        ElseIf Right(SourceFile, 5) = ".xlsm" Then
            
            'strConnect = "Driver={Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)};"_ 
                             & "ReadOnly=1;_ 
                            DBQ=" & SourceFile & ";" & Chr(34) & SourceFile & Chr(34) & ";" & ";_ 
                            Extended Properties=" & Chr(34) & "Excel 12.0;" & _ 
                            strHeaders & ";IMEX=1;MaxScanRows=0" & Chr(34) & ";"
     
            strConnect = "Provider=Microsoft.ACE.OLEDB.12.0; _ 
                            Data Source=" & Chr(34) & SourceFile & Chr(34) & ";_ 
                            Extended Properties=" & Chr(34) & "Excel 12.0 Macro;" & _ 
                            strHeaders & ";IMEX=1;MaxScanRows=0" & Chr(34) & ";"
        
        ElseIf Right(SourceFile, 5) = ".xlsb" Then
          
            'strConnect = "Driver={Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)};" &  _ 
            '"ReadOnly=1;DBQ=" & SourceFile & ";" & Chr(34) & SourceFile & Chr(34) & ";" &  _ 
            ' ";Extended Properties=" & Chr(34) & "Excel 12.0;" & strHeaders & "; _ 
            ' IMEX=1;MaxScanRows=0" & Chr(34) & ";"

            ' This ACE driver is unstable on xlsb files... 
            ' But it's more likely to return a result, if you don't mind crashes:
            strConnect = "Provider=Microsoft.ACE.OLEDB.12.0; _ 
                          Data Source=" & Chr(34) & SourceFile & Chr(34) &  _ 
                          ";Extended Properties=" & Chr(34) & "Excel 12.0;" & strHeaders _  & "; _ 
                            IMEX=1;MaxScanRows=0" & Chr(34) & ";"
        
        ElseIf Right(SourceFile, 4) = ".csv" Or Right(SourceFile, 4) = ".txt" Then
        
            ' JET OLEDB text driver connection string:
            '   Provider=Microsoft.Jet.OLEDB.4.0;Data Source=c:\txtFilesFolder\;
            '     Extended Properties="text;HDR=Yes;FMT=Delimited;MaxScanRows=;IMEX=1;"; 

            ' ODBC text driver connection string:
            '   Driver={Microsoft Text Driver 
            '    (*.txt; *.csv)};Dbq=c:\txtFilesFolder\;Extensions=asc,csv,tab,txt;


            strConnect = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & _ 
                          Chr(34) & Folder(SourceFile) & Chr(34) & ";"
            strConnect = strConnect & "Extended Properties=" & Chr(34) &  _ 
                          "text;HDR=Yes;IMEX=1;MaxScanRows=0" & Chr(34) & ";"
            SourceRange = Filename(SourceFile)
            
         
        ElseIf Right(SourceFile, 4) = ".tab" Or Right(SourceFile, 4) = ".dat" Then
        
            ' JET OLEDB text driver connection string:
            '   Provider=Microsoft.Jet.OLEDB.4.0;Data Source=c:\txtFilesFolder\;Extended  
            '    Properties="text;HDR=Yes;FMT=Delimited;MaxScanRows=;IMEX=1;";

            ' ODBC text driver connection string:
            '   Driver={Microsoft Text Driver 
            '     (*.txt; *.csv)};Dbq=c:\txtFilesFolder\;Extensions=asc,csv,tab,txt;



            strConnect = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" &  _ 
                         Chr(34) & Folder(SourceFile) & Chr(34) & ";"
            strConnect = strConnect & "Extended Properties=" & Chr(34) & _ 
                         "text;HDR=Yes;IMEX=1;MaxScanRows=0;DELIMITER=TAB" & Chr(34) & ";"
            SourceRange = Filename(SourceFile)
           
        
        Else
        
            ReDim arrTemp(1 To 1, 1 To 1)
            arrTemp(1, 1) = "#ERROR - file format not known"
            GetDataFromClosedWorkbook = arrTemp
            Erase arrTemp
            
        End If
        
        On Error GoTo ErrSub
        
        Set objConnect = CreateObject("ADODB.Connection")   ' New ADODB.Connection
        With objConnect
        
            .ConnectionTimeout = 60
            .CommandTimeout = 90
            .Mode = 1 ' adModeRead = 1
            .ConnectionString = strConnect
    
            .Open
    
        End With

    
' ****  Retrieve the data: ' **** **** **** **** **** **** **** **** **** **** **** **** **** 
    
    Set rst = CreateObject("ADODB.Recordset") '  New ADODB.Recordset
    
    With rst

        .CursorLocation = 3
        
        If Right(ThisWorkbook.Name, 4) = ".xls" Then
            .MaxRecords = 65535
        Else
            .MaxRecords = 1048575
        End If
        
        If InStr(1, SourceRange, "SELECT", vbTextCompare) > 0  _ 
        And InStr(7, SourceRange, "FROM", vbTextCompare) > 1 Then
            SQL = SourceRange
        Else
           SQL = "SELECT * FROM [" & SourceRange & "] "
        End If
        
        Application.StatusBar = "Querying " & SourceFile & "..."
        
        '.Open SQL, objConnect, adOpenStatic, adLockReadOnly, adCmdText + adAsyncFetch
        .Open SQL, objConnect, 3, 1, 1 + 32
 
        i = 0
        Do While .State > 1
            i = (i + 1) Mod 3
            Application.StatusBar = "Retrieving data from " & SourceFile & String(i, ".")
            Application.Wait Now + (0.25 / 24 / 3600)
        Loop
        
    
    End With
        
    
' ****  Handle the returned data ' **** **** **** **** **** **** **** **** **** **** **** ****  

On Error Resume Next
' resume next is required, as the errors we anticipate cannot be trapped:
' they can only be detected after the fact
    
    
    For i = 0 To rst.Fields.Count - 1
        FieldNames = FieldNames & rst.Fields(i).Name & ","
    Next i
    FieldNames = Left(FieldNames, Len(FieldNames) - 1)
    
    If rst.EOF And rst.BOF Then
        'return a single empty rpw, so that the caller doesn't error out
        ReDim arrTemp(1 To rst.Fields.Count, 1 To 1)   'remember, its a transposed array
        arrTemp(1, 1) = "#NO MATCHING DATA IN '" & SourceFile & "' USING '" & SQL & "'"
        GetDataFromClosedWorkbook = arrTemp
    Else
        Err.Clear
        rst.MoveFirst
        GetDataFromClosedWorkbook = rst.GetRows   
        ' note that this often fails on the first try.
        
        If IsEmpty(GetDataFromClosedWorkbook) Then
            rst.MoveFirst
            GetDataFromClosedWorkbook = rst.GetRows
        End If
        
        If IsEmpty(GetDataFromClosedWorkbook) Then  
        ' ...And on the second try. GetRows is fast when it works, but cannot be relied on
            
            rst.MoveFirst
            ReDim arrTemp(0 To rst.Fields.Count - 1, 0 To rst.RecordCount)
            i = 0
            j = 0
            Do Until rst.EOF
            
                Err.Clear
                If i > UBound(arrTemp, 2) Then
                    ReDim Preserve arrTemp(0 To rst.Fields.Count - 1, 0 To i)
                End If
                
                For j = 0 To rst.Fields.Count - 1
                    arrTemp(j, i) = rst.Fields(j).Value
                    If Err.Number = &HBCD Then
                        Exit For
                    End If
                Next j
                
                i = i + 1
            
           
                If Err.Number <> &HBCD Then
                    rst.MoveNext
                End If
                
                If Err.Number <> &H80004005 And Err.Number <> 0 Then
                    Exit Do
                End If
                
            Loop
            
            GetDataFromClosedWorkbook = arrTemp
            Erase arrTemp
            
        End If ' IsEmpty(GetDataFromClosedWorkbook)
        
        
    End If '  rst.EOF And rst.BOF Then
    
    
   
    
    
    
ExitSub:
On Error Resume Next

    rst.Close
    objConnect.Close ' close the database connection
    
    Set rst = Nothing
    Set objConnect = Nothing

    

    Exit Function
    
ErrSub:

    ReDim arrTemp(1 To 1, 1 To 1)
    If InStr(Err.Description, "not a valid name") Then
        arrTemp(1, 1) = "#ERROR: cannot retrieve data from '" & SourceRange & "'t"
        MsgBox "Cannot open the file: " & vbCrLf & vbCrLf & strPathFull & vbCrLf & vbCrLf & _ 
        "This error message probably means that the source file is locked because another  _ 
        user has this file open. Please wait a few minutes, and try again." & vbCrLf &  _ 
        vbCrLf & "If this error persists, please contact the tech team.", vbCritical,  _ 
        APP_NAME & ": file access error:"
    
    ElseIf InStr(Err.Description, "cannot open the file") Then
        arrTemp(1, 1) = "#ERROR: Cannot open the file '" & SourceRange & "'t"
        MsgBox "Cannot open the file: " & vbCrLf & vbCrLf & strPathFull & vbCrLf  _ 
        & vbCrLf & "This error message probably means that the source file is  _ 
        locked because another user has this file open. Please wait a few minutes,  _ 
        and try again." & vbCrLf & vbCrLf & "If this error persists, please contact  _ 
        the tech team.", vbCritical, APP_NAME & ": file access error:"
    
    ElseIf InStr(Err.Description, "not find the object") Then
        arrTemp(1, 1) = "#ERROR: Invalid object name in '" & SourceRange & "'" _         
        MsgBox Err.Description & vbCrLf & vbCrLf & "This error message probably  _ 
        means that the worksheet or range has been renamed, or does not exist in  _ 
        the file. Please check your file: if you can't see an obvious error, ask  _ 
        for help from the tech team.", vbCritical, APP_NAME & ": file data error:"
    
    ElseIf InStr(Err.Description, "Permission Denied") Then
        arrTemp(1, 1) = "#ERROR Access to file"
        MsgBox "Cannot open the file: " & vbCrLf & vbCrLf & strPathFull & vbCrLf & vbCrLf &  _ 
        "Another user probably has this file open. Please wait a few minutes, and try again."  _ 
        & vbCrLf & vbCrLf & "If this error persists, please contact tech team.",  _ 
        vbCritical, APP_NAME & ": file access error:"
    
    Else
        arrTemp(1, 1) = "#ERROR " & Err.Number & ": " & Err.Description
    End If
    
    GetDataFromClosedWorkbook = arrTemp
    Erase arrTemp
   
    Resume ExitSub
    Resume
    
End Function