'Retrieves and saves all designated links on a page
'Can pass arguments INDEX URL, DEST FOLDER, TARGET EXTENSION
'or you will be asked interactively for them.
Option Explicit
Main
Sub Main
Dim strUrl, strDest, strExt, strGuessFolder
strUrl = GetUrl()
If strUrl = "" Then Exit Sub
strGuessFolder = UrlDocName(strUrl)
strDest = GetFolder(strGuessFolder)
If strDest = "" Then Exit Sub
strExt = GetFileExtension()
If strExt = "" Then Exit Sub
Slurp strUrl, strDest, strExt
End Sub
Function GetUrl()
Dim strUrl
'Get the starting URL -- pretty much an index page linking to all the files we want
If WScript.Arguments.Count = 0 Then
strUrl = InputBox("Enter URL of page with links you want slurped. The http:// header is mandatory.", "Link Page URL", "http://")
Else
strUrl = WScript.Arguments(0)
End If
If Instr(strUrl, "http://") = 0 Then
MsgBox "URL is missing the leading http://"
GetUrl = ""
Else
GetUrl = strUrl
End If
End Function
Function GetFolder(strGuess)
Dim fs, strDest
Set fs = CreateObject("Scripting.FileSystemObject")
'Get the local path where desired files will be stored
If WScript.Arguments.Count < 2 Then
strDest = InputBox("Enter the local path where you want the files stored.", "Destination", fs.BuildPath(fs.GetParentFolderName(WScript.ScriptFullName), strGuess))
Else
strDest = WScript.Arguments(1)
End If
If Not fs.FolderExists(strDest) Then
'Maybe we can create it. Does the parent folder exist?
If Not fs.FolderExists(fs.GetParentFolderName(strDest)) Then
MsgBox "Folder doesn't exist, and neither does the parent."
GetFolder = ""
Exit Function
Else
On Error Resume Next
fs.CreateFolder(strDest)
On Error Goto 0
End If
End If
If Not fs.FolderExists(strDest) Then
MsgBox "Folder doesn't exist and I can't create it."
GetFolder = ""
Exit Function
End If
GetFolder = strDest
End Function
Function GetFileExtension()
Dim strExt, intCounter
'Get the extensions (file types) of the links we are after
If WScript.Arguments.Count <> 3 Then
strExt = InputBox("Enter the extension (no dots) of the files you want slurped.", "File Type", "mp3")
Else
strExt = WScript.Arguments(2)
End If
If strExt = "" Then
MsgBox "No extension"
GetFileExtension = ""
Exit Function
End If
strExt = LCase(strExt)
For intCounter = 1 To Len(strExt)
If InStr("qwertyuiopasdfghjklzxcvbnm1234567890", Mid(strExt, intCounter, 1)) = 0 Then
MsgBox "The character """ & Mid(strExt, intCounter, 1) & """ is not allowed."
Exit Function
GetFileExtension = ""
End If
Next
GetFileExtension = strExt
End Function
Function UrlDocName(strUrl)
Dim strDocName, intCounter
'Get the last word in the URL
If Right(strUrl, 1) = "/" Then
strDocName = Left(strUrl, Len(strUrl) - 1)
strDocName = Mid(strDocName, InStrRev(strDocName, "/") + 1)
Else
strDocName = Mid(strUrl, InStrRev(strUrl, "/") + 1)
End If
'Don't include file extensions
If InStr(strDocName, ".") <> 0 Then
strDocName = Left(strDocName, InStr(strDocName, ".") - 1)
End If
'Don't include CGI arguments
If InStr(strDocName, "?") <> 0 Then
strDocName = Left(strDocName, InStr(strDocName, "?") - 1)
End If
'Clean up any URL-encoding
strDocName = Unescape(strDocName)
'Replace illegal characters
For intCounter = 1 To Len("/\:*?""<>|")
strDocName = Replace(strDocName, Mid("/\:*?""<>|", intCounter, 1), "-")
Next
UrlDocName = strDocName
End Function
Sub Slurp(strUrl, strDest, strExt)
Dim strLink, strPage, strFile
Dim intCounter
Dim fs
Set fs = CreateObject("Scripting.FileSystemObject")
'Get the index page contents (has all the links)
strPage = GetPage(strUrl)
'Walk the page looking for links
Do While GetFirstLink(strPage) <> ""
'Get the first link in the page
strLink = GetFirstLink(strPage)
'Trim the first link away so we don't see it next time
strPage = Mid(strPage, InStr(1, strPage, " 0 Then
SaveBinary = False
Exit Function
End If
If web.Status <> "200" Then
SaveBinary = False
Exit Function
End If
On Error Resume Next
varByteArray = web.ResponseBody
Set bin = Nothing
Set bin = CreateObject("ADODB.Stream")
If bin Is Nothing Then
strData = ""
For lngCounter = 0 to UBound(varByteArray)
strData = strData & Chrw(Ascw(Chr(Ascb(Midb(varByteArray,lngCounter+1,1)))))
Next
String2File strData, strFile
SaveBinary = True
Else
bin.Type = adTypeBinary
bin.Open
bin.Write varByteArray
bin.SaveToFile strFile, adSaveCreateOverWrite
If Err.Number = 0 Then
SaveBinary = True
Else
SaveBinary = False
End If
End If
End Function
Function GetFirstLink(strPage)
Dim strUrl
Dim lngMarker
Dim strPeekText
lngMarker = InStr(1, strPage, "") = 0 Then
'The marker is INSIDE the URL we are looking for. Push it to the end of the tag.
lngMarker = InStr(lngMarker, strPage, ">")
End If
'Now we have a marker at or after the end of an "a" tag
'Trim everything past the marker
strUrl = Left(strPage, lngMarker)
'Trim everything before the href tag
strUrl = Mid(strUrl, InStrRev(strUrl, "href=", Len(strUrl), vbTextCompare) + 5)
'If we have quotes, use them
If Left(strUrl, 1) = Chr(34) Then strUrl = Mid(strUrl, 2)
If InStr(strUrl, Chr(34)) <> 0 Then strUrl = Left(strUrl, InStr(strUrl, Chr(34)) - 1)
'If we have apostrophes, use them instead of quotes
If Left(strUrl, 1) = "'" Then strUrl = Mid(strUrl, 2)
If InStr(strUrl, "'") <> 0 Then strUrl = Left(strUrl, InStr(strUrl, "'") - 1)
'We might have a leading space. Not sure how, but trim to be safe.
strUrl = Trim(strUrl)
'Trim everything past the first tag end
If InStr(strUrl, ">") <> 0 Then strUrl = Left(strUrl, InStr(strUrl, ">") - 1)
'Trim everything past the first space
If InStr(strUrl, " ") <> 0 Then strUrl = Left(strUrl, InStr(strUrl, " "))
'Trim everything past the first tab
If InStr(strUrl, vbTab) <> 0 Then strUrl = Left(strUrl, InStr(strUrl, vbTab))
'That should be all!
GetFirstLink = strUrl
End Function
Function MakeAbsolute(strHref, strCurrentPage)
'Converts strHref from root or relative link to absolute link.
'strCurrentPage MUST be an absolute URL.
Dim strMakeAbsolute, strRoot
strMakeAbsolute = strHref
'Is it already absolute?
If Left(strMakeAbsolute, 7) = "http://" Then
MakeAbsolute = strMakeAbsolute
Exit Function
End If
'Figure out what the root page is
strRoot = strCurrentPage
If InStr(8, strRoot, "/") <> 0 Then
'Make the root so it DOES NOT have a trailing slash
strRoot = Left(strRoot, InStr(8, strRoot, "/") - 1)
End If
'Is a root reference used?
If Left(strMakeAbsolute, 1) = "/" Then
MakeAbsolute = strRoot & strMakeAbsolute
Exit Function
End If
'A relative link is used (it must be at this point!)
MakeAbsolute = Left(strCurrentPage, InStrRev(strCurrentPage, "/")) & strMakeAbsolute
End Function
Sub String2File(strData, strFileName)
'Writes a string to a file
Dim fs 'As Scripting.FileSystemObject
Dim ts 'As Scripting.TextStream
Dim lngChar, strBlock, intChar
Const ForWriting = 2
Set fs = CreateObject("Scripting.FileSystemObject")
Set ts = fs.OpenTextFile(strFileName, ForWriting, True)
Err.Clear
On Error Resume Next
ts.Write strData
If Err.Number <> 0 Then
'Must have hit one of the "problem characters" between 128 and 159
For lngChar = 1 To Len(strData) Step 100
Err.Clear
ts.Write Mid(strData, lngChar, 100)
If Err.Number <> 0 Then
'This block of 100 must have the problem. Write them one-at-a-time
strBlock = Mid(strData, lngChar, 100)
For intChar = 1 To Len(strBlock)
ts.Write Chr(255 And AscW(Mid(strBlock, intChar)))
Next
End If
Next
End If
ts.Close
End Sub
Sub Status(strMessage)
If Lcase(Right(Wscript.FullName, 12)) = "\cscript.exe" Then
Wscript.Echo strMessage
End If
End Sub