Option Explicit
' Creates a Palm PDB file from a gutenberg etext file.
' Requires PKUNZIP (converts zip to text) in the path.
' <http://www.pkware.com/shareware/pkz250dos.html>
' Requires MAKEDOC (converts text to palm doc) in the path.
' (Rename this: <http://www.memoware.com/makedoc8.exe> to makedoc.exe)
' Must be passed the file name of a gutenberg etext document 
' to be used as the source file. Etext can be text or zipped.
' This code assumes there is only ONE text file per story.
'
' A simple one-line batch file like this could create PDB files
' from all zip files in a directory (leaving zip files intact):
' for %%x in (*.zip) do cscript.exe UnGutenberg.vbs %%x
' If cscript.exe is used, program status will be displayed. If
' wscript.exe is used, NOTHING will be visible (scary, huh?).
'
' Creates a PDB file with same root name as the source etext file.
' Palm document name will be derived from the title of the story.
'
' All Gutenberg "small print" will be removed from the output PDB, 
' and all text will be unwrapped. Double linefeed paragraphs will be
' converted to single linefeeds. Double spaces will be converted to 
' single spaces. Every effort is made to "sanitize" the book.
' If you want to try manual conversion, see the tips at the 
' end of this page:
' <http://www.memoware.com/mw-helpm.htm>
'
' This is not fast code! Converting "Edison, His Life and Inventions"
' (which is about 574kb zipped) took me about 12 minutes on my 133MHz.
' This is primarily due to the time needed to load the etext file into a
' string.
'
' Written by Eric Phelps
' http://www.ericphelps.com
'
' Distribute freely. NO WARRANTY
' Just because it worked a few times for me on my computer,
' with my version of scripting, and on the few test files
' I checked does NOT mean it will work for you! Consider
' this code a STARTING POINT, not a finished product!
'
Main
Wscript.Quit 0

Sub Main()
Dim filZipFile 'As Scripting.File
Dim strZipFileName 'As String
Dim filEtextFile 'As Scripting.File
Dim strEtextFileName 'As String
Dim filTemp 'As Scripting.File
Dim tsEtext 'As Scripting.TextStream
Dim folWorkingDirectory 'As Scripting.Folder
Dim strWorkingDirectoryName 'As String
Dim strTitle 'As String
Dim strTitleBuffer 'As String
Dim strEtextContent 'As String
Dim strPdbFileName 'As String
Dim fs 'As Scripting.FileSystemObject
Dim intCounter 'As Integer
Const ForReading = 1 'Scripting.IOMode
Const ForWriting = 2 'Scripting.IOMode

    '''''''''' Get the file name passed as an argument
    Set fs = CreateObject("Scripting.FileSystemObject")
    If Wscript.Arguments.Count <> 1 Then
        MsgBox "You must pass a Gutenberg etext file name on the command line!"
        Wscript.Quit 1
    End If
    strZipFileName = Wscript.Arguments(0)
    If Not fs.FileExists(strZipFileName) Then
        MsgBox Wscript.Arguments(0) & " is not a legitimate file name."
        Wscript.Quit 1
    End If
    Set filZipFile = fs.GetFile(strZipFileName)
    
    ''''''''''Create the temporary working text file
    strWorkingDirectoryName = CreateWorkingDirectory()
    set folWorkingDirectory = fs.GetFolder(strWorkingDirectoryName)
    If Lcase(Right(filZipFile.Name, 4)) = ".zip" Then
        'Extract the etext file from the zip file into the working directory
        RunCommandAsBatchFile "pkunzip.exe -e " & filZipFile.ShortPath & " " & folWorkingDirectory.ShortPath
    Else
        'Copy the etext file to the working directory
        fs.CopyFile filZipFile.Path, folWorkingDirectory.Path & "\" & filZipFile.Name
    End If
    'Make sure we only have one file
    If folWorkingDirectory.Files.Count <> 1 Then
        Status "Cancelled -- Found multiple etext files"
        fs.DeleteFile folWorkingDirectory.Path & "\*.*"
        fs.DeleteFolder folWorkingDirectory.Path
        Set tsEtext = Nothing
        Set fs = Nothing
        Exit Sub
    End If
    'Now find the etext file in the working directory!
    For Each filTemp in folWorkingDirectory.Files
        Set filEtextFile = filTemp
        Status "EText File: " & filEtextFile.Name
        Set tsEtext = filEtextFile.OpenAsTextStream(ForReading)
        Status "Loading etext file into memory for processing..."
        strEtextContent = tsEtext.ReadAll
        tsEtext.Close
        Exit For
    Next
    
    ''''''''''Find the title 
    If Instr(1, strEtextContent, " Etext of ", vbTextCompare) <> 0 Then
        strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext of ", vbTextCompare), 80)
        strTitle = Mid(strTitle, Len(" Etext of ") + 1)
    End If
    If strTitle = "" Then
        If Instr(1, strEtextContent, " Etext: ", vbTextCompare) <> 0 Then
            strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext: ", vbTextCompare), 80)
            strTitle = Mid(strTitle, Len(" Etext: ") + 1)
        End If
    End If
    If strTitle = "" Then
        If Instr(1, strEtextContent, " Etext; ", vbTextCompare) <> 0 Then
            strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext; ", vbTextCompare), 80)
            strTitle = Mid(strTitle, Len(" Etext; ") + 1)
        End If
    End If
    If strTitle = "" Then
        If Instr(1, strEtextContent, " Gutenberg Etext ", vbTextCompare) <> 0 Then
            strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Gutenberg Etext ", vbTextCompare), 80)
            strTitle = Mid(strTitle, Len(" Gutenberg Etext ") + 1)
        End If
    End If
    If strTitle = "" Then
        strTitle = filEtextFile.Name
    End If
    strTitle = Trim(strTitle)
    'End the title at the first non-alpha character
    For intCounter = 1 to Len(strTitle)
        If Mid(strTitle, intCounter, 1) >= "A" And Mid(strTitle, intCounter, 1) <= "Z" Then
            strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1)
        ElseIf Mid(strTitle, intCounter, 1) >= "a" And Mid(strTitle, intCounter, 1) <= "z" Then
            strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1)
        ElseIf Mid(strTitle, intCounter, 1) >= "0" And Mid(strTitle, intCounter, 1) <= "9" Then
            strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1)
        ElseIf Mid(strTitle, intCounter, 1) = " " Then
            strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1)
        ElseIf Mid(strTitle, intCounter, 1) = "'" Then
            'Do Nothing. Skip apostrophes.
        ElseIf Mid(strTitle, intCounter, 1) = "#" Then
            'Do Nothing. Skip it too.
        Else
            'Try to allow leading "H. G. Wells" but exit before typical author name at end of title
            If Len(strTitleBuffer) > 5 Then Exit For
        End If
    Next
    strTitle = strTitleBuffer
    'Trim title to maximum 30 characters
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("The ")) = "The " Then strTitle = Mid(strTitle, Len("The ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("A ")) = "A " Then strTitle = Mid(strTitle, Len("A ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("Adventures of ")) = "Adventures of " Then strTitle = Mid(strTitle, Len("Adventures of ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("Works of ")) = "Works of " Then strTitle = Mid(strTitle, Len("Works of ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("Collection of ")) = "Collection of " Then strTitle = Mid(strTitle, Len("Collection of ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Left(strTitle, Len("Life and Adventures of ")) = "Life and Adventures of " Then strTitle = Mid(strTitle, Len("Life and Adventures of ") + 1)
    End If
    If Len(strTitle) > 30 Then
        If Right(strTitle, Len("Vol.")) = "Vol." Then strTitle = Left(strTitle, Instr(strTitle,"Vol.") - 1)
    End If
    If Len(strTitle) > 30 Then
        If Right(strTitle, Len("Volume")) = "Volume" Then strTitle = Left(strTitle, Instr(strTitle,"Volume") - 1)
    End If
    If Len(strTitle) > 30 Then
        If Right(strTitle, Len("No.")) = "No." Then strTitle = Left(strTitle, Instr(strTitle,"No.") - 1)
    End If
    If Len(strTitle) > 30 Then
        If Right(strTitle, Len("Number")) = "Number" Then strTitle = Left(strTitle, Instr(strTitle,"Number") - 1)
    End If
    strTitle = Trim(strTitle)
    If Len(strTitle) > 30 Then
        strTitle = Left(strTitle, 27) & "..."
    End If
    strTitleBuffer = ""
    Status "Title: " & strTitle
    
    ''''''''''Remove the Gutenberg "small print" header
    Status "Removing Gutenberg ""small print"" header"
    If Instr(strEtextContent, "*END*") <> 0 Then
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, "*END*"))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    
    ''''''''''Remove miscellaneous Gutenberg info line
    Status "Checking for miscellaneous Gutenberg info line"
    Do While Instr(1, Left(strEtextContent, 1000), "Gutenberg ", vbTextCompare) <> 0
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "Gutenberg ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    Loop
    
    ''''''''''Remove miscellaneous info lines
    Status "Checking for miscellaneous info lines"
    Do While Instr(1, Left(strEtextContent, 1000), "*", vbTextCompare) <> 0
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "*", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    Loop
    Do While Instr(1, Left(strEtextContent, 1000), "@", vbTextCompare) <> 0
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "@", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    Loop
    Do While Instr(1, Left(strEtextContent, 1000), "-----", vbTextCompare) <> 0
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "-----", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    Loop
    
    ''''''''''Remove the "Scanned By" info line
    Status "Checking for ""Scanned By"" data"
    If Instr(1, Left(strEtextContent, 1000), "scanned By ", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "Scanned By ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    If Instr(1, Left(strEtextContent, 1000), "created by ", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "created by ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    If Instr(1, Left(strEtextContent, 1000), "prepared by ", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "prepared by ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    If Instr(1, Left(strEtextContent, 1000), "prepared from ", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "prepared from ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    If Instr(1, Left(strEtextContent, 1000), "proofed by ", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "proofed by ", vbTextCompare))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    If Instr(Left(strEtextContent, 1000), "OCR") <> 0 Then
        Status "  Removing..."
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, "OCR"))
        strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2)
    End If
    
    ''''''''''Remove the Gutenberg tag at the end of the etext
    Status "Checking for Gutenberg tag at end of story"
    If Instr(1, Right(strEtextContent, 1000), "Gutenberg", vbTextCompare) <> 0 Then
        Status "  Removing..."
        strEtextContent = Left(strEtextContent, InstrRev(strEtextContent, "Gutenberg"))
        strEtextContent = Left(strEtextContent, InstrRev(strEtextContent, vbCrLf) -1)
    End If
    
    ''''''''''Unwrap the text
    'Recover from stupid FTP text download
    strEtextContent = Replace(strEtextContent, vbCr & vbCrLf, vbCrLf)
    'Remove excessive linefeeds
    Do While Instr(strEtextContent, vbCrLf & vbCrLf & vbCrLf) <> 0
        strEtextContent = Replace(strEtextContent, vbCrLf & vbCrLf & vbCrLf, vbCrLf & vbCrLf)
    Loop
    'Save double linefeeds temporarily
    Status "Saving paragraph carriage returns"
    strEtextContent = Replace(strEtextContent, vbCrLf & vbCrLf, "^P")
    strEtextContent = Replace(strEtextContent, vbCr & vbCr, "^P")
    strEtextContent = Replace(strEtextContent, vbLf & vbLf, "^P")
    'Remove hyphenation
    strEtextContent = Replace(strEtextContent, "-" & vbCrLf, "")
    strEtextContent = Replace(strEtextContent, "-" & vbCr, "")
    strEtextContent = Replace(strEtextContent, "-" & vbLf, "")
    'Unwrap all text
    Status "Unwrapping text"
    strEtextContent = Replace(strEtextContent, vbCrLf, " ")
    strEtextContent = Replace(strEtextContent, vbCr, " ")
    strEtextContent = Replace(strEtextContent, vbLf, " ")
    'Replace the saved double linefeeds with single linefeeds
    Status "Restoring paragraph carriage returns"
    strEtextContent = Replace(strEtextContent, "^P", vbCrLf)
    'Replace double spaces with single spaces.
    Status "Replacing double spaces with single spaces"
    Do While Instr(strEtextContent, "  ")
        strEtextContent = Replace(strEtextContent, "  ", " ")
    Loop
    
    ''''''''''Write the changed text back to the temporary file
    Status "Saving changes"
    Set tsEtext = filEtextFile.OpenAsTextStream(ForWriting)
    tsEtext.Write strEtextContent
    strEtextContent = ""
    tsEtext.Close
    
    ''''''''''Run the MAKEDOC program to create the PDB file
    Status "Converting etext to PDB format"
    strPdbFileName = fs.GetAbsolutePathName(fs.BuildPath(filZipFile.Path, "..\" & filEtextFile.Name))
    strPdbFileName = Left(strPdbFileName, Len(strPdbFileName) - 3) & "pdb"
    strPdbFileName = Lcase(strPdbFileName)
    RunCommandAsBatchFile "makedoc.exe -r " & filEtextFile.ShortPath & " """ & strPdbFileName & """ """ & strTitle & """"
    
    ''''''''''Clean up
    fs.DeleteFile filEtextFile.Path
    fs.DeleteFolder folWorkingDirectory.Path
    Set tsEtext = Nothing
    Set fs = Nothing
End Sub

Function CreateWorkingDirectory() 'As String
'Returns the path to a directory created under the %TEMP% folder
Dim fs 'As Scripting.FileSystemObject
Dim intCounter 'As Integer
Dim strDirName 'As String
Const TemporaryFolder = 2 'Scripting.SpecialFolderConst
    Set fs = CreateObject("Scripting.FileSystemObject")
    strDirName = fs.GetSpecialFolder(TemporaryFolder)
    If Right(strDirName, 1) = "\" Then
        strDirName = strDirName & "~" & Wscript.ScriptName
    Else
        strDirName = strDirName & "\~" & Wscript.ScriptName
    End If
    intCounter = 0
    Do While fs.FolderExists(strDirName & Cstr(intCounter))
        intCounter = intCounter + 1
    Loop
    fs.CreateFolder strDirName & Cstr(intCounter)
    CreateWorkingDirectory = strDirName & Cstr(intCounter)
    ''''''''''Clean up
    Set fs = Nothing
End Function

Sub RunCommandAsBatchFile(strCommand)
    '''''''''' Declare all the variables and objects we will be using
    Dim wsh 'As Wscript.Shell
    Dim ts 'As Scripting.TextStream
    Dim fs 'As Scripting.FileSystemObject
    Dim strBatFileName 'As String

    '''''''''' Define all the constants we will (or might) need
    Const TemporaryFolder = 2 'Scripting.SpecialFolderConst.TemporaryFolder
    Const ForWriting = 2 'Scripting.IOMode.ForWriting
    Const WshHide = 0 'IWshRuntimeLibrary.WshWindowStyle.WshHide
    Const WshNormalFocus = 1 'IWshRuntimeLibrary.WshWindowStyle.WshNormalFocus
    Const WshMinimizedFocus = 2 'IWshRuntimeLibrary.WshWindowStyle.WshMinimizedNoFocus
    Const WshMaximizedNoFocus = 3 'IWshRuntimeLibrary.WshWindowStyle.WshMaximizedNoFocus
    Const WshNormalNoFocus = 4 'IWshRuntimeLibrary.WshWindowStyle.WshNormalNoFocus
    Const WshMinimizedNoFocus = 6 'IWshRuntimeLibrary.WshWindowStyle.WshMinimizedNoFocus

    '''''''''' Create the objects we will need
    Set wsh = CreateObject("Wscript.Shell")
    Set fs = CreateObject("Scripting.FileSystemObject")

    '''''''''' Create a temporary batch file name.
    strBatFileName = fs.BuildPath(fs.GetSpecialFolder(TemporaryFolder), "~" & Wscript.ScriptName & ".bat")

    '''''''''' Create the temporary batch file.
    Set ts = fs.OpenTextFile(strBatFileName, ForWriting, True)
    ts.WriteLine "@echo off"
    ts.WriteLine strCommand
    ts.WriteLine "cls"
    ts.Close

    '''''''''' Run the batch file. 
    'The first argument is the batch file name. The second is
    'the "window style". The third argument is whether or not
    'the script should wait on the batch file to finish. Since 
    'we will be deleting the batch file during our clean-up 
    'phase, we have to wait for it to finish. If the file is run
    'with CSCRIPT, show the DOS window. Otherwise, hide it.
    If Lcase(Right(Wscript.FullName, 12)) = "\cscript.exe" Then 
        wsh.Run strBatFileName, WshNormalFocus, True
    Else
        wsh.Run strBatFileName, WshHide, True
    End If

    '''''''''' Clean up.
    'Delete the batch file we created since we are done with it.
    fs.DeleteFile strBatFileName
    'Anything that used "set" should be set equal to Nothing
    Set wsh = Nothing
    Set ts = Nothing
    Set fs = Nothing
End Sub

Sub Status (strMessage)
'If the program was run with CSCRIPT, this writes a
'line into the DOS box. If run with WSCRIPT, it does nothing.
    If Lcase(Right(Wscript.FullName, 12)) = "\cscript.exe" Then 
        Wscript.Echo strMessage
    End If
End Sub

