Option Explicit ' Creates a Palm PDB file from a gutenberg etext file. ' Requires PKUNZIP (converts zip to text) in the path. ' ' Requires MAKEDOC (converts text to palm doc) in the path. ' (Rename this: to makedoc.exe) ' Must be passed the file name of a gutenberg etext document ' to be used as the source file. Etext can be text or zipped. ' This code assumes there is only ONE text file per story. ' ' A simple one-line batch file like this could create PDB files ' from all zip files in a directory (leaving zip files intact): ' for %%x in (*.zip) do cscript.exe UnGutenberg.vbs %%x ' If cscript.exe is used, program status will be displayed. If ' wscript.exe is used, NOTHING will be visible (scary, huh?). ' ' Creates a PDB file with same root name as the source etext file. ' Palm document name will be derived from the title of the story. ' ' All Gutenberg "small print" will be removed from the output PDB, ' and all text will be unwrapped. Double linefeed paragraphs will be ' converted to single linefeeds. Double spaces will be converted to ' single spaces. Every effort is made to "sanitize" the book. ' If you want to try manual conversion, see the tips at the ' end of this page: ' ' ' This is not fast code! Converting "Edison, His Life and Inventions" ' (which is about 574kb zipped) took me about 12 minutes on my 133MHz. ' This is primarily due to the time needed to load the etext file into a ' string. ' ' Written by Eric Phelps ' http://www.ericphelps.com ' ' Distribute freely. NO WARRANTY ' Just because it worked a few times for me on my computer, ' with my version of scripting, and on the few test files ' I checked does NOT mean it will work for you! Consider ' this code a STARTING POINT, not a finished product! ' Main Wscript.Quit 0 Sub Main() Dim filZipFile 'As Scripting.File Dim strZipFileName 'As String Dim filEtextFile 'As Scripting.File Dim strEtextFileName 'As String Dim filTemp 'As Scripting.File Dim tsEtext 'As Scripting.TextStream Dim folWorkingDirectory 'As Scripting.Folder Dim strWorkingDirectoryName 'As String Dim strTitle 'As String Dim strTitleBuffer 'As String Dim strEtextContent 'As String Dim strPdbFileName 'As String Dim fs 'As Scripting.FileSystemObject Dim intCounter 'As Integer Const ForReading = 1 'Scripting.IOMode Const ForWriting = 2 'Scripting.IOMode '''''''''' Get the file name passed as an argument Set fs = CreateObject("Scripting.FileSystemObject") If Wscript.Arguments.Count <> 1 Then MsgBox "You must pass a Gutenberg etext file name on the command line!" Wscript.Quit 1 End If strZipFileName = Wscript.Arguments(0) If Not fs.FileExists(strZipFileName) Then MsgBox Wscript.Arguments(0) & " is not a legitimate file name." Wscript.Quit 1 End If Set filZipFile = fs.GetFile(strZipFileName) ''''''''''Create the temporary working text file strWorkingDirectoryName = CreateWorkingDirectory() set folWorkingDirectory = fs.GetFolder(strWorkingDirectoryName) If Lcase(Right(filZipFile.Name, 4)) = ".zip" Then 'Extract the etext file from the zip file into the working directory RunCommandAsBatchFile "pkunzip.exe -e " & filZipFile.ShortPath & " " & folWorkingDirectory.ShortPath Else 'Copy the etext file to the working directory fs.CopyFile filZipFile.Path, folWorkingDirectory.Path & "\" & filZipFile.Name End If 'Make sure we only have one file If folWorkingDirectory.Files.Count <> 1 Then Status "Cancelled -- Found multiple etext files" fs.DeleteFile folWorkingDirectory.Path & "\*.*" fs.DeleteFolder folWorkingDirectory.Path Set tsEtext = Nothing Set fs = Nothing Exit Sub End If 'Now find the etext file in the working directory! For Each filTemp in folWorkingDirectory.Files Set filEtextFile = filTemp Status "EText File: " & filEtextFile.Name Set tsEtext = filEtextFile.OpenAsTextStream(ForReading) Status "Loading etext file into memory for processing..." strEtextContent = tsEtext.ReadAll tsEtext.Close Exit For Next ''''''''''Find the title If Instr(1, strEtextContent, " Etext of ", vbTextCompare) <> 0 Then strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext of ", vbTextCompare), 80) strTitle = Mid(strTitle, Len(" Etext of ") + 1) End If If strTitle = "" Then If Instr(1, strEtextContent, " Etext: ", vbTextCompare) <> 0 Then strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext: ", vbTextCompare), 80) strTitle = Mid(strTitle, Len(" Etext: ") + 1) End If End If If strTitle = "" Then If Instr(1, strEtextContent, " Etext; ", vbTextCompare) <> 0 Then strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Etext; ", vbTextCompare), 80) strTitle = Mid(strTitle, Len(" Etext; ") + 1) End If End If If strTitle = "" Then If Instr(1, strEtextContent, " Gutenberg Etext ", vbTextCompare) <> 0 Then strTitle = Mid(strEtextContent, Instr(1, strEtextContent, " Gutenberg Etext ", vbTextCompare), 80) strTitle = Mid(strTitle, Len(" Gutenberg Etext ") + 1) End If End If If strTitle = "" Then strTitle = filEtextFile.Name End If strTitle = Trim(strTitle) 'End the title at the first non-alpha character For intCounter = 1 to Len(strTitle) If Mid(strTitle, intCounter, 1) >= "A" And Mid(strTitle, intCounter, 1) <= "Z" Then strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1) ElseIf Mid(strTitle, intCounter, 1) >= "a" And Mid(strTitle, intCounter, 1) <= "z" Then strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1) ElseIf Mid(strTitle, intCounter, 1) >= "0" And Mid(strTitle, intCounter, 1) <= "9" Then strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1) ElseIf Mid(strTitle, intCounter, 1) = " " Then strTitleBuffer = strTitleBuffer & Mid(strTitle, intCounter, 1) ElseIf Mid(strTitle, intCounter, 1) = "'" Then 'Do Nothing. Skip apostrophes. ElseIf Mid(strTitle, intCounter, 1) = "#" Then 'Do Nothing. Skip it too. Else 'Try to allow leading "H. G. Wells" but exit before typical author name at end of title If Len(strTitleBuffer) > 5 Then Exit For End If Next strTitle = strTitleBuffer 'Trim title to maximum 30 characters If Len(strTitle) > 30 Then If Left(strTitle, Len("The ")) = "The " Then strTitle = Mid(strTitle, Len("The ") + 1) End If If Len(strTitle) > 30 Then If Left(strTitle, Len("A ")) = "A " Then strTitle = Mid(strTitle, Len("A ") + 1) End If If Len(strTitle) > 30 Then If Left(strTitle, Len("Adventures of ")) = "Adventures of " Then strTitle = Mid(strTitle, Len("Adventures of ") + 1) End If If Len(strTitle) > 30 Then If Left(strTitle, Len("Works of ")) = "Works of " Then strTitle = Mid(strTitle, Len("Works of ") + 1) End If If Len(strTitle) > 30 Then If Left(strTitle, Len("Collection of ")) = "Collection of " Then strTitle = Mid(strTitle, Len("Collection of ") + 1) End If If Len(strTitle) > 30 Then If Left(strTitle, Len("Life and Adventures of ")) = "Life and Adventures of " Then strTitle = Mid(strTitle, Len("Life and Adventures of ") + 1) End If If Len(strTitle) > 30 Then If Right(strTitle, Len("Vol.")) = "Vol." Then strTitle = Left(strTitle, Instr(strTitle,"Vol.") - 1) End If If Len(strTitle) > 30 Then If Right(strTitle, Len("Volume")) = "Volume" Then strTitle = Left(strTitle, Instr(strTitle,"Volume") - 1) End If If Len(strTitle) > 30 Then If Right(strTitle, Len("No.")) = "No." Then strTitle = Left(strTitle, Instr(strTitle,"No.") - 1) End If If Len(strTitle) > 30 Then If Right(strTitle, Len("Number")) = "Number" Then strTitle = Left(strTitle, Instr(strTitle,"Number") - 1) End If strTitle = Trim(strTitle) If Len(strTitle) > 30 Then strTitle = Left(strTitle, 27) & "..." End If strTitleBuffer = "" Status "Title: " & strTitle ''''''''''Remove the Gutenberg "small print" header Status "Removing Gutenberg ""small print"" header" If Instr(strEtextContent, "*END*") <> 0 Then strEtextContent = Mid(strEtextContent, Instr(strEtextContent, "*END*")) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If ''''''''''Remove miscellaneous Gutenberg info line Status "Checking for miscellaneous Gutenberg info line" Do While Instr(1, Left(strEtextContent, 1000), "Gutenberg ", vbTextCompare) <> 0 Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "Gutenberg ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) Loop ''''''''''Remove miscellaneous info lines Status "Checking for miscellaneous info lines" Do While Instr(1, Left(strEtextContent, 1000), "*", vbTextCompare) <> 0 Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "*", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) Loop Do While Instr(1, Left(strEtextContent, 1000), "@", vbTextCompare) <> 0 Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "@", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) Loop Do While Instr(1, Left(strEtextContent, 1000), "-----", vbTextCompare) <> 0 Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "-----", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) Loop ''''''''''Remove the "Scanned By" info line Status "Checking for ""Scanned By"" data" If Instr(1, Left(strEtextContent, 1000), "scanned By ", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "Scanned By ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If If Instr(1, Left(strEtextContent, 1000), "created by ", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "created by ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If If Instr(1, Left(strEtextContent, 1000), "prepared by ", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "prepared by ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If If Instr(1, Left(strEtextContent, 1000), "prepared from ", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "prepared from ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If If Instr(1, Left(strEtextContent, 1000), "proofed by ", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(1, strEtextContent, "proofed by ", vbTextCompare)) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If If Instr(Left(strEtextContent, 1000), "OCR") <> 0 Then Status " Removing..." strEtextContent = Mid(strEtextContent, Instr(strEtextContent, "OCR")) strEtextContent = Mid(strEtextContent, Instr(strEtextContent, vbCrLf) + 2) End If ''''''''''Remove the Gutenberg tag at the end of the etext Status "Checking for Gutenberg tag at end of story" If Instr(1, Right(strEtextContent, 1000), "Gutenberg", vbTextCompare) <> 0 Then Status " Removing..." strEtextContent = Left(strEtextContent, InstrRev(strEtextContent, "Gutenberg")) strEtextContent = Left(strEtextContent, InstrRev(strEtextContent, vbCrLf) -1) End If ''''''''''Unwrap the text 'Recover from stupid FTP text download strEtextContent = Replace(strEtextContent, vbCr & vbCrLf, vbCrLf) 'Remove excessive linefeeds Do While Instr(strEtextContent, vbCrLf & vbCrLf & vbCrLf) <> 0 strEtextContent = Replace(strEtextContent, vbCrLf & vbCrLf & vbCrLf, vbCrLf & vbCrLf) Loop 'Save double linefeeds temporarily Status "Saving paragraph carriage returns" strEtextContent = Replace(strEtextContent, vbCrLf & vbCrLf, "^P") strEtextContent = Replace(strEtextContent, vbCr & vbCr, "^P") strEtextContent = Replace(strEtextContent, vbLf & vbLf, "^P") 'Remove hyphenation strEtextContent = Replace(strEtextContent, "-" & vbCrLf, "") strEtextContent = Replace(strEtextContent, "-" & vbCr, "") strEtextContent = Replace(strEtextContent, "-" & vbLf, "") 'Unwrap all text Status "Unwrapping text" strEtextContent = Replace(strEtextContent, vbCrLf, " ") strEtextContent = Replace(strEtextContent, vbCr, " ") strEtextContent = Replace(strEtextContent, vbLf, " ") 'Replace the saved double linefeeds with single linefeeds Status "Restoring paragraph carriage returns" strEtextContent = Replace(strEtextContent, "^P", vbCrLf) 'Replace double spaces with single spaces. Status "Replacing double spaces with single spaces" Do While Instr(strEtextContent, " ") strEtextContent = Replace(strEtextContent, " ", " ") Loop ''''''''''Write the changed text back to the temporary file Status "Saving changes" Set tsEtext = filEtextFile.OpenAsTextStream(ForWriting) tsEtext.Write strEtextContent strEtextContent = "" tsEtext.Close ''''''''''Run the MAKEDOC program to create the PDB file Status "Converting etext to PDB format" strPdbFileName = fs.GetAbsolutePathName(fs.BuildPath(filZipFile.Path, "..\" & filEtextFile.Name)) strPdbFileName = Left(strPdbFileName, Len(strPdbFileName) - 3) & "pdb" strPdbFileName = Lcase(strPdbFileName) RunCommandAsBatchFile "makedoc.exe -r " & filEtextFile.ShortPath & " """ & strPdbFileName & """ """ & strTitle & """" ''''''''''Clean up fs.DeleteFile filEtextFile.Path fs.DeleteFolder folWorkingDirectory.Path Set tsEtext = Nothing Set fs = Nothing End Sub Function CreateWorkingDirectory() 'As String 'Returns the path to a directory created under the %TEMP% folder Dim fs 'As Scripting.FileSystemObject Dim intCounter 'As Integer Dim strDirName 'As String Const TemporaryFolder = 2 'Scripting.SpecialFolderConst Set fs = CreateObject("Scripting.FileSystemObject") strDirName = fs.GetSpecialFolder(TemporaryFolder) If Right(strDirName, 1) = "\" Then strDirName = strDirName & "~" & Wscript.ScriptName Else strDirName = strDirName & "\~" & Wscript.ScriptName End If intCounter = 0 Do While fs.FolderExists(strDirName & Cstr(intCounter)) intCounter = intCounter + 1 Loop fs.CreateFolder strDirName & Cstr(intCounter) CreateWorkingDirectory = strDirName & Cstr(intCounter) ''''''''''Clean up Set fs = Nothing End Function Sub RunCommandAsBatchFile(strCommand) '''''''''' Declare all the variables and objects we will be using Dim wsh 'As Wscript.Shell Dim ts 'As Scripting.TextStream Dim fs 'As Scripting.FileSystemObject Dim strBatFileName 'As String '''''''''' Define all the constants we will (or might) need Const TemporaryFolder = 2 'Scripting.SpecialFolderConst.TemporaryFolder Const ForWriting = 2 'Scripting.IOMode.ForWriting Const WshHide = 0 'IWshRuntimeLibrary.WshWindowStyle.WshHide Const WshNormalFocus = 1 'IWshRuntimeLibrary.WshWindowStyle.WshNormalFocus Const WshMinimizedFocus = 2 'IWshRuntimeLibrary.WshWindowStyle.WshMinimizedNoFocus Const WshMaximizedNoFocus = 3 'IWshRuntimeLibrary.WshWindowStyle.WshMaximizedNoFocus Const WshNormalNoFocus = 4 'IWshRuntimeLibrary.WshWindowStyle.WshNormalNoFocus Const WshMinimizedNoFocus = 6 'IWshRuntimeLibrary.WshWindowStyle.WshMinimizedNoFocus '''''''''' Create the objects we will need Set wsh = CreateObject("Wscript.Shell") Set fs = CreateObject("Scripting.FileSystemObject") '''''''''' Create a temporary batch file name. strBatFileName = fs.BuildPath(fs.GetSpecialFolder(TemporaryFolder), "~" & Wscript.ScriptName & ".bat") '''''''''' Create the temporary batch file. Set ts = fs.OpenTextFile(strBatFileName, ForWriting, True) ts.WriteLine "@echo off" ts.WriteLine strCommand ts.WriteLine "cls" ts.Close '''''''''' Run the batch file. 'The first argument is the batch file name. The second is 'the "window style". The third argument is whether or not 'the script should wait on the batch file to finish. Since 'we will be deleting the batch file during our clean-up 'phase, we have to wait for it to finish. If the file is run 'with CSCRIPT, show the DOS window. Otherwise, hide it. If Lcase(Right(Wscript.FullName, 12)) = "\cscript.exe" Then wsh.Run strBatFileName, WshNormalFocus, True Else wsh.Run strBatFileName, WshHide, True End If '''''''''' Clean up. 'Delete the batch file we created since we are done with it. fs.DeleteFile strBatFileName 'Anything that used "set" should be set equal to Nothing Set wsh = Nothing Set ts = Nothing Set fs = Nothing End Sub Sub Status (strMessage) 'If the program was run with CSCRIPT, this writes a 'line into the DOS box. If run with WSCRIPT, it does nothing. If Lcase(Right(Wscript.FullName, 12)) = "\cscript.exe" Then Wscript.Echo strMessage End If End Sub