Code: Select all
// Instead of just extracting all the books from one library, this script imports a list of libraries from a CSV file
// and, for each one, saves all of its books to the output CSV
// To speed up the running of this script, the following iMacros preferences are recommended...
// Go to iMacros options, then the "general" tab
// Set "Replay Speed" to fast
// Under "Visual Effects", untick "scroll to object when found" as well as "Highlight object when found"
// Under "Javascript scripting settings", untick "Show Javascript during replay"
// File is read from the "datasources" path set in iMacros prefs, not "downloads" path pref
// Name of the input CSV file listing the libraries to process
const inputFileName = "Libraries-to-extract-from.csv";
// The starting row number (to enable importing just part of a large CSV)
const startRowID = 1;
// Name of the file where the results are output (is saved into the "Downloads" folder set in iMacros prefs)
// NB: Every time this script is run, the results are just added to the end of this file.
// So delete/rename the output file if needed - to avoid duplicate entries.
const outputFileName = "Books-url-list.csv";
// ###################
// Global variable for status message, since using iimDisplay() clears previous messages
var statusMessage;
addStatusMessage("Importing " + inputFileName + ", starting at line " + startRowID);
// For each library in the CSV file, import all of their books.
// The row counter begins at startRowID (rather than a hard-coded 1) so that the
// "starting row" setting above actually controls where the import begins.
var rowID = startRowID;
while (true) {
    // Not using addStatusMessage() directly, since want to throw away this last message afterwards
    iimDisplay(statusMessage + "\n-Processing row " + rowID);
    var currentRowContents = getCSVRow(rowID);
    if (!currentRowContents) {
        // Break if end of file reached, or if there was an error reading the file (eg file not found).
        // An empty row is also falsy, so it ends the run in the same way.
        addStatusMessage("Exiting on row " + rowID + ". Either an error has occurred, or the end of file was reached.");
        break;
    }
    extractFromLibrary(currentRowContents);
    rowID++;
}
function extractFromLibrary(targetLibrary) {
    // Saves every book link of one library to the output CSV.
    // targetLibrary: base URL of the library; "/books" is appended to reach its books listing.
    // Walks the listing page by page, calling processCurrentPage() on each one.

    // URL of the books page to process. Trailing slashes are dropped first so that a
    // library URL ending in "/" does not produce a "//books" path.
    var targetBooksPage = String(targetLibrary).replace(/\/+$/, "") + "/books";
    goToPage(targetBooksPage);
    var lastPageID = getLastPageID();
    addStatusMessage("Saving pages 1->" + lastPageID + " for " + targetBooksPage);
    // Always start from page 1: this matches the status message above, and the loop
    // previously referenced a start-page variable that is not defined in this script.
    var firstPageID = 1;
    for (var i = firstPageID; i <= lastPageID; i++) {
        // Not using addStatusMessage() directly, since want to throw away this last message afterwards
        iimDisplay(statusMessage + "\n-Processing page " + i);
        // The goToPage() call above already loaded page 1, so only navigate when i is not 1
        if (i !== 1) {
            goToPage(targetBooksPage + "?page=" + i);
        }
        processCurrentPage();
    }
}
/* Helper Functions */
function runMacro(macro) {
    // Plays the given macro text via iimPlay() and hands back iimPlay()'s return value.
    // A "SET !TIMEOUT_TAG 3" line is placed in front of the macro so that the tag
    // timeout is 3 seconds (the original author notes the default is 60).
    var tagTimeoutLine = "SET !TIMEOUT_TAG 3\n";
    var inlineMacro = "CODE:" + tagTimeoutLine + macro;
    return iimPlay(inlineMacro);
}
function addStatusMessage(newMessage) {
    // Appends newMessage as a new "-" prefixed line to the global statusMessage log
    // and redisplays the whole log. The log is kept in a global because each
    // iimDisplay() call replaces whatever was shown before.
    var logSoFar = statusMessage ? statusMessage : "Starting script...";
    statusMessage = logSoFar + "\n-" + newMessage;
    iimDisplay(statusMessage);
}
function getCSVRow(rowID) {
    // Reads column 1 of line rowID from the input CSV (inputFileName).
    // Returns the extracted value, or null when the macro reports an error
    // (negative return code) - e.g. end of file reached or file not found.
    var readRowMacro = "SET !DATASOURCE " + inputFileName;
    readRowMacro += "\nSET !DATASOURCE_COLUMNS 1";
    readRowMacro += "\nSET !DATASOURCE_LINE " + rowID;
    readRowMacro += "\nSET !EXTRACT {{!COL1}}";

    var returnCode = runMacro(readRowMacro);
    // Fetching the row failed, so there is nothing to hand back
    if (returnCode < 0) {
        return null;
    }
    return iimGetLastExtract(1);
}
function goToPage(url) {
    // Loads the given URL in the browser. The image filter is switched on first
    // so that images are not loaded, which shortens page load time.
    var imageFilterLine = "FILTER TYPE=IMAGES STATUS=ON";
    var navigationLine = "URL GOTO=" + url;
    runMacro(imageFilterLine + "\n" + navigationLine);
}
function getLastPageID() {
    // Returns the number of the last page of books for the currently loaded listing, as a number.
    // The site uses "Page 1", "Page 2", "...", "Page N", "Next" type site navigation, so the
    // macro first finds the "Next" link, then uses relative positioning (POS=R-1) to extract
    // the text of the link immediately before it, which is the last page ID.
    // Falls back to 1 when there is no "Next" link or the extracted text is not a page number.
    runMacro("TAG POS=1 TYPE=A ATTR=TXT:Next EXTRACT=TXT" +
        "\nTAG POS=R-1 TYPE=A ATTR=TXT:* EXTRACT=TXT");
    // The second extract is the text of the link just before "Next"
    var precedingLinkText = iimGetLastExtract(2);
    if (precedingLinkText === "#EANF#") {
        // Tags not found, or timeout reached
        addStatusMessage("No next page button found, so there must only be one page total" +
            " (or else the page didn't finish loading in 60s).");
        return 1;
    }
    // Declared locally (it used to leak as an implicit global) and converted to a number,
    // so callers always get the same type back whichever branch is taken.
    var lastPageID = parseInt(precedingLinkText, 10);
    if (Number.isNaN(lastPageID) || lastPageID < 1) {
        // Extract was empty or not numeric; still process the page that is already loaded
        addStatusMessage("Could not read the last page number from \"" + precedingLinkText +
            "\", so only page 1 will be processed.");
        return 1;
    }
    return lastPageID;
}
function processCurrentPage() {
    // Appends the HREF of every matching link on the currently loaded page to the output CSV
    // (outputFileName), one link per row. Links are matched by TAG position 1, 2, 3, ...
    // until a position is not found.
    //
    // Note: extraction and saving to CSV were not combined, since hard/impossible to know when to stop,
    // since logic not possible inside macros - and whenever SAVEAS TYPE=EXTRACT is used, the
    // EXTRACT variable is cleared. So iimGetLastExtract(1) always returns null, regardless of success or
    // failure. Even if the iimPlay return code was checked instead, #EANF# junk would still have been added
    // to the last row of the CSV, which isn't desired.
    // To reduce the slowdown caused by splitting the steps, the EXTRACT variable is manually set before
    // using SAVEAS, rather than wasting time using TAG again.
    for (var linkPosition = 1; ; linkPosition++) {
        // Attempt extraction of next library book link
        runMacro("TAG POS=" + linkPosition + " TYPE=A ATTR=CLASS:library-link&&TITLE: EXTRACT=HREF");
        var currentLibraryURL = iimGetLastExtract(1);
        // Stop when the link was not found. An empty/null extract is treated the same way:
        // otherwise the loop would never see "#EANF#" and would keep writing junk rows forever.
        if (!currentLibraryURL || currentLibraryURL === "#EANF#") {
            break;
        }
        // Link found, so save it to the next line of the CSV
        runMacro("SET !EXTRACT " + currentLibraryURL +
            "\nSAVEAS TYPE=EXTRACT FOLDER=* FILE=" + outputFileName);
    }
}