import getopt, sys
import os.path
try:
from pysqlite2 import dbapi2 as sqlite3
except ImportError:
import sqlite3
import codecs
import re
import threading, Queue, time, datetime
import cgi
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from urlparse import urlparse
from os import curdir, sep, makedirs
import zipfile
global_app_data_dir = "./"
# --------------------------------
# Log functions
def log_error(messagetext):
"""Log an error message."""
print "Error: ", messagetext
def log_message(messagetext):
"""Log a general message."""
print messagetext
def verbose_print(verbose, messagetext):
"""Output a general message if verbose is turned on."""
if verbose:
print messagetext
# --------------------------------
# Utility functions
def get_app_data_dir():
if sys.platform.startswith("darwin"):
path = os.path.expanduser("~/Library/Application Support/MySQL/DocLibrary/")
elif sys.platform.startswith("win"):
path = os.path.expandvars("${APPDATA}/MySQL/DocLibrary/")
else:
path = os.path.expanduser("~/.mysql/DocLibrary/")
if not os.path.exists(path):
os.makedirs(path)
return path
def get_lib_db_path():
path = os.path.join(get_app_data_dir(), "mysqldoclib.sqlite")
if os.path.exists(path):
return path
return os.path.join(global_app_data_dir, "mysqldoclib.sqlite")
def get_webui_db_path():
path = os.path.join(get_app_data_dir(), "mysqldoclib_webui.sqlite")
if os.path.exists(path):
return path
return os.path.join(global_app_data_dir, "mysqldoclib_webui.sqlite")
def get_user_db_path():
path = os.path.join(get_app_data_dir(), "mysqldoclib_usr.sqlite")
if not os.path.exists(path):
try:
# Connect to the database
db_conn = sqlite3.connect(path)
try:
# Execute SQL
try:
c = db_conn.cursor()
try:
c.execute("""
CREATE TABLE IF NOT EXISTS page_rating (
id_page_rating INTEGER PRIMARY KEY AUTOINCREMENT,
id_manual_type INTEGER NOT NULL,
title TEXT NOT NULL,
rating INTEGER
)""")
c.execute("CREATE INDEX IF NOT EXISTS idx_page_rating ON page_rating(title)")
c.execute("""
CREATE TABLE IF NOT EXISTS page_view (
id_page_view INTEGER PRIMARY KEY AUTOINCREMENT,
path TEXT NOT NULL,
hits INTEGER
)""")
c.execute("CREATE INDEX IF NOT EXISTS idx_page_view ON page_view(path)")
c.execute("""
CREATE TABLE IF NOT EXISTS lib_search (
id_lib_search INTEGER PRIMARY KEY AUTOINCREMENT,
search_text TEXT NOT NULL,
hits INTEGER
)""")
c.execute("CREATE INDEX IF NOT EXISTS idx_lib_search ON lib_search(search_text)")
c.execute("""
CREATE TABLE IF NOT EXISTS lib_status (
id_lib_file INTEGER PRIMARY KEY AUTOINCREMENT,
id_lib INTEGER,
downloaded INTEGER,
is_selected INTEGER
)""")
c.execute("INSERT INTO lib_status(id_lib, downloaded, is_selected) VALUES (1, 1, 1)")
db_conn.commit()
finally:
c.close()
except Exception, e:
log_error("Error while creating the usr database. %r" % e)
finally:
db_conn.close();
except Exception, e:
log_error("An error occurred while creating the usr database at %s. %r" % (path, e))
raise e
return path
def get_module_installation_dir():
return "./"
def read_file_content(filename, encoding="utf-8"):
"""Read the contents of a text file"""
try:
# Open file in read mode using the correct encoding
f = codecs.open(filename, "r", encoding)
try:
# Return the file contents
return f.read()
except Exception, e:
log_error("An error occurred reading from the file %s." % filename)
finally:
f.close();
except Exception, e:
log_error("An error occurred opening the file %s." % filename)
def save_file_content(filename, content, encoding="utf-8"):
"""Read the contents of a text file"""
try:
# Open file in read mode using the correct encoding
f = codecs.open(filename, "w", encoding)
try:
# Return the file contents
f.write(content)
except Exception, e:
log_error("An error occurred reading from the file %s." % filename)
finally:
f.close();
except Exception, e:
log_error("An error occurred opening the file %s." % filename)
# --------------------------------
# Library creation functions
def execute_sql_script(db_conn, filename, encoding = "UTF-8"):
"""Executes a SQL script file"""
# Read SQL file
sql_commands = read_file_content(filename, encoding)
if not sql_commands:
return False
try:
c = db_conn.cursor()
try:
# Run the SQL Script to create the database
c.executescript(sql_commands)
db_conn.commit()
return True
except Exception, e:
log_error("An error occurred while executing the SQL script. %r" % e);
return False
finally:
c.close()
except Exception, e:
log_error("An error occurred aquiring a database cursor. %r" % e);
return False
# --------------------------------
# Manual page caching functions
def html_remove_tags(data):
"""Removes all HTML tags from a given string"""
# other expression: '<[^>]*?>'
p = re.compile(r'<[^<]*?/?>')
return p.sub(' ', data)
def html_remove_extra_spaces(data):
"""Removes all extra spaces from a given string"""
p = re.compile(r'\s+')
return p.sub(' ', data)
def html_get_page_title(data):
"""Returns the chapter and title of a manual html page string"""
p = re.compile(r'\
(?P(Chapter\s)?[A-Z]?[\d\.]+)\s*(?P.*)\<\/title\>')
match = p.search(data)
if match:
return match.group('Chapter'), match.group('Title')
else:
p = re.compile(r'\(?P.*)\<\/title\>')
match = p.search(data)
if match:
return "", match.group('Title')
else:
return "", ""
def html_apply_page_modifications(data):
"""Make required changes to the html"""
p = re.compile(r'\<\/title\>')
data = p.sub('', data)
p = re.compile(r'\starget=\"_top\"')
data = p.sub('', data)
return data
class ManualPageData():
"""Data class that holds information about a manual page"""
filename = ""
title = ""
chapter = ""
content = ""
html_content = ""
class ScanManualPageThread(threading.Thread):
filename_queue = 0
manual_page_data_queue = 0
lib_zip_file = ""
def run(self):
try:
while True:
# get a filename from the queue, do not block
filename = self.filename_queue.get(False)
#full_filename = os.path.join(self.path, filename)
try:
# Open HTML file as a utf-8 file
#html_string = read_file_content(full_filename)
#if not html_string:
#continue
html_file = self.lib_zip_file.open(filename)
html_string = unicode(html_file.read(), "utf-8")
if not html_string:
continue
# Make file modifications
#save_file_content(full_filename, html_apply_page_modifications(html_string))
# Add new page data object to the queue
manual_page_data = ManualPageData()
manual_page_data.filename = filename
manual_page_data.chapter, manual_page_data.title = html_get_page_title(html_string)
manual_page_data.content = html_remove_extra_spaces(html_remove_tags(html_string))
manual_page_data.html_content = html_apply_page_modifications(html_string)
self.manual_page_data_queue.put(manual_page_data)
except Exception, e:
log_error("An error processing the page. %r" % e)
break
except Queue.Empty:
pass
def process_manual_page_data_queue(db_conn, lib_zip_file, path, id_manual, file_nr, file_count, manual_page_data_queue):
try:
# Get database cursor
c = db_conn.cursor()
files_processed = 0
# Check if there are manual_page_data objects in the queue and if so, process them
while True:
try:
# Fetch manual_page_data object from the queue if available, do not block
manual_page_data = manual_page_data_queue.get(False)
try:
# Insert HTML page
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
["/" + path + "/" + os.path.basename(manual_page_data.filename), "text/html", manual_page_data.html_content, 0])
# Insert manual page and content
c.execute("INSERT INTO page(id_manual, id_web_object, title, chapter) VALUES (?, ?, ?, ?)",
[id_manual, c.lastrowid, manual_page_data.title, manual_page_data.chapter])
c.execute("INSERT INTO page_content(id_page, title, content) VALUES (?, ?, ?)",
[c.lastrowid, manual_page_data.title, manual_page_data.content])
files_processed += 1
if (files_processed % 100 == 0):
log_message("%d file(s) of %d processed..." % (file_nr + files_processed, file_count));
except Exception, e:
log_error("An error occurred while inserting the page values for %s. %r" % (manual_page_data.filename, e))
except Queue.Empty:
break
return files_processed
except Exception, e:
log_error("An error occurred aquiring a database cursor. %r" % e)
def cache_pages(db_conn, manual_ids):
try:
# Get database cursor
c = db_conn.cursor()
try:
# Get all available manuals versions
c.execute("""-- Select all manuals
SELECT m.id_manual, m.directory, m.description
FROM manual m
ORDER BY m.id_manual""")
rows = c.fetchall()
except Exception, e:
log_error("An error occurred while executing the SQL commands. %r" % e)
finally:
c.close()
# Loop over all manuals and cache the contents of the file directory
for id_manual, directory, description in rows:
# if the number of manuals has been limited
if manual_ids:
# only include the given manual
if not str(id_manual) in manual_ids:
log_message("Skipping manual %s." % description)
continue
zip_file = directory + ".zip"
# Locate the zip file, first in the user app dir
# zip_file_path = os.path.join(os.path.join(get_app_data_dir(), 'repository'), zip_file)
# if not os.path.exists(zip_file_path):
# then in the ./repository dir
zip_file_path = os.path.join(os.path.join('.', 'repository'), zip_file)
if not os.path.exists(zip_file_path):
log_error("The zip file %s cannot be found." % zip_file_path)
continue
log_message("Processing %s ..." % zip_file_path)
lib_zip_file = zipfile.ZipFile(zip_file_path, 'r')
try:
#path = os.path.join('./', directory)
#files = [file for file in os.listdir(path) if file.lower().endswith(".html")]
files = [file for file in lib_zip_file.namelist() if file.lower().endswith(".html")]
file_count = len(files)
file_nr = 0
log_message("Caching manual %s, processing %d file(s) ..." % (description, file_count))
# Generate synchronization objects
filename_queue = Queue.Queue()
manual_page_data_queue = Queue.Queue()
# Fill filename queue
for f in files: #[:1]:
filename_queue.put(f)
time_start = datetime.datetime.now()
# Start threads
Pool = []
for i in range(1):
thread = ScanManualPageThread()
thread.filename_queue = filename_queue
thread.manual_page_data_queue = manual_page_data_queue
#thread.path = path
thread.lib_zip_file = lib_zip_file
Pool.append(thread)
thread.start()
# Wait for threads to complete
while Pool:
# Process all objects in queue
file_nr += process_manual_page_data_queue(db_conn, lib_zip_file, directory, id_manual, file_nr, file_count, manual_page_data_queue)
# Check if there are still threads that are alive
for index, the_thread in enumerate(Pool):
if the_thread.isAlive():
continue
else:
del Pool[index]
break
# Process all objects still left in queue after the threads have all been closed
file_nr += process_manual_page_data_queue(db_conn, lib_zip_file, directory, id_manual, file_nr, file_count, manual_page_data_queue)
# Get database cursor
c = db_conn.cursor()
try:
# Update manual to be installed
generation_date = datetime.datetime.now().strftime("%Y-%m-%d")
c.execute("UPDATE manual SET installed=1, generation_date=? WHERE id_manual=?", (generation_date, id_manual))
except Exception, e:
log_error("An error occurred while updating the manual entry. %r" % e)
finally:
c.close()
db_conn.commit()
time_duration = datetime.datetime.now() - time_start
log_message("%d file(s) of %d processed. Duration %d.%d seconds." % (file_nr, file_count,
time_duration.seconds, time_duration.microseconds))
# Add the images as web_objects
files = [file for file in lib_zip_file.namelist() if file.lower().endswith(".png")]
log_message("Processing %d image file(s) ..." % len(files))
for filename in files:
try:
# Get database cursor
c = db_conn.cursor()
try:
image_file = lib_zip_file.open(filename)
image_file_string = image_file.read()
if not image_file_string:
continue
# Insert HTML page
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
["/" + directory + "/images/" + os.path.basename(filename), "image/png", sqlite3.Binary(image_file_string), 0])
except Exception, e:
log_error("An error occurred while inserting the image file %s. %r" % (filename, e))
except Exception, e:
log_error("An error occurred aquiring a database cursor. %r" % e)
db_conn.commit()
except Exception, e:
log_error("An error occurred while executing the SQL commands. %r" % e)
finally:
lib_zip_file.close()
except Exception, e:
log_error("An error occurred aquiring a database cursor. %r" % e)
def rebuild_lib(verbose, doclib_db_name, manual_ids):
try:
# Check which database name to use
if not doclib_db_name or doclib_db_name == "":
doclib_db_name = get_lib_db_path()
# Connect to the database
db_conn = sqlite3.connect(doclib_db_name)
try:
log_message("Creating the documentation library structure...")
if execute_sql_script(db_conn, './mysqldoclib.sql'):
log_message("Documentation library created successfully.")
cache_pages(db_conn, manual_ids)
log_message( "Documentation library cached has been filled.")
else:
log_message("The documentation library structure has not been created.")
finally:
db_conn.close();
except Exception, e:
log_error("An error occurred while opening the database connection. %r" % e)
def rebuild_webui(verbose, webui_db_name):
try:
# Check which database name to use
if not webui_db_name or webui_db_name == "":
webui_db_name = get_lib_db_path()
# Connect to the database
db_conn = sqlite3.connect(webui_db_name)
try:
log_message("Creating the documentation library webui structure...")
execute_sql_script(db_conn, './mysqldoclib_webui.sql')
c = db_conn.cursor()
try:
for path, dirs, files in os.walk("webui"):
for name in files:
if name.endswith(".html") or name.endswith(".wbp") or name.endswith(".css"):
# Open HTML file as a utf-8 file
html_string = read_file_content(os.path.join(path, name))
if not html_string:
continue
log_message("Path: %s, File: %s" % (path, name) )
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
["/" + path + "/" + os.path.basename(name), "text/html", html_string, 1])
else:
content_type = ""
if name.endswith(".png"):
content_type = "image/png"
elif name.endswith(".gif"):
content_type = "image/gif"
elif name.endswith(".ico"):
content_type = "image/vnd.microsoft.icon"
img_file = open(os.path.join(path, name), "rb")
log_message("Path: %s, File: %s" % (path, name) )
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
["/" + path + "/" + os.path.basename(name), content_type, sqlite3.Binary(img_file.read()), 0])
finally:
c.close()
db_conn.commit()
finally:
db_conn.close();
except Exception, e:
log_error("An error occurred while opening the database connection. %r" % e)
# --------------------------------
# HTTP server functions
def open_lib_db():
# Connect to the database
db_conn = sqlite3.connect(get_lib_db_path())
# Attach webui and usr database
try:
c = db_conn.cursor()
try:
c.execute("ATTACH DATABASE ? AS webui", (get_webui_db_path(),))
c.execute("ATTACH DATABASE ? AS usr", (get_user_db_path(),))
finally:
c.close()
except Exception, e:
log_error("Could not attach webui or usr database. %r" % e)
return db_conn
def build_search_result_page(search_string, manual_type):
# Escape search string
html_escape_table = {
"&": "&",
'"': """,
"'": "'",
">": ">",
"<": "<",
}
search_string_html = "".join(html_escape_table.get(c,c) for c in search_string)
search_string = search_string.replace(";", "\\;").replace("'", "\\'")
html = '''
MySQL Workbench Documentation Library Search Result'''
try:
# Connect to the database
db_conn = open_lib_db()
try:
# Get database cursor
c = db_conn.cursor()
# Log search
c.execute("SELECT hits FROM usr.lib_search WHERE search_text = ?", (search_string,))
rows = c.fetchall()
if rows:
c.execute("UPDATE usr.lib_search SET hits = hits + 1 WHERE search_text = ?", (search_string,))
else:
c.execute("INSERT INTO usr.lib_search (search_text, hits) VALUES(?, 1)", (search_string,))
db_conn.commit()
# Do the search
sql_select = """
SELECT p.chapter, p.title, wpo.path, pr.rating, m.directory,
offsets(page_content) as fs_offsets, snippet(page_content) as snippet
FROM page_content pc
JOIN page p ON p.id_page = pc.id_page
JOIN manual m ON m.id_manual = p.id_manual
JOIN web_object wpo ON p.id_web_object = wpo.id_web_object
LEFT OUTER JOIN page_rating pr ON p.title = pr.title
LEFT OUTER JOIN usr.page_rating pru ON p.title = pru.title
WHERE page_content MATCH '""" + search_string + "'"
if int(manual_type) > 0:
sql_select += " AND m.id_manual = " + str(manual_type) + " "
sql_select += """
ORDER BY pru.rating DESC, substr(fs_offsets, 1, 1), pr.rating DESC
LIMIT 0, 51"""
try:
# Get search result
c.execute(sql_select)
rows = c.fetchall()
html += "
MySQL Document Manual Search
"
html += "Search Result for `%s` returned " % search_string_html
if len(rows) > 50:
html += "more than 50 matches. Only the first 50 matches are displayed."
else:
html += "%d matches." % len(rows)
html += "
"
for chapter, title, path, rating, directory, offsets, snippet in rows:
html += "
"
except Exception, e:
log_error("An error occurred while executing the SQL command. %r" % e)
html += " An error occurred while executing the SQL command. %r" % str(e)
finally:
c.close()
finally:
db_conn.close();
except Exception, e:
log_error("An error occurred while opening the database connection. %r" % e)
html += '''
'''
return html
class DocsLibHandler(BaseHTTPRequestHandler):
def do_GET(self):
try:
url_full = self.path
if url_full == "/":
self.send_response(301)
self.send_header("Location", "/webui/index.wbp")
self.end_headers()
return
url_parsed = urlparse(url_full)
url = url_parsed.path
content_type = ""
if url.endswith(".html"):
content_type = "text/html"
elif url.endswith(".css"):
content_type = "text/css"
elif url.endswith(".png"):
content_type = "image/png"
elif url.endswith(".gif"):
content_type = "image/gif"
elif url.endswith(".ico"):
content_type = "image/vnd.microsoft.icon"
elif url.endswith(".wbp"):
content_type = "text/html"
if len(content_type) > 0:
if url.endswith("search.wbp"):
log_message("Query: %s" % url_parsed.query)
d = dict([(k,v) for k,junk,v in [line.partition("=") for line in url_parsed.query.split("&")]])
search_string = d["search"].replace("+", " ").replace("%22", "\"")
manual_type = d["manual_type"].strip()
log_message("Search started, search_string: %s, manual_type: %d" % (search_string, int(manual_type)))
self.send_response(200)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(build_search_result_page(search_string, int(manual_type)).encode("utf-8"))
else:
# check for web object in database
# Get database cursor
try:
c = self.server.db_conn.cursor()
try:
database = ""
if url.startswith("/webui"):
database = "webui."
# Get search result
c.execute("""
SELECT content_type, content, allow_embedded_code_execution
FROM """ + database + """web_object
WHERE path = ?""", [url])
rows = c.fetchall()
if rows:
for wo_content_type, wo_content, wo_allow_embedded_code_execution in rows:
self.send_response(200)
self.send_header("Content-type", wo_content_type)
self.end_headers()
if wo_content_type.startswith("text/"):
self.wfile.write(wo_content.encode("utf-8"))
else:
self.wfile.write(wo_content)
# Count hits
c.execute("SELECT hits FROM usr.page_view WHERE path = ?", (url,))
rows = c.fetchall()
if rows:
c.execute("UPDATE usr.page_view SET hits = hits + 1 WHERE path = ?", (url,))
else:
c.execute("INSERT INTO usr.page_view (path, hits) VALUES(?, 1)", (url,))
self.server.db_conn.commit()
else:
try:
f = open(curdir + sep + url)
self.send_response(200)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(f.read())
f.close()
except Exception, e:
self.send_error(404, "File Not Found: %s" % url)
log_error("File not found. %r" % e)
except Exception, e:
self.send_error(404, "An error occurred. %r" % e)
log_error("An error occurred while executing the SQL command. %r" % e)
finally:
c.close()
except Exception, e:
log_error("An error occurred while opening the database connection. %r" % e)
except IOError:
self.send_error(404, "File Not Found: %s" % url)
def do_POST(self):
try:
ctype, pdict = cgi.parse_header(self.headers.getheader("content-type"))
if ctype == "text/plain":
log_message("Header Items: %s" % self.headers.items())
# Get submitted values
values = ""
if self.headers.has_key('content-length'):
length = int( self.headers['content-length'] )
values = self.rfile.read(length)
# AppleWebKit uses & as separators between values
if self.headers.has_key('user-agent'):
if "AppleWebKit" in self.headers['user-agent']:
values = values.replace("&", "\n")
d = dict([(k,v) for k,junk,v in [line.partition("=") for line in values.split("\n")]])
search_string = d["search"].strip()
manual_type = d["manual_type"].strip()
log_message("Search started, search_string: %s, manual_type: %d" % (search_string, int(manual_type)))
self.send_response(301)
self.end_headers()
self.wfile.write(build_search_result_page(search_string, int(manual_type)).encode("utf-8"))
else:
self.send_error(404, "Wrong content-type" % ctype)
self.end_headers()
self.wfile.write("Wrong content-type
")
except Exception, e:
verbose_print(self.server.verbose, "An Exception was raised while processing the POST handler. %r" % e)
def log_message(self, message, *args):
verbose_print(self.server.verbose, message % args)
def log_request(self, code='-', size='-'):
verbose_print(self.server.verbose, '"%s" %s %s' % (self.requestline, str(code), str(size)))
def log_error(self, message, *args):
verbose_print(self.server.verbose, message % args)
def serve_docs(port = 8080, verbose = 1, datadir= "./", ready_event=None, bind=''):
global global_app_data_dir
global_app_data_dir = datadir
try:
try:
# Connect to the database
db_conn = open_lib_db()
try:
server = HTTPServer((bind, port), DocsLibHandler)
server.verbose = verbose
server.db_conn = db_conn
verbose_print(verbose, "Started HTTP server on port %d." % port)
if ready_event:
ready_event.set()
server.serve_forever()
finally:
db_conn.close();
except Exception, e:
log_error("An error occurred while opening the database connection. %r" % e)
raise e
except KeyboardInterrupt:
verbose_print(verbose, "Keyboard interrupt received, shutting down HTTP server.")
server.socket.close()
# --------------------------------
# Main application
def usage():
print """MySQL Document Library Standalone Application - mysqldoclib.py
Usage: wbdocs.py -h -pPort -v [build-lib | rebuild-lib | build-webui | rebuild-webui | serve-docs]
This applications serves and maintains a documentation library for MySQL products.
build-lib | rebuild-lib
These commands create the documentation library repository.
build-webui | rebuild-webui
These commands rebuild the webui repository that is used to server the web pages.
serve-docs
This argument launches a web server to server the documentation library"""
def main(argv):
try:
opts, args = getopt.getopt(sys.argv[1:], "hp:d:v", ["help", "port=", "db="])
except getopt.GetoptError, e:
print "Invalid option passed to module. ", str(e)
usage()
sys.exit(2)
verbose = False
port = 8080
db_name = ""
timestamp = ""
for o, a in opts:
if o == "-v":
verbose = True
elif o in ("-h", "--help"):
usage()
sys.exit()
elif o in ("-p", "--port"):
if a.isdigit():
port = int(a)
else:
assert False, "The specified port must be a number."
elif o in ("-d", "--db"):
db_name = a
else:
assert False, "Unhandled option."
if args:
if args[0] in ("build-lib", "rebuild-lib"):
rebuild_lib(verbose, db_name, args[1:])
if args[0] in ("build-webui", "rebuild-webui"):
rebuild_webui(verbose, db_name)
elif args[0] == "serve-docs":
serve_docs(port, verbose)
else:
usage()
if __name__ == "__main__":
main(sys.argv[1:])