From b2a459b66d1a49d74d8dcafc89497aa676fd1b5a Mon Sep 17 00:00:00 2001 From: Charles Connell Date: Sat, 21 Dec 2013 16:12:30 -0500 Subject: [PATCH] The process to get pdf2htmlEX installed on beta --- fabfile.py | 32 ++++++++++++++++++++++++++++++++ karmaworld/apps/notes/gdrive.py | 4 +++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fabfile.py b/fabfile.py index 6d9bd8f..f903dc5 100644 --- a/fabfile.py +++ b/fabfile.py @@ -280,6 +280,38 @@ def import_usde(): virtenv_exec('{0}/manage.py import_usde_csv {1}'.format(env.code_root, env.usde_csv)) virtenv_exec('{0}/manage.py sanitize_usde_schools'.format(env.code_root)) +@task +def install_pdf2htmlEX(): + """ + # Some things we need: + sudo apt-get install cmake libpng-dev libjpeg-dev libgtk2.0-dev pkg-config libfontconfig1-dev autoconf libtool + + # Ubuntu 12.04 comes with a version of poppler that is too + # old, so compile our own + wget http://poppler.freedesktop.org/poppler-0.24.4.tar.xz + tar xf poppler-0.24.4.tar.xz + ./configure --prefix=/usr --enable-xpdf-headers + make + sudo make install + + # Ubuntu 12.04 comes with a version of fontforge that is too + # old, so compile our own + git clone https://github.com/fontforge/fontforge.git + ./autogen.sh + ./configure --prefix=/usr + make + sudo make install + + # Compile pdf2htmlEX + wget https://github.com/coolwanglu/pdf2htmlEX/archive/v0.10.tar.gz + tar xf v0.10.tar.gz + cd pdf2htmlEX-0.10 + cmake . + make + sudo make install + """ + print "not implemented yet!" 
+ @task def first_deploy(): """ diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py index ec45009..a58b743 100644 --- a/karmaworld/apps/notes/gdrive.py +++ b/karmaworld/apps/notes/gdrive.py @@ -74,7 +74,7 @@ def pdf2html(content): pdf_file.flush() tmp_dir = tempfile.gettempdir() html_file_name = uuid.uuid4().hex - html_file_path = tmp_dir + os.sep + html_file_name + html_file_path = os.path.join(tmp_dir, html_file_name) command = ['pdf2htmlEX', pdf_file.name, html_file_name] call = subprocess.Popen(command, shell=False, cwd=tmp_dir) @@ -82,6 +82,8 @@ def pdf2html(content): if call.returncode != 0: raise ValueError("PDF file could not be processed") + pdf_file.close() + try: html_file = open(html_file_path, 'r') html = html_file.read() -- 2.25.1