wiki2pdf/wiki2pdf.py
author Henning Fleddermann
Wed, 22 Jun 2011 13:19:54 +0200
changeset 33 96318c07f68f
parent 32 c67f84c77fa7
permissions -rwxr-xr-x
merge
     1 #!/usr/bin/env python
     2 # -*- coding: utf-8 -*-
     3 """
     4     wiki2pdf
     5     --------
     6     
     7     converts wiki rst pages to sphinx document structure
     8 
     9     :copyright: 2011 by Henning Fleddermann
    10     :license: GNU GPL, see COPYING for details.
    11 """
    12 import argparse
    13 import xmlrpclib
    14 import getpass
    15 import re
    16 import codecs
    17 import os
    18 import shutil
    19 
# Template for the generated index.rst: a single include of the root page.
INDEX = """.. include:: %(include_name)s.rst"""
# All generated .rst files and downloaded attachments land under this directory.
SPHINX_SOURCE_DIR = "./sphinx/source/"
    22 
    23 class WikiGetter(object):
    24     
    25     offset = 0
    26     
    27     def __init__(self, prefix="", name="", password="", wikiurl=""):
    28         self.prefix = prefix
    29         self.name = name
    30         self.password = password
    31         self.wikiurl = wikiurl
    32         self.offset = prefix.rfind("/") + 1
    33     
    34     def get_pagenames(self):
    35         mc = self.get_mc()
    36         opts = {"include_system": False, "include_underlay": False, "prefix": self.prefix}
    37         mc.getAllPagesEx(opts)
    38         result = mc()
    39         return tuple(result)[1]
    40     
    41     def get_pagenames_recursive(self, pagename):
    42         mc = self.get_mc()
    43         pages = []
    44         mc.getPage(pagename)
    45         try:
    46             result = tuple(mc())
    47         except xmlrpclib.Fault:
    48             return []
    49         pages.append(pagename)
    50         success = result[0]
    51         raw = result[1]
    52         if success:
    53             m = re.finditer(r"`(.*)<(.*)>`_", raw)
    54             for match in m:
    55                 if match.group(2)[0] == "/": # relative path
    56                     pages.extend(self.get_pagenames_recursive(pagename + match.group(2)))
    57                 else: # absolute path
    58                     pages.extend(self.get_pagenames_recursive(match.group(2)))
    59         return pages
    60 
    61     def get_mc(self):
    62         homewiki = xmlrpclib.ServerProxy(self.wikiurl + "?action=xmlrpc2", allow_none=True)
    63         auth_token = homewiki.getAuthToken(self.name, self.password)
    64         mc = xmlrpclib.MultiCall(homewiki)
    65         mc.applyAuthToken(auth_token)
    66         return mc
    67     
    68     def make_path(self, file):
    69         d = os.path.dirname(file)
    70         try:
    71             os.makedirs(d)
    72         except OSError:
    73             pass
    74     
    75     def parse_page(self, text):
    76         pattern = re.compile(r"^##links2image (.*)$", re.M)
    77         m = pattern.search(text)
    78         if m:
    79             img_pattern = re.compile("`<(%s)>`_" % m.group(1))
    80             img_m = img_pattern.search(text)
    81             while img_m:
    82                 text = "%s.. figure:: %s.pdf%s" % (text[:img_m.start()], img_m.group(1), text[img_m.end():])
    83                 img_m = img_pattern.search(text)
    84         pattern = re.compile(r"^#.*$\n?", re.M)
    85         m = pattern.search(text)
    86         while m:
    87             text = "%s%s" % (text[:m.start()], text[m.end():])
    88             m = pattern.search(text)
    89         return text
    90     
    91     def replace_links(self, pagename, text):
    92         pattern = re.compile("`(.*)<(.*)>`_") # ToDo: fix links like `</CvMartinKaufmann>`_ 
    93         offset = pagename.rfind("/") + 1
    94         m = pattern.search(text)
    95         while m:
    96             if m.group(2)[0]=="/": # relativer Pfad
    97                 text = "%s.. include:: %s.rst%s" % (text[:m.start()], pagename[offset:]+m.group(2), text[m.end():])
    98             else: # absoluter Pfad -> uebersetzen in relativen
    99                 text = "%s.. include:: %s.rst%s" % (text[:m.start()], m.group(2)[offset:], text[m.end():])
   100             m = pattern.search(text)
   101         return text
   102     
   103     def replace_img_links(self, text):
   104         matches = re.finditer(".. (figure|image):: (\S*)", text)
   105         for m in matches:
   106             filename = self.workaround_sphinx_filenames(m.group(2))
   107             old_filename = filename
   108             filename = filename.rpartition("/")[2] # Haaack
   109             filename = filename + " "*(len(old_filename) - len(filename))
   110             filename_parts = filename.rpartition(".")
   111             if os.path.exists(os.path.join(SPHINX_SOURCE_DIR, filename_parts[0] + ".pdf")): # always prefer pdfs if they exist
   112                 filename = filename_parts[0] + ".pdf"
   113                 filename = filename + " "*(len(old_filename) - len(filename))
   114             elif os.path.exists(os.path.join(SPHINX_SOURCE_DIR, filename.rstrip())):
   115                 pass
   116             else:
   117                 file_ending = filename_parts[2].rstrip()
   118                 if file_ending == "pdf":
   119                     dummy = "placeholders/dummy.pdf"
   120                 elif file_ending == "svg":
   121                     dummy = "placeholders/dummy.svg"
   122                 else:
   123                     dummy = "placeholders/dummy.png"
   124                 self.make_path(os.path.join(SPHINX_SOURCE_DIR, filename.rstrip()))
   125                 shutil.copy(dummy, os.path.join(SPHINX_SOURCE_DIR, filename.rstrip()))
   126             text = "%s.. %s:: %s%s" % (text[:m.start()], m.group(1), filename, text[m.end():])
   127         return text
   128 
   129     def workaround_sphinx_filenames(self, filename):
   130         parts = list(filename.rpartition("."))
   131         parts[0] = parts[0].replace(".","_")
   132         return "".join(parts)
   133     
   134     def get_attachments(self, pagename):
   135         mc = self.get_mc()
   136         mc.listAttachments(pagename)
   137         result = tuple(mc())
   138         if result[0]:
   139             for attachment in result[1]:
   140                 self.get_attachment(pagename, attachment)
   141     
   142     def get_attachment(self, pagename, attachment):
   143         mc = self.get_mc()
   144         mc.getAttachment(pagename, attachment)
   145         result = tuple(mc())
   146         if result[0]:
   147             fid = open(os.path.join(SPHINX_SOURCE_DIR, self.workaround_sphinx_filenames(attachment)), "wb") # WARNING: attachments have to be in the root-source directory. this might lead to file-name collision.
   148             fid.write(result[1].data)
   149             fid.close()
   150     
   151     def get_and_write_page(self, pagename):
   152         mc = self.get_mc()
   153         mc.getPage(pagename)
   154         result = tuple(mc())
   155         if result[0]:
   156             text = result[1]
   157             self.make_path(os.path.join(SPHINX_SOURCE_DIR, pagename[self.offset:]))
   158             f = codecs.open(os.path.join(SPHINX_SOURCE_DIR, pagename[self.offset:] + ".rst"), "wb", encoding="utf-8") # append .rst to real prefix to avoid file/directory-name collision. haaack :)
   159             text = self.parse_page(text)
   160             text = self.replace_links(pagename, text)
   161             text = self.replace_img_links(text)
   162             f.write(text)
   163             f.close()
   164 
   165 def wiki2pdf(name="", password="", wikiurl="", prefix="", recursive=False):
   166     wikigetter = WikiGetter(prefix[0], name, password, wikiurl)
   167     if recursive:
   168         pages = wikigetter.get_pagenames_recursive(prefix[0])
   169     else:
   170         pages = wikigetter.get_pagenames()
   171     pages.extend(prefix)
   172     for page in pages:
   173         print page
   174         wikigetter.get_attachments(page)
   175         wikigetter.get_and_write_page(page)
   176     include_name = prefix[0].split('/')[-1]
   177     index_rst = INDEX % {"include_name": include_name}
   178     fid = codecs.open(os.path.join(SPHINX_SOURCE_DIR, "index.rst"), 'wb')
   179     fid.write(index_rst)
   180     fid.close()
   181 
   182 if __name__ == "__main__":
   183     parser = argparse.ArgumentParser(description='send files to a wiki')
   184     parser.add_argument('-w', '--wikiurl', dest="wikiurl", type=str, default="http://localhost:8080", required=True,
   185                        help='url of the wiki to send to')
   186     parser.add_argument('-p', '--prefix', dest='prefix', required=True, nargs="+",
   187                        help='space-delimited list of the wiki pages to get. the first one determines the document-structure')
   188     parser.add_argument('-u', '--username', dest='username', required=True,
   189                        help='name of the user in that wiki')
   190     parser.add_argument('-r', '--recursive', dest='recursive', action='store_true', required=False,
   191                        help='use recursive algorithm to download only the needed pages. warning: might get stuck on back-links')
   192 
   193     args = parser.parse_args()
   194     password = getpass.getpass("password: ")
   195     wiki2pdf(name=args.username, password=password, wikiurl=args.wikiurl, prefix=args.prefix, recursive=args.recursive)
Impressum Datenschutzerklärung