#!/usr/bin/env python

# From the original:
#
#       http://nxsy.org/exporting-from-confluence-part-1
#
# Modified to work with Python 2.5 and confluence 2.10.
#
# Saint Aardvark the Carpeted
# aardvark ta saintaardvarkthecarpeted dot com
# Oct 15, 2009

from xml.etree.cElementTree import iterparse
from cStringIO import StringIO
import codecs

# Maps the id text of a page's BodyContent entry to that page's title.  It is
# filled in by the first pass below and read by the later pass that writes the
# BodyContent objects out to disk.
pages = {}

# First pass: find every Page object that should be saved, remember which
# BodyContent entry belongs to it, and write any inline content right away.
# Opened explicitly (rather than handing iterparse a file name) so the handle
# can be closed deterministically; try/finally keeps this valid on Python 2.5.
source = open("entities.xml", "rb")
try:
    for event, elem in iterparse(source):
        if elem.tag != "object" or elem.get('class') != 'Page':
            continue

        save = True
        title = None
        content = None
        # Reset for every page: without this a page lacking a bodyContents
        # entry would reuse the body id left over from the previous page (or
        # raise NameError on the very first page).
        content_node = None

        for child in list(elem):
            name = child.get('name')
            if child.tag == "property":
                if name == "title":
                    title = child.text
                elif name in ("content", "BodyContent"):
                    content = child.text
                elif name == "originalVersion":
                    # Pages carrying an originalVersion property are skipped
                    # (presumably older revisions of another page -- confirm
                    # against the export format).
                    save = False
            elif child.tag == "collection" and name == "bodyContents":
                for grandkid in list(child):
                    if grandkid.tag == "element" and grandkid.get('class') == "BodyContent":
                        body_refs = list(grandkid)
                        # Guard against an empty element instead of raising
                        # IndexError; the first child's text is the body id.
                        if body_refs:
                            content_node = body_refs[0].text

        if not save:
            continue

        # Only register pages that actually reference a BodyContent entry.
        if content_node is not None:
            pages[content_node] = title
        # A single parenthesised argument prints identically on Python 2.
        print("Will save page with title '%s'" % (title,))

        if not content:
            print("... but has no contents")
            continue

        out = codecs.open('pages/%s' % (title,), 'w', 'utf-8')
        try:
            out.write(content)
        finally:
            # Close promptly so data is flushed and handles are not leaked
            # when the export contains many pages.
            out.close()
finally:
    source.close()

# Second pass: write out the text of every BodyContent object whose id was
# recorded in `pages`, using the page title stored there as the file name.
# Opened explicitly so the handle can be closed deterministically; try/finally
# keeps this valid on Python 2.5.
source = open("entities.xml", "rb")
try:
    for event, elem in iterparse(source):
        if elem.tag != "object" or elem.get('class') != 'BodyContent':
            continue

        body_id = elem.find("id")
        if body_id is None or body_id.text not in pages:
            continue

        # The first <property> child is taken as the body text (assumes the
        # export lists the body property first -- confirm against the format).
        body = elem.find("property")
        if body is None or body.text is None:
            # Nothing to write.  Skipping here, before the file is opened,
            # avoids truncating a file that may already hold content.
            continue

        out = codecs.open('pages/%s' % (pages[body_id.text],), 'w', 'utf-8')
        try:
            out.write(body.text)
        finally:
            # Close promptly so data is flushed and handles are not leaked.
            out.close()
finally:
    source.close()
