packaging/titlegrab.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

#!/usr/bin/python -tt
import sys
import xml.dom.minidom
from xml.dom.minidom import Node


def _collect_title_text(doc, container_tag):
    """Return the concatenated title text found under *container_tag*.

    For every element named *container_tag* in *doc*, every descendant
    ``<title>`` element is visited and the data of its direct text-node
    children is collected in document order.  Text inside child elements
    of ``<title>`` (inline markup) is not included.  Returns an empty
    string when nothing is found.
    """
    parts = []
    for container in doc.getElementsByTagName(container_tag):
        for title_node in container.getElementsByTagName("title"):
            for child in title_node.childNodes:
                if child.nodeType == Node.TEXT_NODE:
                    parts.append(child.data)
    return "".join(parts)


def extract_title(doc):
    """Return the title of a parsed DocBook document, or "" if none.

    The title is looked up under ``<articleinfo>`` first; if that yields
    no text, the document must not be an ``<article>``, so ``<bookinfo>``
    is tried instead.
    """
    for container_tag in ("articleinfo", "bookinfo"):
        title = _collect_title_text(doc, container_tag)
        if title:
            return title
    return ""


def main(argv=None):
    """Print the title of each XML file named in *argv*, one per line.

    *argv* is the list of file names; when it is None, ``sys.argv[1:]``
    is used.  Returns the process exit status: 2 when no file names were
    given (after printing a usage message), 0 otherwise.  Errors raised
    by the XML parser or by opening a file propagate to the caller.
    """
    xml_files = sys.argv[1:] if argv is None else list(argv)

    if not xml_files:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Usage: titlegrab.py xml_file...")
        return 2

    for path in xml_files:
        doc = xml.dom.minidom.parse(path)
        print(extract_title(doc))
    return 0


if __name__ == "__main__":
    sys.exit(main())