blob: e5ebaea5e32a88098d53852bb86738886cbd46a0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
#!/usr/bin/python -tt
import sys
import xml.dom.minidom
from xml.dom.minidom import Node
def title_text(doc, info_tag):
    """Return the concatenated title text found under *info_tag* elements.

    For every ``<info_tag>`` element in *doc*, each descendant ``<title>``
    element is visited and the data of its direct text-node children is
    collected in document order.  Text wrapped in child elements of a
    ``<title>`` (inline markup) is not included.  Returns ``""`` when
    nothing matches.
    """
    pieces = []
    for info in doc.getElementsByTagName(info_tag):
        for title_elem in info.getElementsByTagName("title"):
            for child in title_elem.childNodes:
                if child.nodeType == Node.TEXT_NODE:
                    pieces.append(child.data)
    return "".join(pieces)


def grab_title(doc):
    """Return the title of a parsed document, or ``""`` if none is found.

    The title is looked up under ``<articleinfo>`` first; only when that
    yields an empty string is ``<bookinfo>`` consulted.
    """
    # An empty result from <articleinfo> means this must not be an
    # <article>, so fall back to the <bookinfo> title.
    return title_text(doc, "articleinfo") or title_text(doc, "bookinfo")


def main(xml_files):
    """Print the title of each XML file in *xml_files*, one per line.

    Prints a usage message and exits with status 2 when *xml_files* is
    empty.  A file without a recognised title produces an empty line.
    """
    if not xml_files:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Usage: titlegrab.py xml_file...")
        sys.exit(2)
    for path in xml_files:
        print(grab_title(xml.dom.minidom.parse(path)))


if __name__ == "__main__":
    main(sys.argv[1:])
|