| Home | Trees | Indices | Help |
|
|---|
|
|
1 """This module encapsulates a document description stored in an XML file.
2
3 This is mainly used by GNUmed/Archive.
4
5 @copyright: GPL v2 or later
6 """
7 #============================================================
8 # $Source: /home/ncq/Projekte/cvs2git/vcs-mirror/gnumed/gnumed/client/business/gmXmlDocDesc.py,v $
9 # $Id: gmXmlDocDesc.py,v 1.6 2008-02-25 17:31:41 ncq Exp $
10 __version__ = "$Revision: 1.6 $"
11 __author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>"
12
13 import sys, os.path, fileinput, types, string, logging
14
15
16 _log = logging.getLogger('gm.docs')
17 _log.info(__version__)
18 #============================================================
20 # handlers for __getitem__()
21 _get_handler = {}
22 #--------------------------------------------------------
24 # sanity checks
25 if aBaseDir is None:
26 raise ConstructorError, "need document path"
27 if not os.path.exists(os.path.abspath(aBaseDir)):
28 raise ConstructorError, "document path [%s] does not exist" % aBaseDir
29 self.__base_dir = aBaseDir
30 _log.debug("working from directory [%s]" % self.__base_dir)
31
32 if aCfg is None:
33 _log.warning('no config file specified')
34 import gmCfg
35 self.__cfg = gmCfg.gmDefCfgFile
36 else:
37 self.__cfg = aCfg
38
39 self.__group = str(aGroup)
40
41 tmp = self.__cfg.get(self.__group, "description")
42 self.__xml_file = os.path.join(self.__base_dir, tmp)
43 if not os.path.exists(self.__xml_file):
44 raise ConstructorError, "skipping [%s]: description file [%s] not found" % (self.__base_dir, tmp)
45
46 self.__data = {}
47
48 # if not self.__load_from_xml():
49 # raise ConstructorError, "XML file [%s] cannot be parsed correctly" % anXmlFile
50
51 return None
52 #--------------------------------------------------------
def __load_from_xml(self):
    """Load document metadata from the XML description file.

    For each metadata field the name of the XML tag is looked up in the
    config group of this instance, the tag content is read from
    self.__xml_file and the lines found are joined with single spaces
    and cached in self.__data.

    The fields 'type', 'comment', 'date' and 'reference' are mandatory.
    The long 'description' is optional: a missing one is logged but
    does not abort loading.

    Returns 1 on success, None as soon as a mandatory field is missing.
    """
    # (key in self.__data, config option holding the tag name, error message)
    mandatory_fields = (
        ('type', "type_tag", "cannot load document type."),
        ('comment', "comment_tag", "cannot load document comment"),
        ('date', "date_tag", "cannot load document reference date."),
        ('reference', "ref_tag", "cannot load document reference string."),
    )
    for key, tag_option, error_msg in mandatory_fields:
        # the config object lives in self.__cfg (there is no self.cfg)
        lines = self.__get_from_xml(
            aTag = self.__cfg.get(self.__group, tag_option),
            anXMLfile = self.__xml_file
        )
        if lines is None:
            _log.error(error_msg)
            return None
        self.__data[key] = ' '.join(lines)

    # optional: long document description
    lines = self.__get_from_xml(
        aTag = self.__cfg.get(self.__group, "aux_comment_tag"),
        anXMLfile = self.__xml_file
    )
    if lines is None:
        _log.error("cannot load long document description.")
    else:
        self.__data['description'] = ' '.join(lines)

    # .get() because the description may legitimately be absent here
    _log.debug("long document description: " + str(self.__data.get('description')))
    _log.debug("document reference string: " + str(self.__data['reference']))
    _log.debug("document reference date: " + str(self.__data['date']))
    _log.debug("Document comment: " + str(self.__data['comment']))
    _log.debug("Document type: " + str(self.__data['type']))

    return 1
102 #--------------------------------------------------------
103 # attribute access
104 #--------------------------------------------------------
def __getitem__(self, item):
    """Return the value stored under item.

    Values cached in self.__data take precedence. If item is not
    cached, the handler registered for it in xmlDocDesc._get_handler is
    called with this instance and its result is returned.

    Returns None, after logging the failure, when item is neither
    cached nor has a handler. A KeyError raised inside a handler is
    reported the same way.
    """
    try:
        return self.__data[item]
    except KeyError:
        pass

    try:
        return xmlDocDesc._get_handler[item](self)
    except KeyError:
        # exception() attaches the traceback itself; item is passed as a
        # lazy format argument so the record can always be rendered
        _log.exception('[%s] neither cached in self.__data nor get handler available', item)
        return None
114 #--------------------------------------------------------
def _get_obj_list(self):
    """Return the dict of document objects, loading it on first access.

    The object list is read by self.__load_obj_list(), which always
    (re)creates self.__data['objects'] before scanning the XML file, so
    the lookup after loading cannot fail.
    """
    if 'objects' not in self.__data:
        self.__load_obj_list()
    return self.__data['objects']
122 #--------------------------------------------------------
123 _get_handler['objects'] = _get_obj_list
124 #--------------------------------------------------------
def __load_obj_list(self):
    """Read the list of object (image) files from the XML metadata file.

    The order of file names in the XML file is assumed to correspond to
    the sequence of pages. The file is scanned line by line with
    self.__extract_xml_content() rather than self.__get_from_xml()
    because every matching line is wanted, not just the first tag.

    Fills self.__data['objects'] with one entry per file, keyed by its
    1-based position, each holding:
      'file name' - absolute path below self.__base_dir
      'index'     - position of the object within the document

    Returns 1 if at least one file was found, None otherwise.
    """
    self.__data['objects'] = {}
    tag_name = self.__cfg.get(self.__group, "obj_tag")

    idx = 0
    # input() is called outside the try block so that a failure to start
    # does not close somebody else's active fileinput session
    xml_lines = fileinput.input(self.__xml_file)
    try:
        for line in xml_lines:
            content = self.__extract_xml_content(line, tag_name)
            if content is None:
                continue
            idx += 1
            self.__data['objects'][idx] = {
                'file name': os.path.abspath(os.path.join(self.__base_dir, content)),
                # this 'index' defines the order of objects in the document
                'index': idx
            }
    finally:
        # fileinput keeps module-global state: always release it, or the
        # next fileinput.input() call raises RuntimeError
        fileinput.close()

    if idx == 0:
        _log.warning("no files found for import")
        return None

    _log.debug("document data files to be processed: %s" % self.__data['objects'])

    return 1
160 #--------------------------------------------------------
161 # public methods
162 #--------------------------------------------------------
167 #--------------------------------------------------------
def __get_from_xml(self, aTag=None, anXMLfile=None):
    """Return the content of the first <aTag>...</aTag> element.

    The element may span several lines. Text left of the opening tag
    and right of the closing tag is discarded.

    aTag      -- tag name without angle brackets, must be a string
    anXMLfile -- accepted for the callers' sake
                 NOTE(review): the scan always reads self.__xml_file,
                 this argument is not used; confirm whether it should be

    Returns a list of strings (one per line touched by the element), or
    None if aTag is not a string or nothing was collected.
    """
    # sanity
    if not isinstance(aTag, str):
        _log.error("Argument aTag (" + str(aTag) + ") is not a string.")
        return None

    TagStart = "<" + aTag + ">"
    TagEnd = "</" + aTag + ">"

    _log.info("Retrieving " + TagStart + "content" + TagEnd + ".")

    inTag = False
    content = []

    # input() is called outside the try block so that a failure to start
    # does not close somebody else's active fileinput session
    xml_lines = fileinput.input(self.__xml_file)
    try:
        for line in xml_lines:
            tmp = line

            # this line starts a description
            if TagStart in tmp:
                inTag = True
                # strip junk left of <tag>
                (junk, good_stuff) = tmp.split(TagStart, 1)
                _log.debug("Found tag start in line: junk='%s' content='%s'" % (junk, good_stuff))
                tmp = good_stuff

            # this line ends a description, but only
            # if the tag start has been found already
            if inTag and TagEnd in tmp:
                # strip junk right of </tag>
                (good_stuff, junk) = tmp.split(TagEnd, 1)
                _log.debug("Found tag end in line: junk='%s' content='%s'" % (junk, good_stuff))
                content.append(good_stuff)
                # only the first element is wanted
                break

            # might be in-tag data line or line with start tag only
            if inTag:
                content.append(tmp)
    finally:
        # fileinput keeps module-global state: always release it, or the
        # next fileinput.input() call raises RuntimeError
        fileinput.close()

    if len(content) > 0:
        _log.debug("%s tag content successfully read: %s" % (TagStart, str(content)))
        return content

    return None
217
218 #--------------------------------------------------------
def __extract_xml_content(self, aLine, aTag):
    """Return the content of a single-line <aTag ...>content</aTag> element.

    aLine -- one line of the XML file
    aTag  -- tag name without angle brackets

    The opening tag is matched by its prefix '<aTag'. Everything between
    the first '>' after that prefix and the closing '</aTag>' is
    returned.

    Returns None if the line does not contain the opening tag, if the
    closing tag is missing (multiline elements are not supported here),
    or if the opening tag is not closed before the closing tag.
    """
    # is this a line we care about ?
    start_tag_pos = aLine.find('<%s' % aTag)
    if start_tag_pos == -1:
        return None

    # yes, so check for closing tag
    end_tag_pos = aLine.find('</%s>' % aTag)
    if end_tag_pos == -1:
        # but we don't do multiline tags
        _log.error("Line [%s] is incomplete for tag [%s]. We don't do multiline tags here." % (aLine, aTag))
        return None

    # find the '>' closing the opening tag; without this check a miss
    # (-1) would turn into content_start == 0 and return text from the
    # start of the line
    open_tag_end = aLine.find('>', start_tag_pos, end_tag_pos)
    if open_tag_end == -1:
        _log.error("Line [%s] is malformed for tag [%s]: start tag is not closed before the end tag." % (aLine, aTag))
        return None

    # actually extract content
    return aLine[open_tag_end + 1:end_tag_pos]
233 #============================================================
234 # main
235 #------------------------------------------------------------
236
237 #============================================================
238 # $Log: gmXmlDocDesc.py,v $
239 # Revision 1.6 2008-02-25 17:31:41 ncq
240 # - logging cleanup
241 #
242 # Revision 1.5 2008/01/30 13:34:50 ncq
243 # - switch to std lib logging
244 #
245 # Revision 1.4 2004/03/19 17:07:20 shilbert
246 # - import statement fixed
247 #
248 # Revision 1.3 2004/02/25 09:46:20 ncq
249 # - import from pycommon now, not python-common
250 #
251 # Revision 1.2 2003/11/17 10:56:35 sjtan
252 #
253 # synced and committing.
254 #
255 # Revision 1.1 2003/10/23 06:02:38 sjtan
256 #
257 # manual edit areas modelled after r.terry's specs.
258 #
259 # Revision 1.1 2003/04/20 15:42:27 ncq
260 # - first version
261 #
262
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sat Oct 5 03:57:02 2013 | http://epydoc.sourceforge.net |