source: ipk/source.sh4/swapinfos_ncidclient/var/swap/extensions/NcidClient/reverselookup.py@ 7588

Last change on this file since 7588 was 7451, checked in by BPanther, 15 years ago

[ipk] - copy source->source.sh4

File size: 15.0 KB
Line 
1#!/usr/bin/python
2# -*- coding: UTF-8 -*-
3'''
4$Id$
5$Author$
6$Revision$
7$Date$
8$Modified: sreichholf
9'''
10
11import re, sys, os
12import htmlentitydefs
13from xml.dom.minidom import parse
14from twisted.web.client import getPage #@UnresolvedImport
15from twisted.internet import reactor #@UnresolvedImport
16from . import debug
17
def html2unicode(in_html, charset):
    '''
    Replace HTML character references in in_html by the characters they denote.

    Handles hexadecimal (&#xE4;), decimal (&#228;) and named (&auml;)
    references in a single pass. References that cannot be resolved (unknown
    name, code point out of range) are left in the text unchanged and reported
    via debug().

    @param in_html: text possibly containing character references
    @param charset: unused; kept so that existing callers keep working
    @return: in_html with all resolvable references replaced
    '''
    # Local imports keep the function usable under both module names of the
    # entity table (htmlentitydefs in Python 2, html.entities in Python 3).
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def decode_entity(match):
        entity = match.group(0)
        hexval, decval, name = match.group(1, 2, 3)
        if name is not None:
            if name not in name2codepoint:
                debug("[Callhtml2utf8] KeyError " + entity + "/" + name)
                return entity
            codepoint = name2codepoint[name]
        elif hexval is not None:
            codepoint = int(hexval, 16)
        else:
            codepoint = int(decval)
        try:
            return to_char(codepoint)
        except (ValueError, OverflowError) as e:
            # code point outside of what this interpreter can represent
            debug("[nrzuname] html2utf8: ValueError " + repr(entity) + ":" + repr(codepoint) + " (" + str(e) + ")")
            return entity

    # group 1: hex digits, group 2: decimal digits, group 3: entity name
    entitymask = r'&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|([A-Za-z][A-Za-z0-9]*));'
    return re.sub(entitymask, decode_entity, in_html)
50
def normalizePhoneNumber(intNo):
    '''
    Reduce a formatted phone number to its plain digit string.

    Brackets, blanks, slashes and dashes are removed and a leading '+' is
    turned into the international prefix '00'. The first run of digits of the
    result is returned.

    @param intNo: phone number as found in a web page, e.g. "+49 (30) 123-456"
    @return: digits only, e.g. "004930123456"; '0' if intNo has no digit
    '''
    # Strip the separators first: otherwise a '+' hidden behind one of them,
    # as in "(+49) 30 ..." or " +49 ...", is not seen as the leading character
    # and the international prefix gets lost.
    intNo = re.sub(r'[() /-]', '', intNo)
    if intNo.startswith('+'):
        intNo = '00' + intNo[1:]
    found = re.match(r'.*?([0-9]+)', intNo)
    if found:
        return found.group(1)
    return '0'
61
def out(number, caller):
    '''
    Print a lookup result as one human readable line.

    caller is expected in the form
    "NA: <name>;VN: <first name>;STR: <street>;HNR: <house no>;PLZ: <zip>;ORT: <city>";
    anything else is ignored. The printed line has the shape
    "<name> <first name>, <street> <house no>, <zip> <city>" with empty
    parts left out.

    @param number: the number that was looked up (only logged)
    @param caller: the encoded lookup result
    '''
    debug("[nrzuname] out: %s: %s" %(number, caller))
    parsed = re.match("NA: ([^;]*);VN: ([^;]*);STR: ([^;]*);HNR: ([^;]*);PLZ: ([^;]*);ORT: ([^;]*)", caller)
    if parsed is None:
        return
    name, vorname, strasse, hnr, plz, ort = parsed.groups()

    if vorname:
        name = name + ' ' + vorname

    # street and house number; the blank before the number is kept even
    # when there is no street
    street_part = strasse
    if hnr:
        street_part = street_part + ' ' + hnr
    # zip code and city, separated by a blank only if both are present
    city_part = ' '.join([part for part in (plz, ort) if part])

    address = ', '.join([part for part in (street_part, city_part) if part])
    if address:
        name = name + ', ' + address

    print(name)
92
def simpleout(number, caller): #@UnusedVariable # pylint: disable-msg=W0613
    '''
    Print the raw lookup result.

    @param number: the number that was looked up (unused)
    @param caller: the lookup result, printed as is
    '''
    # Function-call form, as in out(): same output as the print statement,
    # but also valid syntax for newer interpreters.
    print(caller)
95
# Location of the XML file describing the reverse lookup websites.
try:
    from Tools.Directories import resolveFilename, SCOPE_PLUGINS
except ImportError:
    # Tools.Directories is not importable (presumably running outside of
    # the Enigma2 environment): fall back to a file in the working directory.
    reverseLookupFileName = "reverselookup.xml"
else:
    # Resolved outside of the try body so that an ImportError raised while
    # resolving is not mistaken for a missing Tools.Directories module.
    reverseLookupFileName = resolveFilename(SCOPE_PLUGINS, "Extensions/NcidClient/reverselookup.xml")

# Cache of the parsed XML file: country code (e.g. "0049") -> list of its
# <website> elements, shared by all ReverseLookupAndNotify instances.
countries = { }
# Modification time of the XML file at the time it was last read.
reverselookupMtime = 0
104
class ReverseLookupAndNotify:
    '''
    Look up the caller behind a phone number and report it to a callback.

    The websites to query and the regular expressions used to pick name and
    address out of their pages are read from the XML file named by
    reverseLookupFileName. The lookup is started by the constructor; the
    websites of the number's country are tried one after the other until one
    of them yields a result. In any case notificationCallback(number, caller)
    is called exactly once, with caller being "" if nothing was found and
    "NA: ..;VN: ..;STR: ..;HNR: ..;PLZ: ..;ORT: .." otherwise.
    '''

    def __init__(self, number, notificationCallback=out, charset="cp1252", countrycode="0049"):
        '''
        @param number: phone number to look up; may start with '+' or '00'
        @param notificationCallback: called with (number, caller) when done
        @param charset: encoding of the caller string handed to the callback
        @param countrycode: code of the local country in the form "00xx"
        '''
        debug("[ReverseLookupAndNotify] reverse Lookup for %s!" %number)
        self.number = number
        self.notificationCallback = notificationCallback
        self.caller = ""
        self.currentWebsite = None
        self.nextWebsiteNo = 0
        self.websites = []
        # Deriving the charset from sys.getdefaultencoding() was tried and
        # did not work, so the charset given by the caller is used as is.
        self.charset = charset
        self.countrycode = countrycode

        self._loadCountries()

        if self.number.startswith('+'):
            self.number = '00' + self.number[1:]

        # numbers of the local country are handled in their national form
        if self.number.startswith(countrycode):
            self.number = '0' + self.number[len(countrycode):]

        # Test the normalized number, not the raw argument: the latter still
        # starts with '+' for international numbers, and may be empty.
        if not self.number.startswith("0"):
            self.notifyAndReset()
            return

        if self.number[:2] == "00":
            # the country code (including '00') has 3 (e.g. USA) to 5 characters
            for codeLen in (3, 4, 5):
                if self.number[:codeLen] in countries:
                    self.countrycode = self.number[:codeLen]
                    break
            else:
                debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
                self.notifyAndReset()
                return

        websites = countries.get(self.countrycode)
        if websites:
            debug("[ReverseLookupAndNotify] Found website for reverse lookup")
            self.websites = websites
            self.nextWebsiteNo = 1
            self.handleWebsite(self.websites[0])
        else:
            # unknown country, or a country without any website
            debug("[ReverseLookupAndNotify] Country cannot be reverse handled")
            self.notifyAndReset()
            return

    def _loadCountries(self):
        '''(Re-)read the XML file into the module-level cache if the cache is empty or outdated.'''
        global reverselookupMtime
        reverselookupMtimeAct = os.stat(reverseLookupFileName)[8] # st_mtime
        if countries and reverselookupMtimeAct <= reverselookupMtime:
            return
        debug("[ReverseLookupAndNotify] (Re-)Reading %s\n" %reverseLookupFileName)
        dom = parse(reverseLookupFileName)
        fresh = {}
        for top in dom.getElementsByTagName("reverselookup"):
            for country in top.getElementsByTagName("country"):
                code = country.getAttribute("code").replace("+","00")
                fresh[code] = country.getElementsByTagName("website")
        # Replace the cache only after the file was parsed successfully, and
        # replace it completely so that removed countries do not linger.
        countries.clear()
        countries.update(fresh)
        reverselookupMtime = reverselookupMtimeAct

    def handleWebsite(self, website):
        '''
        Build the query URL for website and request the page.

        The result is processed by _gotPage, failures by _gotError.

        @param website: <website> element from the XML file
        '''
        debug("[ReverseLookupAndNotify] handleWebsite: " + website.getAttribute("name"))
        if self.number[:2] == "00":
            # Cut off the leading country code only; str.replace() would
            # also remove the same digits further inside the number.
            number = website.getAttribute("prefix") + self.number[len(self.countrycode):]
        else:
            number = self.number

        url = website.getAttribute("url")
        # NOTE(review): '$' is the end-of-string anchor here, so neither
        # search can ever succeed and this guard is dead. Escaping it would
        # make the (PFX)AREACODE branches below unreachable and end the
        # lookup instead of trying the next website, so it is left as is
        # until the intended behaviour is confirmed.
        if re.search('$AREACODE', url) or re.search('$PFXAREACODE', url):
            debug("[ReverseLookupAndNotify] handleWebsite: (PFX)ARECODE cannot be handled")
            self.notifyAndReset()
            return
        #
        # Apparently, there is no attribute called (pfx)areacode anymore
        # So, this below will not work.
        #
        # The placeholders are substituted directly instead of via
        # %-formatting, which fails on URLs containing percent-escapes.
        if "$AREACODE" in url and website.hasAttribute("areacode"):
            areaCodeLen = int(website.getAttribute("areacode"))
            url = url.replace("$AREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        elif "$PFXAREACODE" in url and website.hasAttribute("pfxareacode"):
            areaCodeLen = int(website.getAttribute("pfxareacode"))
            url = url.replace("$PFXAREACODE", number[:areaCodeLen]).replace("$NUMBER", number[areaCodeLen:])
        elif "$NUMBER" in url:
            url = url.replace("$NUMBER", number)
        else:
            debug("[ReverseLookupAndNotify] handleWebsite: cannot handle websites with no $NUMBER in url")
            self.notifyAndReset()
            return
        debug("[ReverseLookupAndNotify] Url to query: " + url)
        url = url.encode("UTF-8", "replace")
        self.currentWebsite = website
        getPage(url,
            agent="Mozilla/5.0 (Windows; U; Windows NT 6.0; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
            ).addCallback(self._gotPage).addErrback(self._gotError)

    def _cleanName(self, text):
        '''Strip markup leftovers from a matched text, decode its HTML entities and collapse blanks.'''
        item = text.replace("%20"," ").replace("&nbsp;"," ").replace("</b>","").replace(","," ").replace('\n',' ').replace('\t',' ')
        item = html2unicode(item, self.charset)
        # collapse runs of blanks into a single one
        while "  " in item:
            item = item.replace("  ", " ")
        return item.strip()

    def _extractField(self, entry, which, page):
        '''
        Apply the pattern <which> of entry to page.

        @return: the cleaned text of the pattern's first group; None if the
                 entry has no such pattern or the pattern found nothing
        '''
        pat = self.getPattern(entry, which)
        if not pat:
            return None
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotify] _gotPage: look for '''%s''' with '''%s'''" %( which, pat ))
        found = re.match(pat, page, re.S|re.M)
        if not found or not found.re.groups or not found.group(1):
            return None
        debug("[ReverseLookupAndNotify] _gotPage: found for '''%s''': '''%s'''" %( which, found.group(1)))
        item = self._cleanName(found.group(1))
        debug("[ReverseLookupAndNotify] _gotPage: %s: %s" %( which, item ))
        return item

    def _numberMatches(self, entry, page):
        '''
        Check the number shown on the page against the one looked for.

        Needed for the sites delivering fuzzy matches.

        @return: False only if the page shows a different number
        '''
        pat = self.getPattern(entry, "number")
        if not pat:
            return True
        pat = ".*?" + pat
        debug("[ReverseLookupAndNotify] _gotPage: look for number with '''%s'''" %( pat ))
        found = re.match(pat, page, re.S|re.M)
        if not found:
            return True
        if self.number[:2] == '00':
            # national form; country codes have different lengths
            number = '0' + self.number[len(self.countrycode):]
        else:
            number = self.number
        if number != normalizePhoneNumber(found.group(1)):
            debug("[ReverseLookupAndNotify] _gotPage: got unequal number '''%s''' for '''%s'''" %(found.group(1), self.number))
            return False
        return True

    def _gotPage(self, page):
        '''
        Extract name and address from the page delivered by the current website.

        @param page: the raw page as delivered by getPage
        @return: True if a caller was found and reported; False if not, in
                 which case the next website has been tried via _gotError
        '''
        debug("[ReverseLookupAndNotify] _gotPage")
        found = re.match(r'.*<meta http-equiv="Content-Type" content="(?:application/xhtml\+xml|text/html); charset=([^"]+)" />', page, re.S)
        if found:
            pageCharset = found.group(1)
            debug("[ReverseLookupAndNotify] Charset: " + pageCharset)
        else:
            pageCharset = "ISO-8859-1"
            debug("[ReverseLookupAndNotify] Default Charset: iso-8859-1")
        # Decode first, then replace the non-breaking space: replacing the
        # byte 0xA0 in the raw page would damage multibyte characters.
        page = page.decode(pageCharset, "replace").replace(u"\xa0", u" ")

        for entry in self.currentWebsite.getElementsByTagName("entry"):
            if not self._numberMatches(entry, page):
                continue

            # look for <firstname> and <lastname> match, if not there look for <name>, if not there skip the entry
            firstname = ''
            if self.getPattern(entry, "lastname"):
                name = self._extractField(entry, "lastname", page) or ''
                firstname = self._extractField(entry, "firstname", page) or ''
            else:
                name = self._extractField(entry, "name", page)
                if name is None:
                    debug("[ReverseLookupAndNotify] _gotPage: no name found, skipping")
                    continue
                firstNameFirst = entry.getElementsByTagName('name')[0].getAttribute('swapFirstAndLastName')
                if firstNameFirst == 'true': # that means, the name is of the form "firstname lastname"
                    found = re.match(r'(.*?)\s+(.*)', name)
                    if found:
                        firstname = found.group(1)
                        name = found.group(2)

            if not name:
                continue

            city = self._extractField(entry, "city", page) or ''
            if not city:
                continue

            zipcode = self._extractField(entry, "zipcode", page) or ''

            street = self._extractField(entry, "street", page) or ''
            streetno = ''
            # split off a trailing house number
            found = re.match(r"^(.+) ([-\d]+)$", street, re.S)
            if found:
                street = found.group(1)
                streetno = found.group(2)

            self.caller = "NA: %s;VN: %s;STR: %s;HNR: %s;PLZ: %s;ORT: %s" % ( name, firstname, street, streetno, zipcode, city )
            debug("[ReverseLookupAndNotify] _gotPage: Reverse lookup succeeded:\nName: %s" %(self.caller))

            self.notifyAndReset()
            return True

        self._gotError("[ReverseLookupAndNotify] _gotPage: Nothing found at %s" %self.currentWebsite.getAttribute("name"))
        return False

    def _gotError(self, error = ""):
        '''
        Try the next website; report an empty result if there is none left.

        @param error: description of what went wrong (only logged)
        '''
        debug("[ReverseLookupAndNotify] _gotError - Error: %s" %error)
        if self.nextWebsiteNo >= len(self.websites):
            debug("[ReverseLookupAndNotify] _gotError: I give up")
            self.notifyAndReset()
            return
        debug("[ReverseLookupAndNotify] _gotError: try next website")
        self.nextWebsiteNo = self.nextWebsiteNo+1
        self.handleWebsite(self.websites[self.nextWebsiteNo-1])

    def getPattern(self, website, which):
        '''
        Return the text of the first <which> element below website.

        @param website: XML element to search in
        @param which: tag name of the pattern, e.g. "name" or "city"
        @return: the pattern; '' if there is no such element or it is empty
        '''
        pat1 = website.getElementsByTagName(which)
        if len(pat1) == 0:
            return ''
        if len(pat1) > 1:
            debug("[ReverseLookupAndNotify] getPattern: Something strange: more than one %s for website %s" %(which, website.getAttribute("name")))
        children = pat1[0].childNodes
        if len(children) == 0:
            # element without content, e.g. <city/>
            return ''
        return children[0].data

    def notifyAndReset(self):
        '''Hand the result (encoded in self.charset) to the callback; stop the reactor when run as a script.'''
        debug("[ReverseLookupAndNotify] notifyAndReset: Number: " + self.number + "; Caller: " + self.caller)
        if self.caller:
            try:
                debug("2: " + repr(self.caller))
                self.caller = self.caller.encode(self.charset, 'replace')
                debug("3: " + repr(self.caller))
            except UnicodeDecodeError:
                # the caller is reported unencoded in this case
                debug("[ReverseLookupAndNotify] cannot encode?!?!")
            self.notificationCallback(self.number, self.caller)
        else:
            self.notificationCallback(self.number, "")
        if __name__ == '__main__':
            reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
396
if __name__ == '__main__':
    def _startLookup(*args):
        '''Start the lookup; stop the reactor if it cannot even be started.'''
        try:
            ReverseLookupAndNotify(*args)
        except Exception:
            reactor.stop() #@UndefinedVariable # pylint: disable-msg=E1101
            raise

    # The lookup is started from within the running reactor: it may finish
    # synchronously (e.g. for a number that cannot be handled), and
    # notifyAndReset() then stops the reactor, which must be running for that.
    if (len(sys.argv) == 2):
        # usage: nrzuname.py <number>
        reactor.callWhenRunning(_startLookup, sys.argv[1], simpleout) #@UndefinedVariable # pylint: disable-msg=E1101
        reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
    elif (len(sys.argv) == 3):
        # usage: nrzuname.py <number> <charset>
        # NOTE(review): setDebug is neither defined nor imported in the
        # visible part of this file - confirm where it should come from.
        setDebug(False)
        reactor.callWhenRunning(_startLookup, sys.argv[1], out, sys.argv[2]) #@UndefinedVariable # pylint: disable-msg=E1101
        reactor.run() #@UndefinedVariable # pylint: disable-msg=E1101
Note: See TracBrowser for help on using the repository browser.