#!/usr/bin/python
# mediaprem.py by Ambrosa http://www.ambrosa.net
# this module is used to download EPG data from the Mediaset website
# derived from E2_LOADEPG

__author__ = "ambrosa http://www.ambrosa.net"
__copyright__ = "Copyright (C) 2008-2011 Alessandro Ambrosini"
__license__ = "CreativeCommons by-nc-sa http://creativecommons.org/licenses/by-nc-sa/3.0/"

import gc
import os
import sys
import time
import codecs
import socket
import urllib
import urllib2
import ConfigParser
from xml.dom import minidom

# import CrossEPG functions
import crossepg

# location of local python modules under the "scripts/lib" dir.
# add it to sys.path()
crossepg_instroot = crossepg.epgdb_get_installroot()
if crossepg_instroot == False:
    sys.exit(1)
libdir = os.path.join(crossepg_instroot, 'scripts/lib')
sys.path.append(libdir)

# import local modules
import sgmllib
import scriptlib

# =================================================================
# HTML PARSER


class Description_parser(sgmllib.SGMLParser):
    def parse(self, s):
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.start_div_box = False
        self.start_div_boxtxt = False
        self.description = ''

    def start_div(self, attributes):
        for name, value in attributes:
            if name == "class":
                if value == "box_Text":
                    self.start_div_box = True
                elif value == "txtBox_cms":
                    self.start_div_boxtxt = True

    def end_div(self):
        if self.start_div_boxtxt == True:
            self.start_div_box = False
            self.start_div_boxtxt = False

    def handle_data(self, data):
        if self.start_div_boxtxt == True:
            self.description += data.decode('iso-8859-1')

    def get_descr(self):
        return self.description.strip(' \n\r')

# =================================================================

class main:

    # main config file
    CONF_CONFIGFILENAME = "mediaprem.conf"

    # network socket timeout (in seconds)
    CONF_SOCKET_TIMEOUT = 20

    # log text
    CONF_LOG_SCRIPT_NAME = "MediasetPremium (Italy)"
    CONF_LOG_PREFIX = ""

    # max chars in a description
    CONF_DLDESCMAXCHAR = 250

    # number of retries on HTTP error
    HTTP_ERROR_RETRY = 3
    # seconds to wait between retries
    HTTP_ERROR_WAIT_RETRY = 5

    # charset used in the remote website EPG data
    REMOTE_EPG_CHARSET = 'utf-8'

    TODAYMP = ''
    DAYCACHEMP = []
    FIELD_SEPARATOR = '###'
    CHANNELLIST = {}

    def log(self, s, video=0):
        self.logging.log(self.CONF_LOG_PREFIX + str(s))
        if video == 1:
            self.log2video(str(s))

    def log2video(self, s):
        self.logging.log2video_status(str(s))

    def convert_daymp(self, dmp):
        # convert a YYYY/MM/DD date into YYYYMMDD
        daystandard = time.strftime("%Y%m%d", time.strptime(dmp, "%Y/%m/%d"))
        return daystandard
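        # e.g. convert_daymp('2011/12/31') returns '20111231'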

    def get_description(self, url):

        if url[:7] != 'http://':
            return ''

        if (url[-5:] != '.html') and (url[-4:] != '.htm'):
            return ''

        self.log("  downloading description '" + url + "'")
        url = str(urllib.quote(url, safe=":/"))

        try:
            sock = urllib2.urlopen(url)
            data = sock.read()
        except IOError, e:
            serr = "unknown"
            if hasattr(e, 'reason'):
                serr = str(e.reason)
            elif hasattr(e, 'code'):
                serr = str(e.code)
                if hasattr(e, 'msg'):
                    serr += " , " + str(e.msg)
            self.log(url + " error, reason: " + serr + ". Skip it.")
            return ''
        else:
            sock.close()
            dsparser = Description_parser()
            dsparser.parse(data)
            return dsparser.get_descr()

        return ''  # never reached, kept as a safety net
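
    # note: get_description() fetches a programme detail page and returns the text
    # collected inside its <div class="txtBox_cms"> block, or '' on any error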

    def __init__(self, confdir, dbroot):

        # initialize logging
        self.logging = scriptlib.logging_class()
        # write the script name to the video OSD
        self.logging.log2video_scriptname(self.CONF_LOG_SCRIPT_NAME)

        # check available swap memory
        osp = os.popen('free | awk \'/Swap/ { print $2 }\'', 'r')
        ret = osp.readlines()
        if len(ret) > 0:
            try:
                m = int(ret[0]) / 1024
            except:
                self.log("Error getting SWAP value, abort", 1)
                time.sleep(10)
                sys.exit(1)

            if m < 60:
                self.log("SWAP not enabled (<60MB), abort", 1)
                time.sleep(10)
                sys.exit(1)
        else:
            self.log("Error getting SWAP value, abort", 1)
            time.sleep(10)
            sys.exit(1)

        osp.close()

        CONF_FILE = os.path.join(confdir, self.CONF_CONFIGFILENAME)
        if not os.path.exists(CONF_FILE):
            self.log("ERROR: %s not present" % CONF_FILE, 1)
            sys.exit(1)

        config = ConfigParser.ConfigParser()
        #config.optionxform = str  # needed to return case sensitive keys
        config.read(CONF_FILE)

        # read [global] section options
        self.CONF_DEFAULT_PROVIDER = config.get("global", "DEFAULT_PROVIDER")
        # save the cache under dbroot
        self.CONF_CACHEDIR = os.path.join(dbroot, config.get("global", "CACHE_DIRNAME"))

        self.CONF_DL_DESC = config.getint("global", "DL_DESC")
        self.CONF_MAX_DAY_EPG = config.getint("global", "MAX_DAY_EPG")
        self.CONF_URL = config.get("global", "URL")

        self.CONF_GMT_ZONE = config.get("global", "GMT_ZONE")
        if self.CONF_GMT_ZONE.strip(' ').lower() == 'equal':
            #self.DELTA_UTC = -scriptlib.delta_utc()  # returns negative if the timezone is east of GMT (like Italy), so invert the sign
            self.DELTA_UTC = 0
        else:
            self.DELTA_UTC = float(self.CONF_GMT_ZONE) * 3600.0
            if self.DELTA_UTC >= 0:
                self.DELTA_UTC = self.DELTA_UTC + scriptlib.delta_dst()
            else:
                self.DELTA_UTC = self.DELTA_UTC - scriptlib.delta_dst()

        self.DELTA_UTC = int(self.DELTA_UTC)
        #self.log("Website timezone - UTC = %d seconds" % self.DELTA_UTC)
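        # worked example: GMT_ZONE = "1" gives DELTA_UTC = 1*3600 + scriptlib.delta_dst()
        # seconds; GMT_ZONE = "equal" applies no shift (website clock assumed equal
        # to the receiver clock)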

        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("Creating '%s' directory for caching" % self.CONF_CACHEDIR)
            os.mkdir(self.CONF_CACHEDIR)

        # read the [channels] section
        temp = config.items("channels")

        # create a dictionary indexed by channel ID
        for i in temp:
            self.CHANNELLIST[i[0].strip(' \n\r').lower()] = unicode(i[1].strip(' \n\r').lower(), 'utf-8')
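        # each [channels] value has the form "cacheopt[,name[,provider]]";
        # a hypothetical entry:  joi = 1,joi,mediaset premium
        # (see the cache option legend in download_and_cache() below)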

        if len(self.CHANNELLIST) == 0:
            self.log("ERROR: [channels] section empty ?", 1)
            sys.exit(1)

        # set the network socket timeout
        socket.setdefaulttimeout(self.CONF_SOCKET_TIMEOUT)

        self.TODAYMP = time.strftime("%Y/%m/%d")
        # create a list of dates (format YYYY/MM/DD) from today to today + MAX_DAY_EPG
        self.DAYCACHEMP = [self.TODAYMP]
        for day in range(1, self.CONF_MAX_DAY_EPG):
            self.DAYCACHEMP.append(time.strftime("%Y/%m/%d", time.localtime(time.time() + 86400 * day)))
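        # e.g. with MAX_DAY_EPG = 3 and today = 2011/12/30, DAYCACHEMP becomes
        # ['2011/12/30', '2011/12/31', '2012/01/01']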

    # ----------------------------------------------------------------------

    def download_and_cache(self):
        self.log("--- START DOWNLOAD AND CACHE DATA ---")
        self.log2video("STARTING DOWNLOAD")

        self.log("Removing old cached files")
        scriptlib.cleanup_oldcachedfiles(self.CONF_CACHEDIR, self.FIELD_SEPARATOR)

        chlist = self.CHANNELLIST

        self.log("Start downloading XML data from '" + self.CONF_URL + "'")
        self.log2video("downloading XML data ...")

        i = self.HTTP_ERROR_RETRY
        while i > 0:
            try:
                sock = urllib2.urlopen(self.CONF_URL)
                data = sock.read()
            except IOError, e:
                serr = "unknown"
                if hasattr(e, 'reason'):
                    serr = str(e.reason)
                elif hasattr(e, 'code'):
                    serr = str(e.code)
                    if hasattr(e, 'msg'):
                        serr += " , " + str(e.msg)
                self.log("'" + self.CONF_URL + "' connection error. Reason: " + serr + ". Waiting " + str(self.HTTP_ERROR_WAIT_RETRY) + " sec. and retrying [" + str(i) + "] ...")
                time.sleep(self.HTTP_ERROR_WAIT_RETRY)
                i -= 1
            else:
                i = -99
                sock.close()

        if i != -99:
            self.log("Cannot retrieve data from '" + self.CONF_URL + "'. Aborting script")
            self.log2video("Error: cannot download XML data, abort")
            time.sleep(5)
            sys.exit(1)

        self.log("End download XML data, now processing XML code.")
        self.log2video("preprocessing XML data, wait ...")
        try:
            xmldoc = minidom.parseString(data)
        except:
            self.log("Warning! Data is not valid XML. Aborting script")
            self.log2video("Error: no valid XML data, abort")
            time.sleep(5)
            sys.exit(1)

        self.log("End processing XML data")
        self.log2video("end processing XML data")

        # days list ('giorno' = day)
        xmlref_giorno = xmldoc.getElementsByTagName('giorno')
        for xml_gg in xmlref_giorno:
            gg = xml_gg.attributes["data"].value
            if gg not in self.DAYCACHEMP:
                continue

            # 'canale' = channel
            xmlref_canale = xml_gg.getElementsByTagName('canale')
            for xml_ch in xmlref_canale:
                chid = xml_ch.attributes["id"].value.strip(' \n\r').lower()
                if not chlist.has_key(chid):
                    self.log("Warning: new channel \"id=%s name=%s\" found in XML data" % (xml_ch.attributes["id"].value, xml_ch.attributes["description"].value))
                    continue

                clist = [chid]
                if self.CHANNELLIST.has_key(chid + '+1'):
                    clist.append(chid + '+1')

                for c in clist:

                    # get cache option
                    #  0 : don't download/cache
                    #  1 : download and cache (optional "1,new_name")
                    #  2 : always download, overwriting existing files (optional "2,new_name")
                    #  3 : always download, overwriting existing files only for TODAY (optional "3,new_name")

                    cacheopt = int(chlist[c].split(",")[0])

                    # if cacheopt == 0, do nothing
                    if cacheopt == 0:
                        continue

                    channel_name = ''
                    if len(chlist[c].split(",")) > 1:
                        if chlist[c].split(",")[1] != '':
                            # channel renamed, new name provided by the user
                            channel_name = chlist[c].split(",")[1].strip(' \n\r').lower()

                    # if the channel name is not present as an option, quit with an error
                    if channel_name == '':
                        self.log("ERROR! ID=%s channel name not present" % c)
                        sys.exit(1)

                    channel_provider = self.CONF_DEFAULT_PROVIDER
                    if len(chlist[c].split(",")) > 2:
                        if chlist[c].split(",")[2] != '':
                            channel_provider = chlist[c].split(",")[2].strip(' \n\r').lower()

                    # redundant safety check (unreachable after the exit above): skip if the name is still missing
                    if channel_name == '':
                        self.log("ERROR! ID=" + str(c) + " channel name not present. Skip!")
                        continue

                    # download only if the file doesn't exist or cacheopt == 2 (always download);
                    # opening with open(..., "w") overwrites existing files (saving a delete + create)

                    day = str(self.convert_daymp(gg))
                    eventfilename = scriptlib.fn_escape(str(c) + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + day)
                    eventfilepath = os.path.join(self.CONF_CACHEDIR, eventfilename)
                    if (cacheopt == 1) and os.path.exists(eventfilepath):
                        continue
                    if (cacheopt == 3) and os.path.exists(eventfilepath) and (gg != self.TODAYMP):
                        continue
                    if (cacheopt != 1) and (cacheopt != 2) and (cacheopt != 3):
                        self.log("Warning: unknown cache option " + str(cacheopt))
                        exit_for_loop = True  # note: this flag is never read elsewhere
                        continue

                    num_events = 0
                    self.log("  Writing in cache '" + eventfilename + "'", 2)
                    self.log2video("  extracting \"%s\" [%d] (%s)" % (channel_name, num_events, day))

                    fd = codecs.open(eventfilepath, "w", 'utf-8')

                    fd.write(str(c) + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + channel_provider + self.FIELD_SEPARATOR + day + '\n')
                    fd.write("Local Time (human readable)###Unix GMT Time###Event Title###Event Description\n")
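                    # resulting cache file layout (hypothetical values):
                    #   joi###joi###mediaset premium###20111230
                    #   Local Time (human readable)###Unix GMT Time###Event Title###Event Description
                    #   2011/12/30 21:00###1325275200###Some Title###Some description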

                    # 'prg' = programme, 'orainizio' = start time
                    xmlref_events = xml_ch.getElementsByTagName('prg')
                    for xml_ee in xmlref_events:
                        orainiz = xml_ee.attributes["orainizio"].value

                        # events starting between 00:00 and 05:59 belong to the next broadcast day
                        if (orainiz >= '00:00') and (orainiz <= '05:59'):
                            nextdayevent = 86400
                        else:
                            nextdayevent = 0

                        event_starttime = gg + " " + orainiz

                        if c == (chid + '+1'):
                            # "+1" timeshift channel: same schedule shifted one hour later
                            event_startime_unix_gmt = str(int(time.mktime(time.strptime(event_starttime, "%Y/%m/%d %H:%M"))) - self.DELTA_UTC + 3600 + nextdayevent)
                        else:
                            # normal channel, not "+1"
                            event_startime_unix_gmt = str(int(time.mktime(time.strptime(event_starttime, "%Y/%m/%d %H:%M"))) - self.DELTA_UTC + nextdayevent)

                        # 'titolo' = title
                        event_title = unicode(xml_ee.getElementsByTagName('titolo')[0].firstChild.data)
                        event_title = event_title.replace('\r', '')
                        event_title = event_title.replace('\n', '')
                        event_title = event_title.strip(u' ')

                        event_description = ''
                        if self.CONF_DL_DESC == 1:
                            # 'linkScheda' = link to the detail page
                            url_desc = xml_ee.getElementsByTagName('linkScheda')[0].firstChild.data
                            event_description = unicode(self.get_description(url_desc.strip(' \n\r'))[:self.CONF_DLDESCMAXCHAR])
                            event_description = event_description.replace('\r', '')
                            event_description = event_description.replace('\n', u' ')
                            event_description = event_description.strip(u' ')

                        fd.write(event_starttime + self.FIELD_SEPARATOR + event_startime_unix_gmt + self.FIELD_SEPARATOR + event_title + self.FIELD_SEPARATOR + event_description + '\n')
                        num_events += 1
                        self.log2video("  extracting \"%s\" [%d] (%s)" % (channel_name, num_events, day))

                    fd.close()

        del xmldoc

    # ----------------------------------------------------------------------

    def process_cache(self):
        self.log("--- START PROCESSING CACHE ---")
        self.log2video("START PROCESSING CACHE")
        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("ERROR: %s not present" % self.CONF_CACHEDIR, 1)
            sys.exit(1)

        self.log("Loading lamedb")
        lamedb = scriptlib.lamedb_class()

        self.log("Initializing CrossEPG database")
        crossdb = scriptlib.crossepg_db_class()
        crossdb.open_db()

        events = []
        previous_id = ''
        channels_name = ''
        total_events = 0

        self.log("Start data processing")
        filelist = sorted(os.listdir(self.CONF_CACHEDIR))
        filelist.append('***END***')
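        # cache filenames are "id###name###day", so after sorting all files of the
        # same channel id are adjacent; events are accumulated per id and flushed
        # into the CrossEPG db whenever the id changes. The '***END***' sentinel
        # forces a final flush for the last channel.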
        for f in filelist:
            id = f.split(self.FIELD_SEPARATOR)[0]
            if previous_id == '':
                previous_id = id

            if id != previous_id:
                total_events += len(events)
                self.log("  ...processing '%s' , nr. events %d" % (previous_id, len(events)))
                self.log2video("processed %d events ..." % total_events)

                for c in channels_name:
                    # a channel can have zero or more SIDs (different channels with the same name)
                    # returns the list [0e1f:00820000:0708:00c8:1:0 , 1d20:00820000:2fa8:013e:1:0 , ..... ]
                    # returns [] if the channel name is not in lamedb
                    sidbyname = lamedb.get_sid_byname(c.strip(' \n').lower())

                    # process every SID
                    for s in sidbyname:
                        # convert "0e1f:00820000:0708:00c8:1:0" to sid,tsid,onid
                        # returns the list [sid,tsid,onid]
                        ch_sid = lamedb.convert_sid(s)
                        if len(ch_sid) == 0:
                            continue

                        # add the channel into the db
                        # it doesn't matter if the channel already exists... epgdb does all the work
                        crossdb.add_channel(ch_sid)

                        i = 0
                        L = len(events) - 1

                        # process events
                        for e in events:

                            items = e.split(self.FIELD_SEPARATOR)
                            e_starttime = int(items[1])

                            if i < L:
                                # event length = next event's start time - this event's start time
                                e_length = int(events[i + 1].split(self.FIELD_SEPARATOR)[1]) - e_starttime
                            else:
                                # last event, dummy length 90 min.
                                e_length = 5400
                            i += 1

                            # extract the title and encode Python Unicode to UTF-8
                            e_title = items[2].encode('utf-8')

                            # extract the summary and encode Python Unicode to UTF-8
                            e_summarie = items[3].encode('utf-8')

                            # add_event(start_time, duration, title, summary, ISO639_language_code, strings_encoded_with_UTF-8)
                            crossdb.add_event(e_starttime, e_length, e_title, e_summarie, 'ita', True)

                if f == '***END***':
                    break

                events = []
                previous_id = id
                channels_name = ''

            if id == previous_id:
                self.log("Reading '%s'" % f)
                # read events from the cache file using UTF-8 and add them to the events list
                fd = codecs.open(os.path.join(self.CONF_CACHEDIR, f), "r", "utf-8")
                lines = fd.readlines()
                fd.close()
                if channels_name == '':
                    # the first line holds the channel data (id,name,provider,date)
                    channels_name = lines[0].split(self.FIELD_SEPARATOR)[1].split('|')
                # the second line is only a remark
                # add events starting from the third line
                events.extend(lines[2:])

        # end of processing: close the CrossEPG DB, saving data
        crossdb.close_db()
        self.log("TOTAL EPG EVENTS PROCESSED: %d" % total_events)
        self.log("--- END ---")
        self.log2video("END , events processed: %d" % total_events)


# ****************************************************************************************************************************

# MAIN CODE: SCRIPT STARTS HERE

# increase this process's niceness (other processes get higher priority)
os.nice(10)

# set the Garbage Collector to do a "generational jump" more frequently than the default 700
# memory saving: about 50% (!!), with some performance loss (obviously)
gc.set_threshold(50, 10, 10)

SCRIPT_DIR = 'scripts/mediaprem/'

# get the CrossEPG installation dir.
crossepg_instroot = crossepg.epgdb_get_installroot()
if crossepg_instroot == False:
    sys.exit(1)
scriptlocation = os.path.join(crossepg_instroot, SCRIPT_DIR)

# get where CrossEPG saves data (dbroot) and use it as the script cache repository
crossepg_dbroot = crossepg.epgdb_get_dbroot()
if crossepg_dbroot == False:
    sys.exit(1)

# initialize the script class
script_class = main(scriptlocation, crossepg_dbroot)

# download data and cache it
script_class.download_and_cache()

# read cached data and inject it into the CrossEPG database
script_class.process_cache()