| 1 | #!/usr/bin/python
|
|---|
| 2 | # rai.py by Ambrosa http://www.ambrosa.net
|
|---|
| 3 | # this module is used for download EPG data from Rai website
|
|---|
| 4 | # derived from E2_LOADEPG
|
|---|
| 5 |
|
|---|
| 6 | __author__ = "ambrosa http://www.ambrosa.net"
|
|---|
| 7 | __copyright__ = "Copyright (C) 2008-2011 Alessandro Ambrosini"
|
|---|
| 8 | __license__ = "CreativeCommons by-nc-sa http://creativecommons.org/licenses/by-nc-sa/3.0/"
|
|---|
| 9 |
|
|---|
| 10 | import os
|
|---|
| 11 | import sys
|
|---|
| 12 | import time
|
|---|
| 13 | import codecs
|
|---|
| 14 | import socket
|
|---|
| 15 | import string
|
|---|
| 16 | import random
|
|---|
| 17 | import urllib2
|
|---|
| 18 | import ConfigParser
|
|---|
| 19 |
|
|---|
| 20 | # import CrossEPG functions
|
|---|
| 21 | import crossepg
|
|---|
| 22 |
|
|---|
| 23 | # location of local python modules under "scripts/lib" dir.
|
|---|
| 24 | # add it to sys.path()
|
|---|
crossepg_instroot = crossepg.epgdb_get_installroot()
# epgdb_get_installroot() returns the installation path string, or False on
# failure — compare against the False singleton with 'is' (PEP 8), since a
# plain falsiness test would also reject a legitimate empty-string path.
if crossepg_instroot is False:
    sys.exit(1)
libdir = os.path.join(crossepg_instroot, 'scripts/lib')
sys.path.append(libdir)
|
|---|
| 30 |
|
|---|
| 31 | # import local modules
|
|---|
| 32 | import sgmllib
|
|---|
| 33 | import scriptlib
|
|---|
| 34 |
|
|---|
| 35 | # =================================================================
|
|---|
| 36 | # HTML PARSER
|
|---|
| 37 |
|
|---|
class Titolo_parser(sgmllib.SGMLParser):
    """SGML parser extracting (start time, title) pairs from a RAI daily
    schedule HTML page.

    Events are accumulated in two lists: ``guidatoday`` for the requested
    day and ``guidatomorrow`` for early-morning events (before 06:00) that
    the website lists on the same page but that actually belong to the
    next calendar day.
    """

    def parse(self, s):
        """Feed the whole HTML document *s* to the parser and close it."""
        self.feed(s)
        self.close()

    def __init__(self, day_get, verbose=0):
        """*day_get* is the schedule day in AAAAMMDD format."""
        sgmllib.SGMLParser.__init__(self, verbose)
        self.daynow = day_get
        # day following day_get (AAAAMMDD), obtained by adding 86400 seconds
        self.daynext = time.strftime("%Y%m%d",time.localtime(time.mktime(time.strptime(day_get,"%Y%m%d"))+86400))
        self.day = self.daynow
        self.guidatoday = []
        self.guidatomorrow = []
        self.sera = False
        self.tomorrow = False
        # parser state flags:
        self.start_orario = False        # inside a <span class="ora"> (event time)
        self.start_titolo = False        # inside a <span class="info"> (event title)
        self.inside_a_titolo = False     # inside the <a> tag holding the title text
        self.inside_palinsesto = False   # inside the <div class="intG"> schedule container

    def start_div(self,attributes):
        # a <div class="intG"> opens one schedule entry
        for name,value in attributes:
            if name == "class":
                if value == "intG":
                    self.inside_palinsesto = True

    def start_span(self, attributes):
        # inside a schedule entry, <span class="ora"> carries the time and
        # <span class="info"> carries the title
        if self.inside_palinsesto == True:
            for name, value in attributes:
                if name == "class":
                    if value == "ora":
                        self.start_orario = True
                    if value == "info":
                        self.start_titolo = True

    def start_a(self,attributes):
        # the title text itself is wrapped in an <a> inside the info span
        if self.inside_palinsesto == True:
            if self.start_titolo == True:
                self.inside_a_titolo = True

    def handle_data(self, data):
        """Collect text content: either an event time ("HH:MM") or a title."""
        if self.inside_palinsesto == True:

            if self.start_orario == True:

                # if time < 06:00 is a next day event
                if int(time.strftime("%H",time.strptime(data,"%H:%M"))) < 6 :
                    self.day = self.daynext
                    self.tomorrow = True
                else:
                    # a time >= 06:00 appearing after next-day events means the
                    # interesting part of the page is over: stop collecting
                    if self.tomorrow == True:
                        self.inside_a_titolo = False
                        self.start_titolo = False
                        self.inside_palinsesto = False
                        return

                # full event timestamp "YYYY-MM-DD HH:MM" for the current day
                self.dataoraevento = time.strftime("%Y-%m-%d %H:%M",time.strptime(self.day+'-'+data,"%Y%m%d-%H:%M"))
                self.start_orario = False

            if self.inside_a_titolo == True:
                if self.tomorrow == False:
                    self.guidatoday.append((self.dataoraevento,data.strip()))
                else:
                    self.guidatomorrow.append((self.dataoraevento,data.strip()))

                # reset state until the next <div class="intG"> is seen
                self.inside_a_titolo = False
                self.start_titolo = False
                self.inside_palinsesto = False

    def get_guida(self):
        """Return the (today_events, tomorrow_events) pair of lists."""
        return ((self.guidatoday,self.guidatomorrow))
|
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 | # =================================================================
|
|---|
| 114 |
|
|---|
| 115 |
|
|---|
class main:
    """Download EPG data for the configured RAI channels, cache it as text
    files, then inject the cached events into the CrossEPG database.

    NOTE(review): the attributes below are class-level (shared across
    instances); __init__ rebinds TODAY/DAYCACHE per instance but mutates
    CHANNELLIST in place.
    """

    # main config file
    CONF_CONFIGFILENAME = "rai.conf"

    # Network socket timeout (in seconds)
    CONF_SOCKET_TIMEOUT = 20

    # log text
    CONF_LOG_SCRIPT_NAME = "RAI (Italy)"
    CONF_LOG_PREFIX = "RAI: "

    # retry number if HTTP error
    HTTP_ERROR_RETRY = 3
    # seconds to wait between retries
    HTTP_ERROR_WAIT_RETRY = 5

    # random time delay (in seconds) between access to remote web pages
    CONF_RANDOM_MIN = 0.0
    CONF_RANDOM_MAX = 2.0

    # charset used in remote website epg data
    REMOTE_EPG_CHARSET = 'utf-8'

    # today's date (AAAAMMDD), list of dates to cache, field separator used
    # inside cache file names/lines, and channel-id -> options dictionary
    TODAY = ''
    DAYCACHE = []
    FIELD_SEPARATOR = '###'
    CHANNELLIST = {}

    def log(self,s,video=0):
        """Write *s* to the CrossEPG log; if video == 1 also show it on the OSD."""
        self.logging.log(self.CONF_LOG_PREFIX + str(s))
        if video == 1:
            self.log2video(str(s))

    def log2video(self,s):
        """Show *s* on the on-screen-display status line."""
        self.logging.log2video_status(str(s))

    def __init__(self,confdir,dbroot):
        """Read rai.conf from *confdir*, prepare the cache directory under
        *dbroot*, and build the channel list and the list of days to fetch.

        Exits the process (sys.exit(1)) on missing config file or empty
        [channels] section.
        """

        # initialize logging
        self.logging = scriptlib.logging_class()
        # write to video OSD the script name
        self.logging.log2video_scriptname(self.CONF_LOG_SCRIPT_NAME)

        CONF_FILE = os.path.join(confdir,self.CONF_CONFIGFILENAME)
        if not os.path.exists(CONF_FILE) :
            self.log("ERROR: %s not present" % CONF_FILE,1)
            sys.exit(1)

        config = ConfigParser.ConfigParser()
        config.optionxform = str # needed to return case sensitive index
        config.read(CONF_FILE)

        # reading [global] section options
        self.CONF_DEFAULT_PROVIDER = config.get("global","DEFAULT_PROVIDER")
        # save cache under dbroot
        self.CONF_CACHEDIR = os.path.join(dbroot,config.get("global","CACHE_DIRNAME"))

        self.CONF_MAX_DAY_EPG = config.getint("global","MAX_DAY_EPG")
        self.CONF_URL = config.get("global","URL")

        # GMT_ZONE is either 'equal' (website timestamps already match local
        # time) or the website timezone offset from UTC in hours
        self.CONF_GMT_ZONE = config.get("global","GMT_ZONE")
        if self.CONF_GMT_ZONE.strip(' ').lower() == 'equal':
            #self.DELTA_UTC = -scriptlib.delta_utc() # return negative if timezone is east of GMT (like Italy), invert sign
            self.DELTA_UTC = 0
        else:
            self.DELTA_UTC = float(self.CONF_GMT_ZONE)*3600.0
            # apply the daylight-saving correction in the same direction as
            # the configured offset
            if self.DELTA_UTC >= 0:
                self.DELTA_UTC = self.DELTA_UTC + scriptlib.delta_dst()
            else:
                self.DELTA_UTC = self.DELTA_UTC - scriptlib.delta_dst()

        self.DELTA_UTC = int(self.DELTA_UTC)
        #self.log("Website timezone - UTC = %d seconds" % self.DELTA_UTC)

        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("Creating \'%s\' directory for caching" % self.CONF_CACHEDIR)
            os.mkdir(self.CONF_CACHEDIR)

        # reading [channels] section
        temp=config.items("channels");

        # create a dictionary (Python array) with index = channel ID
        for i in temp:
            self.CHANNELLIST[i[0]] = unicode(i[1],'utf-8')

        if len(self.CHANNELLIST) == 0 :
            self.log("ERROR: [channels] section empty ?",1)
            sys.exit(1)

        # set network socket timeout
        socket.setdefaulttimeout(self.CONF_SOCKET_TIMEOUT)

        # initialize random generator
        random.seed()

        # today date (format AAAAMMDD)
        self.TODAY = time.strftime("%Y%m%d")

        # create a list filled with dates (format AAAAMMDD) from today to today+MAX_DAY_EPG
        self.DAYCACHE=[self.TODAY]
        for day in range(1,self.CONF_MAX_DAY_EPG):
            self.DAYCACHE.append(time.strftime("%Y%m%d",time.localtime(time.time()+86400*day)))

    # ----------------------------------------------------------------------

    def download_and_cache(self):
        """Download the HTML schedule for every enabled channel and day and
        store the parsed events as UTF-8 text files in the cache directory.

        Cache file format: one header line
        "id###name###provider###day", one remark line, then one
        "localtime###unixgmt###title###description" line per event.
        """
        self.log("--- START DOWNLOAD AND CACHE DATA ---")
        self.log2video("STARTING DOWNLOAD")

        self.log("Removing old cached files")
        scriptlib.cleanup_oldcachedfiles(self.CONF_CACHEDIR, self.FIELD_SEPARATOR)

        #self.log("Start downloading HTML data from \'%s\'" % self.CONF_URL)

        chlist = self.CHANNELLIST

        # get remote XML files
        # chid format: channel id , 0|1|2(,new name)
        # i.e. ("101" , "1,SkyCinema1")
        for c in sorted(chlist.keys()):
            self.guidatoday = []
            self.guidatomorrow = []

            # get cache option
            # 0 : don't download/cache
            # 1 : download and cache (optional 1,new_name )
            # 2 : always download overwriting existing files (optional 2,new_name )
            # 3 : always download overwriting existing files only for TODAY (optional 3,new_name )

            cacheopt = int(string.split(chlist[c],",")[0])

            # if cacheopt == 0, do nothing
            if cacheopt == 0:
                continue

            channel_name = ''
            if len(chlist[c].split(",")) > 1 :
                if chlist[c].split(",")[1] != '' :
                    # channel renamed, new name provided by user
                    channel_name = chlist[c].split(",")[1].strip(' ').lower()

            # if channel name is not present as option, quit with error
            if channel_name == '':
                self.log("ERROR ! ID=%s channel name not present" % c, 1)
                sys.exit(1)

            # optional third field overrides the default provider name
            channel_provider = self.CONF_DEFAULT_PROVIDER
            if len(chlist[c].split(",")) > 2 :
                if chlist[c].split(",")[2] != '' :
                    channel_provider = chlist[c].split(",")[2].strip(' ').lower()

            exit_for_loop = False
            for day in self.DAYCACHE:
                if exit_for_loop == True:
                    break

                day_get = time.strftime("%Y_%m_%d",time.strptime(day,"%Y%m%d"))
                xmlfile = "?%s_%s" % (c,day_get)

                # download only if file doesn't exist or cacheopt == 2 (always download),
                # using open(...,"w") files will be overwritten (saving a delete + create)

                eventfilename = scriptlib.fn_escape(str(c) + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + day)
                eventfilepath = os.path.join(self.CONF_CACHEDIR, eventfilename)
                if (cacheopt == 1) and os.path.exists(eventfilepath):
                    continue
                if (cacheopt == 3) and os.path.exists(eventfilepath) and (day != self.TODAY):
                    continue
                if (cacheopt != 1) and (cacheopt != 2) and (cacheopt != 3):
                    self.log("Warning: unknown cache option " + str(cacheopt))
                    exit_for_loop = True
                    continue

                self.log("Download HTML data from \'%s\'" % (self.CONF_URL + xmlfile))
                self.log2video("Download " + c)

                i = self.HTTP_ERROR_RETRY
                while i > 0 :
                    # wait randomly to avoid overloading website
                    time.sleep(random.uniform(self.CONF_RANDOM_MIN, self.CONF_RANDOM_MAX))

                    try:
                        sock=urllib2.urlopen(self.CONF_URL + xmlfile)
                        data=sock.read()

                    except IOError, e:
                        # build a readable reason string from the urllib2 error
                        # (URLError carries .reason, HTTPError carries .code/.msg)
                        serr="unknown"
                        if hasattr(e, 'reason'):
                            serr=str(e.reason)
                        elif hasattr(e, 'code'):
                            serr=str(e.code)
                            if hasattr(e, 'msg'):
                                serr+=" , "+str(e.msg)

                        self.log("\'%s\' connection error. Reason: %s. Waiting %d sec. and retry [%d] ..." % (self.CONF_URL + xmlfile, serr, self.HTTP_ERROR_WAIT_RETRY, i))
                        time.sleep(self.HTTP_ERROR_WAIT_RETRY) # add sleep
                        i -= 1

                    else:
                        i = 0 # force quit WHILE loop
                        sock.close()

                        # parse the downloaded page
                        dtparser = Titolo_parser(day)
                        dtparser.parse(data)
                        # early-morning (< 06:00) events parsed while handling the
                        # previous day belong to this day: start from the leftover
                        # "tomorrow" list of the previous iteration
                        self.guida = self.guidatomorrow
                        (self.guidatoday, self.guidatomorrow) = dtparser.get_guida()

                        # if no data, quit for loop and stop downloading
                        if len(self.guidatoday) == 0:
                            exit_for_loop = True
                            break

                        self.guida = self.guida + self.guidatoday

                        self.log(" writing in cache \'%s\'" % eventfilename)
                        # write data in cache file using UTF-8 encoding
                        fd = codecs.open(eventfilepath, "w", 'utf-8')
                        fd.write(str(c) + self.FIELD_SEPARATOR + channel_name + self.FIELD_SEPARATOR + channel_provider + self.FIELD_SEPARATOR + day + '\n')
                        fd.write("Local Time (human readeable)###Unix GMT Time###Event Title###Event Description\n")

                        # extract all events and put in eventfile
                        for event in self.guida:
                            (dataora,titolo) = event
                            event_starttime = dataora
                            # time.mktime return Unix time inside GMT timezone
                            event_startime_unix_gmt = str(int(time.mktime(time.strptime(event_starttime,"%Y-%m-%d %H:%M"))) - self.DELTA_UTC )
                            #event_startime_unix_gmt = str(int(time.mktime(time.strptime(event_starttime,"%Y-%m-%d %H:%M"))) )
                            #self.log(event_starttime + " , " + str(self.DELTA_UTC) + " , " + str(int(time.mktime(time.strptime(event_starttime,"%Y-%m-%d %H:%M")))) + " , " + event_startime_unix_gmt )

                            # convert remote data (RAI website use UTF-8) in Python Unicode (UCS2)
                            event_title = unicode(titolo,self.REMOTE_EPG_CHARSET)

                            # normalize the title to a single trimmed line
                            event_title = event_title.replace('\r','')
                            event_title = event_title.replace('\n',u' ')
                            event_title = event_title.strip(u' ')

                            event_description = u''

                            fd.write(event_starttime + self.FIELD_SEPARATOR + event_startime_unix_gmt + self.FIELD_SEPARATOR + event_title + self.FIELD_SEPARATOR + event_description + '\n')

                        fd.close()

    # ----------------------------------------------------------------------

    def process_cache(self):
        """Read every cached event file and inject channels and events into
        the CrossEPG database via scriptlib/lamedb helpers."""
        self.log("--- START PROCESSING CACHE ---")
        self.log2video("START PROCESSING CACHE")
        if not os.path.exists(self.CONF_CACHEDIR):
            self.log("ERROR: %s not present" % self.CONF_CACHEDIR,1)
            sys.exit(1)

        self.log("Loading lamedb")
        lamedb = scriptlib.lamedb_class()

        self.log("Initialize CrossEPG database")
        crossdb = scriptlib.crossepg_db_class()
        crossdb.open_db()

        events = []
        previous_id = ''
        channels_name = ''
        total_events = 0

        self.log("Start data processing")
        # sorted listing groups the files of the same channel id together;
        # the sentinel entry forces the last group to be flushed
        filelist = sorted(os.listdir(self.CONF_CACHEDIR))
        filelist.append('***END***')

        for f in filelist :
            # cache file name format: id###channel_name###day
            id = f.split(self.FIELD_SEPARATOR)[0]
            if previous_id == '':
                previous_id = id

            # channel id changed: flush the events accumulated for the
            # previous channel into the database
            if id != previous_id :
                total_events += len(events)
                self.log(" ...processing \'%s\' , nr. events %d" % (previous_id,len(events)))
                self.log2video("processed %d events ..." % total_events )

                for c in channels_name:
                    # a channel can have zero or more SID (different channel with same name)
                    # return the list [0e1f:00820000:0708:00c8:1:0 , 1d20:00820000:2fa8:013e:1:0 , ..... ]
                    # return [] if channel name is not in lamedb
                    sidbyname = lamedb.get_sid_byname(c.strip(' \n').lower())

                    # process every SID
                    for s in sidbyname:
                        # convert "0e1f:00820000:0708:00c8:1:0" to sid,tsid,onid
                        # return the list [sid,tsid,onid]
                        ch_sid = lamedb.convert_sid(s)
                        if len(ch_sid) == 0:
                            continue

                        # add channel into db
                        # doesn't matter if the channel already exist... epgdb do all the work
                        crossdb.add_channel(ch_sid)

                        i = 0
                        L = len(events) - 1

                        # process events
                        for e in events:

                            # cached line format: localtime###unixgmt###title###description
                            e_starttime = int(e.split(self.FIELD_SEPARATOR)[1])

                            # event duration = next event's start - this start
                            if i < L :
                                e_length = int(events[i+1].split(self.FIELD_SEPARATOR)[1]) - e_starttime
                            else:
                                # last event, dummy length 90 min.
                                e_length = 5400
                            i += 1

                            # extract title and encode Python Unicode with UTF-8
                            e_title = e.split(self.FIELD_SEPARATOR)[2].encode('utf-8')

                            # RAI website HAVE NOT long description. (bleah !).
                            e_summarie = u' '
                            # encode Python Unicode in UTF-8
                            e_summarie = e_summarie.encode('utf-8')

                            # add_event(start_time , duration , title , summarie , ISO639_language_code , strings_encoded_with_UTF-8)
                            crossdb.add_event(e_starttime, e_length, e_title, e_summarie, 'ita', True )

                if f == '***END***':
                    break

                # start accumulating the next channel's events
                events = []
                previous_id = id
                channels_name = ''

            if id == previous_id:
                self.log("Reading \'%s\'" % f)
                # read events from cache file using UTF-8 and insert them in events list
                fd = codecs.open(os.path.join(self.CONF_CACHEDIR, f),"r","utf-8")
                lines = fd.readlines()
                fd.close()
                if channels_name == '':
                    # first line has channel data (id,name,provider,date)
                    channels_name = lines[0].split(self.FIELD_SEPARATOR)[1].split('|')
                # the second line is only a remark
                # add events starting from third line
                events.extend(lines[2:])

        # end process, close CrossEPG DB saving data
        crossdb.close_db()
        self.log("TOTAL EPG EVENTS PROCESSED: %d" % total_events)
        self.log("--- END ---")
        self.log2video("END , events processed: %d" % total_events)
|
|---|
| 470 |
|
|---|
| 471 |
|
|---|
| 472 |
|
|---|
| 473 | # ****************************************************************************************************************************
|
|---|
| 474 |
|
|---|
| 475 | # MAIN CODE: SCRIPT START HERE
|
|---|
| 476 |
|
|---|
SCRIPT_DIR = 'scripts/rai/'

# get CrossEPG installation dir.
# epgdb_get_installroot()/epgdb_get_dbroot() return a path string, or the
# False singleton on failure — compare with 'is False' (PEP 8), not '=='.
crossepg_instroot = crossepg.epgdb_get_installroot()
if crossepg_instroot is False:
    sys.exit(1)
scriptlocation = os.path.join(crossepg_instroot, SCRIPT_DIR)

# get where CrossEPG save data (dbroot) and use it as script cache repository
crossepg_dbroot = crossepg.epgdb_get_dbroot()
if crossepg_dbroot is False:
    sys.exit(1)

# initialize script class
script_class = main(scriptlocation, crossepg_dbroot)

# download data and cache them
script_class.download_and_cache()

# read cached data and inject into CrossEPG database
script_class.process_cache()
|
|---|
| 498 |
|
|---|