0001 """Strptime-related classes and functions. 0002 0003 CLASSES: 0004 LocaleTime -- Discovers and stores locale-specific time information 0005 TimeRE -- Creates regexes for pattern matching a string of text containing 0006 time information 0007 0008 FUNCTIONS: 0009 _getlang -- Figure out what language is being used for the locale 0010 strptime -- Calculates the time struct represented by the passed-in string 0011 0012 """ 0013 import time 0014 import locale 0015 import calendar 0016 from re import compile as re_compile 0017 from re import IGNORECASE 0018 from re import escape as re_escape 0019 from datetime import date as datetime_date 0020 try: 0021 from thread import allocate_lock as _thread_allocate_lock 0022 except: 0023 from dummy_thread import allocate_lock as _thread_allocate_lock 0024 0025 __author__ = "Brett Cannon" 0026 __email__ = "brett@python.org" 0027 0028 __all__ = ['strptime'] 0029 0030 def _getlang(): 0031 # Figure out what the current language is set to. 0032 return locale.getlocale(locale.LC_TIME) 0033 0034 class LocaleTime(object): 0035 """Stores and handles locale-specific information related to time. 0036 0037 ATTRIBUTES: 0038 f_weekday -- full weekday names (7-item list) 0039 a_weekday -- abbreviated weekday names (7-item list) 0040 f_month -- full month names (13-item list; dummy value in [0], which 0041 is added by code) 0042 a_month -- abbreviated month names (13-item list, dummy value in 0043 [0], which is added by code) 0044 am_pm -- AM/PM representation (2-item list) 0045 LC_date_time -- format string for date/time representation (string) 0046 LC_date -- format string for date representation (string) 0047 LC_time -- format string for time representation (string) 0048 timezone -- daylight- and non-daylight-savings timezone representation 0049 (2-item list of sets) 0050 lang -- Language used by instance (2-item tuple) 0051 """ 0052 0053 def __init__(self): 0054 """Set all attributes. 
0055 0056 Order of methods called matters for dependency reasons. 0057 0058 The locale language is set at the offset and then checked again before 0059 exiting. This is to make sure that the attributes were not set with a 0060 mix of information from more than one locale. This would most likely 0061 happen when using threads where one thread calls a locale-dependent 0062 function while another thread changes the locale while the function in 0063 the other thread is still running. Proper coding would call for 0064 locks to prevent changing the locale while locale-dependent code is 0065 running. The check here is done in case someone does not think about 0066 doing this. 0067 0068 Only other possible issue is if someone changed the timezone and did 0069 not call tz.tzset . That is an issue for the programmer, though, 0070 since changing the timezone is worthless without that call. 0071 0072 """ 0073 self.lang = _getlang() 0074 self.__calc_weekday() 0075 self.__calc_month() 0076 self.__calc_am_pm() 0077 self.__calc_timezone() 0078 self.__calc_date_time() 0079 if _getlang() != self.lang: 0080 raise ValueError("locale changed during initialization") 0081 0082 def __pad(self, seq, front): 0083 # Add '' to seq to either the front (is True), else the back. 0084 seq = list(seq) 0085 if front: 0086 seq.insert(0, '') 0087 else: 0088 seq.append('') 0089 return seq 0090 0091 def __calc_weekday(self): 0092 # Set self.a_weekday and self.f_weekday using the calendar 0093 # module. 0094 a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] 0095 f_weekday = [calendar.day_name[i].lower() for i in range(7)] 0096 self.a_weekday = a_weekday 0097 self.f_weekday = f_weekday 0098 0099 def __calc_month(self): 0100 # Set self.f_month and self.a_month using the calendar module. 
0101 a_month = [calendar.month_abbr[i].lower() for i in range(13)] 0102 f_month = [calendar.month_name[i].lower() for i in range(13)] 0103 self.a_month = a_month 0104 self.f_month = f_month 0105 0106 def __calc_am_pm(self): 0107 # Set self.am_pm by using time.strftime(). 0108 0109 # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that 0110 # magical; just happened to have used it everywhere else where a 0111 # static date was needed. 0112 am_pm = [] 0113 for hour in (01,22): 0114 time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) 0115 am_pm.append(time.strftime("%p", time_tuple).lower()) 0116 self.am_pm = am_pm 0117 0118 def __calc_date_time(self): 0119 # Set self.date_time, self.date, & self.time by using 0120 # time.strftime(). 0121 0122 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of 0123 # overloaded numbers is minimized. The order in which searches for 0124 # values within the format string is very important; it eliminates 0125 # possible ambiguity for what something represents. 0126 time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) 0127 date_time = [None, None, None] 0128 date_time[0] = time.strftime("%c", time_tuple).lower() 0129 date_time[1] = time.strftime("%x", time_tuple).lower() 0130 date_time[2] = time.strftime("%X", time_tuple).lower() 0131 replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), 0132 (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), 0133 (self.a_month[3], '%b'), (self.am_pm[1], '%p'), 0134 ('1999', '%Y'), ('99', '%y'), ('22', '%H'), 0135 ('44', '%M'), ('55', '%S'), ('76', '%j'), 0136 ('17', '%d'), ('03', '%m'), ('3', '%m'), 0137 # '3' needed for when no leading zero. 
0138 ('2', '%w'), ('10', '%I')] 0139 replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone 0140 for tz in tz_values]) 0141 for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): 0142 current_format = date_time[offset] 0143 for old, new in replacement_pairs: 0144 # Must deal with possible lack of locale info 0145 # manifesting itself as the empty string (e.g., Swedish's 0146 # lack of AM/PM info) or a platform returning a tuple of empty 0147 # strings (e.g., MacOS 9 having timezone as ('','')). 0148 if old: 0149 current_format = current_format.replace(old, new) 0150 time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) 0151 if time.strftime(directive, time_tuple).find('00'): 0152 U_W = '%U' 0153 else: 0154 U_W = '%W' 0155 date_time[offset] = current_format.replace('11', U_W) 0156 self.LC_date_time = date_time[0] 0157 self.LC_date = date_time[1] 0158 self.LC_time = date_time[2] 0159 0160 def __calc_timezone(self): 0161 # Set self.timezone by using time.tzname. 0162 # Do not worry about possibility of time.tzname[0] == timetzname[1] 0163 # and time.daylight; handle that in strptime . 0164 try: 0165 time.tzset() 0166 except AttributeError: 0167 pass 0168 no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) 0169 if time.daylight: 0170 has_saving = frozenset([time.tzname[1].lower()]) 0171 else: 0172 has_saving = frozenset() 0173 self.timezone = (no_saving, has_saving) 0174 0175 0176 class TimeRE(dict): 0177 """Handle conversion from format directives to regexes.""" 0178 0179 def __init__(self, locale_time=None): 0180 """Create keys/values. 0181 0182 Order of execution is important for dependency reasons. 
0183 0184 """ 0185 if locale_time: 0186 self.locale_time = locale_time 0187 else: 0188 self.locale_time = LocaleTime() 0189 base = super(TimeRE, self) 0190 base.__init__({ 0191 # The " \d" part of the regex is to make %c from ANSI C work 0192 'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 0193 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", 0194 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", 0195 'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 0196 'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", 0197 'M': r"(?P<M>[0-5]\d|\d)", 0198 'S': r"(?P<S>6[0-1]|[0-5]\d|\d)", 0199 'U': r"(?P<U>5[0-3]|[0-4]\d|\d)", 0200 'w': r"(?P<w>[0-6])", 0201 # W is set below by using 'U' 0202 'y': r"(?P<y>\d\d)", 0203 #XXX: Does 'Y' need to worry about having less or more than 0204 # 4 digits? 0205 'Y': r"(?P<Y>\d\d\d\d)", 0206 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 0207 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), 0208 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), 0209 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), 0210 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 0211 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone 0212 for tz in tz_names), 0213 'Z'), 0214 '%': '%'}) 0215 base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) 0216 base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) 0217 base.__setitem__('x', self.pattern(self.locale_time.LC_date)) 0218 base.__setitem__('X', self.pattern(self.locale_time.LC_time)) 0219 0220 def __seqToRE(self, to_convert, directive): 0221 """Convert a list to a regex string for matching a directive. 0222 0223 Want possible matching values to be from longest to shortest. This 0224 prevents the possibility of a match occuring for a value that also 0225 a substring of a larger value that should have matched (e.g., 'abc' 0226 matching when 'abcdef' should have been the match). 
0227 0228 """ 0229 to_convert = sorted(to_convert, key=len, reverse=True) 0230 for value in to_convert: 0231 if value != '': 0232 break 0233 else: 0234 return '' 0235 regex = '|'.join(re_escape(stuff) for stuff in to_convert) 0236 regex = '(?P<%s>%s' % (directive, regex) 0237 return '%s)' % regex 0238 0239 def pattern(self, format): 0240 """Return regex pattern for the format string. 0241 0242 Need to make sure that any characters that might be interpreted as 0243 regex syntax are escaped. 0244 0245 """ 0246 processed_format = '' 0247 # The sub() call escapes all characters that might be misconstrued 0248 # as regex syntax. Cannot use re.escape since we have to deal with 0249 # format directives (%m, etc.). 0250 regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") 0251 format = regex_chars.sub(r"\\\1", format) 0252 whitespace_replacement = re_compile('\s+') 0253 format = whitespace_replacement.sub('\s*', format) 0254 while '%' in format: 0255 directive_index = format.index('%')+1 0256 processed_format = "%s%s%s" % (processed_format, 0257 format[:directive_index-1], 0258 self[format[directive_index]]) 0259 format = format[directive_index+1:] 0260 return "%s%s" % (processed_format, format) 0261 0262 def compile(self, format): 0263 """Return a compiled re object for the format string.""" 0264 return re_compile(self.pattern(format), IGNORECASE) 0265 0266 _cache_lock = _thread_allocate_lock() 0267 # DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock 0268 # first! 
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
_regex_cache = {}


def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Calculate the day of the year from a week number and a weekday.

    year -- the year
    week_of_year -- week number as given by %U or %W (week 0 holds the days
                    before the first week-start day of the year)
    day_of_week -- weekday with 0 being Monday
    week_starts_Mon -- true for %W (weeks start on Monday), false for %U
                        (weeks start on Sunday)

    Returns the 1-based day of the year.  The result is 0 or negative when
    the requested day of week 0 falls before January 1.

    """
    first_weekday = datetime_date(year, 1, 1).weekday()
    # Shift both weekday numbers so that 0 is the day the week starts on;
    # the rest of the calculation is then the same for %U and %W.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    # Number of days in week 0 (zero when January 1 itself starts a week).
    week_0_length = (7 - first_weekday) % 7
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    days_to_week = week_0_length + (7 * (week_of_year - 1))
    return 1 + days_to_week + day_of_week


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input string and the format string.

    data_string -- text to parse
    format -- format string made of %-directives and literal text

    Returns a time.struct_time.  Fields that cannot be derived from the
    input default to 1900-01-01 00:00:00 with tm_isdst == -1.

    Raises ValueError if data_string does not match format or if text is
    left over after the match.

    """
    global _TimeRE_cache
    _cache_lock.acquire()
    try:
        # A locale change invalidates both the TimeRE instance and every
        # regex that was compiled from it.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        # Read the locale data only after the possible refresh above so the
        # values below are decoded with the same locale that built the regex.
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(format)
        if not format_regex:
            format_regex = _TimeRE_cache.compile(format)
            _regex_cache[format] = format_regex
    finally:
        _cache_lock.release()
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format:  data=%s  fmt=%s" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # Default to -1 to signify that values not known; not critical to have,
    # though
    week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday
                week_of_year_start = 6
            else:
                # W starts week on Monday
                week_of_year_start = 0
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian == -1 and week_of_year != -1 and weekday != -1:
        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                          week_of_year_start == 0)
        if julian <= 0:
            # The requested day of week 0 lies in the previous year; express
            # it as a day of that year.  This also keeps a computed value of
            # -1 from being mistaken for the "unknown" marker below.
            year -= 1
            julian += (datetime_date(year + 1, 1, 1).toordinal() -
                       datetime_date(year, 1, 1).toordinal())
    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           #be accurate
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))
# Generated by PyXR 0.9.4