PyXR

c:\python24\lib\_strptime.py



0001 """Strptime-related classes and functions.
0002 
0003 CLASSES:
0004     LocaleTime -- Discovers and stores locale-specific time information
0005     TimeRE -- Creates regexes for pattern matching a string of text containing
0006                 time information
0007 
0008 FUNCTIONS:
0009     _getlang -- Figure out what language is being used for the locale
0010     strptime -- Calculates the time struct represented by the passed-in string
0011 
0012 """
0013 import time
0014 import locale
0015 import calendar
0016 from re import compile as re_compile
0017 from re import IGNORECASE
0018 from re import escape as re_escape
0019 from datetime import date as datetime_date
0020 try:
0021     from thread import allocate_lock as _thread_allocate_lock
0022 except:
0023     from dummy_thread import allocate_lock as _thread_allocate_lock
0024 
# Module authorship metadata.
__author__ = "Brett Cannon"
__email__ = "brett@python.org"

# Public API: only strptime() is exported by a star-import of this module.
__all__ = ['strptime']
0029 
def _getlang():
    """Return the LC_TIME locale setting as reported by locale.getlocale().

    The value is used as an identity token: callers compare it against a
    previously stored value to detect that the time locale has changed.
    """
    current_setting = locale.getlocale(locale.LC_TIME)
    return current_setting
0033 
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    All names and format strings are stored lower-cased.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; index 0 is the dummy
                    value supplied by calendar.month_name)
        a_month -- abbreviated month names (13-item list; index 0 is the
                    dummy value supplied by calendar.month_abbr)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (2-item tuple of frozensets, in that order)
        lang -- Language used by instance (value returned by _getlang())
    """

    def __init__(self):
        """Set all attributes.

        Order of methods called matters for dependency reasons
        (__calc_date_time reads the weekday, month, AM/PM and timezone
        attributes set by the other methods).

        The locale language is recorded at the onset and then checked again
        before exiting.  This is to make sure that the attributes were not
        set with a mix of information from more than one locale.  This would
        most likely happen when using threads where one thread calls a
        locale-dependent function while another thread changes the locale
        while the function in the other thread is still running.  Proper
        coding would call for locks to prevent changing the locale while
        locale-dependent code is running.  The check here is done in case
        someone does not think about doing this.

        Only other possible issue is if someone changed the timezone and did
        not call time.tzset .  That is an issue for the programmer, though,
        since changing the timezone is worthless without that call.

        Raises ValueError if the locale changed during initialization.

        """
        self.lang = _getlang()
        self.__calc_weekday()
        self.__calc_month()
        self.__calc_am_pm()
        self.__calc_timezone()
        self.__calc_date_time()
        if _getlang() != self.lang:
            raise ValueError("locale changed during initialization")

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added at the front (if front is
        # true) or else at the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __calc_weekday(self):
        # Set self.a_weekday and self.f_weekday using the calendar
        # module.
        self.a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
        self.f_weekday = [calendar.day_name[i].lower() for i in range(7)]

    def __calc_month(self):
        # Set self.f_month and self.a_month using the calendar module.
        # Index 0 of the calendar sequences is a placeholder so that month
        # numbers can be used directly as indexes.
        self.a_month = [calendar.month_abbr[i].lower() for i in range(13)]
        self.f_month = [calendar.month_name[i].lower() for i in range(13)]

    def __calc_am_pm(self):
        # Set self.am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        # Hour 1 yields the AM marker, hour 22 the PM marker.  Plain decimal
        # literals are used on purpose: a leading zero would make the value
        # an octal literal.
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple).lower())
        self.am_pm = am_pm

    def __calc_date_time(self):
        # Set self.LC_date_time, self.LC_date, & self.LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [None, None, None]
        date_time[0] = time.strftime("%c", time_tuple).lower()
        date_time[1] = time.strftime("%x", time_tuple).lower()
        date_time[2] = time.strftime("%X", time_tuple).lower()
        replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
                    (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
                    (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
                    ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
                    ('44', '%M'), ('55', '%S'), ('76', '%j'),
                    ('17', '%d'), ('03', '%m'), ('3', '%m'),
                    # '3' needed for when no leading zero.
                    ('2', '%w'), ('10', '%I')]
        replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
                                                for tz in tz_values])
        # Sunday 1999-01-03 is in week 01 when weeks start on Sunday (%U) but
        # in week 00 when weeks start on Monday (%W), so rendering this date
        # reveals which of the two directives a locale format uses.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # Use a containment test: str.find() returns -1 (true) when the
            # text is missing and 0 (false) when it is found at the very
            # start, so its result must not be used as a boolean.
            if '00' in time.strftime(directive, week_probe):
                U_W = '%W'
            else:
                U_W = '%U'
            # The magic date falls in week 11 under both %U and %W.
            date_time[offset] = current_format.replace('11', U_W)
        self.LC_date_time = date_time[0]
        self.LC_date = date_time[1]
        self.LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.timezone by using time.tzname.
        # Do not worry about possibility of time.tzname[0] == time.tzname[1]
        # and time.daylight; handle that in strptime .
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on every platform.
            pass
        no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()])
        if time.daylight:
            has_saving = frozenset([time.tzname[1].lower()])
        else:
            has_saving = frozenset()
        self.timezone = (no_saving, has_saving)
0174 
0175 
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The instance is a dict mapping a single-character directive (the letter
    following '%' in a format string) to a regex source string containing a
    named group of the same name.  The locale data used to build the
    locale-dependent entries is kept in the locale_time attribute.
    """

    # Characters that must be backslash-escaped so they are matched literally.
    _regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
    # Any run of whitespace in a format matches any amount of whitespace.
    _whitespace_run = re_compile(r'\s+')

    def __init__(self, locale_time=None):
        """Create keys/values.

        locale_time -- object providing the LocaleTime attributes; when None
                       a fresh LocaleTime() is created.

        Order of execution is important for dependency reasons.

        """
        if locale_time is not None:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            # Skip the dummy entry at index 0 of the month lists.
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                        for tz in tz_names),
                                'Z'),
            '%': '%'})
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        # The locale's composite formats are expanded through pattern(), so
        # they must be added after every directive they may contain.
        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
        base.__setitem__('X', self.pattern(self.locale_time.LC_time))

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Want possible matching values to be from longest to shortest.  This
        prevents the possibility of a match occurring for a value that is
        also a substring of a larger value that should have matched (e.g.,
        'abc' matching when 'abcdef' should have been the match).

        Returns '' when every value is the empty string (i.e. the locale has
        no information for this directive).

        """
        to_convert = sorted(to_convert, key=len, reverse=True)
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        regex = '|'.join(re_escape(stuff) for stuff in to_convert)
        return '(?P<%s>%s)' % (directive, regex)

    def pattern(self, format):
        """Return regex pattern for the format string.

        Need to make sure that any characters that might be interpreted as
        regex syntax are escaped.

        Raises KeyError for a directive that has no entry in this mapping
        and IndexError when the format ends with a lone '%'.

        """
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Cannot use re.escape since we have to deal with
        # format directives (%m, etc.).
        format = self._regex_chars.sub(r"\\\1", format)
        # Must happen after the escaping above so the backslash inserted here
        # is not itself escaped.  The replacement template spells the
        # backslash as an escaped backslash; a bare "\s" is not a valid
        # template escape.
        format = self._whitespace_run.sub(r'\\s*', format)
        pieces = []
        while '%' in format:
            directive_index = format.index('%')+1
            # Literal text before the directive, then the directive's regex.
            pieces.append(format[:directive_index-1])
            pieces.append(self[format[directive_index]])
            format = format[directive_index+1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled, case-insensitive re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
0265 
# Lock that serializes access to the module-level caches below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE instance (directive -> regex table) built at import time; its
# locale_time.lang records the locale it was created under.
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
# Maps a format string to its compiled regex object.
_regex_cache = {}
0272 
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Return the day of the year for a week-of-year/weekday pair.

    year -- four-digit year
    week_of_year -- week number as given by %U or %W (week 0 holds the days
                    before the first week-starting day of the year)
    day_of_week -- weekday with Monday == 0 ... Sunday == 6
    week_starts_Mon -- true for %W (weeks start on Monday), false for %U
                       (weeks start on Sunday)
    """
    first_weekday = datetime_date(year, 1, 1).weekday()
    # For %U it is easiest to shift the view so that Sunday is day 0 of the
    # week; afterwards both directives are handled by the same arithmetic.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    # Number of days in week 0; zero when Jan 1 itself starts a week.
    week_0_length = (7 - first_weekday) % 7
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    days_to_week = week_0_length + (7 * (week_of_year - 1))
    return 1 + days_to_week + day_of_week


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input string and the format string.

    data_string -- text to parse
    format -- strftime()-style format describing data_string

    Returns a time.struct_time.  Fields not present in the format default to
    1900-01-01 00:00:00; the weekday and day of the year are calculated when
    not given, and the last (DST) field is -1 unless a %Z value identified
    it.

    Raises ValueError if the format is invalid, if data_string does not
    match the format, or if unconverted data remains after the match.
    """
    global _TimeRE_cache
    _cache_lock.acquire()
    try:
        # Rebuild the directive table and drop regexes compiled for the old
        # locale *before* picking up the objects used for this call;
        # otherwise data from the previous locale would keep being used.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        time_re = _TimeRE_cache
        locale_time = time_re.locale_time
        format_regex = _regex_cache.get(format)
        if not format_regex:
            # An unknown directive surfaces as KeyError and a trailing lone
            # '%' as IndexError; report both as a bad format.
            try:
                format_regex = time_re.compile(format)
            except KeyError:
                raise ValueError("bad directive in format '%s'" % format)
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format)
            _regex_cache[format] = format_regex
    finally:
        _cache_lock.release()
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format:  data=%s  fmt=%s" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # Default to -1 to signify that values not known; not critical to have,
    # though
    week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday
                week_of_year_start = 6
            else:
                # W starts week on Monday
                week_of_year_start = 0
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year
    if julian == -1 and week_of_year != -1 and weekday != -1:
        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                          week_of_year_start == 0)
    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           #be accurate
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))
0437 

Generated by PyXR 0.9.4
SourceForge.net Logo