# Name: RB Axtell
# Class: CSC 250
# Contents: functions and answers for relab
# Date: February 10, 2010

import re


def _matched_text(match):
    """Return the full text of a match, or None when the search failed."""
    # re.search returns None on failure; calling .group() on it would raise
    # AttributeError, so the no-match case is reported as None instead.
    if match is None:
        return None
    return match.group(0)


######################################################
# Question 1:
def question1():
    """Print the outer tag, inner tag and contents of two nested-tag strings."""
    # Groups 1 and 2 capture the outer and inner tag names; the closing tags
    # are matched with backreferences so they must pair up with the openers.
    # NOTE(review): the closing-tag backreferences and the markup in the two
    # sample strings were missing from the file (apparently stripped as HTML);
    # they are reconstructed here from the surrounding comments -- confirm
    # against the original assignment.
    pattern = (r'<([A-Z][A-Z0-9]*)\b[^>]*><([A-Z][A-Z0-9]*)\b[^>]*>(.*?)'
               r'</\2></\1>')
    inline_sample = r'<b><i>The spring 2010 foundations class</i></b>'
    block_sample = r'<div><p>The spring 2010 foundations class</p></div>'

    inline_match = re.search(pattern, inline_sample, re.IGNORECASE)
    block_match = re.search(pattern, block_sample, re.IGNORECASE)

    # The outer html tag, inner html tag, and contents of the first sample
    print(inline_match.groups())
    # The outer html tag, inner html tag, and contents of the second sample
    print(block_match.groups())


######################################################
# Question 2:
# 1)
# \b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b
# \b is a word boundary
# \d{1,3} is a number of 1 to 3 digits
# \. is a literal period, because a bare . represents any character in an RE
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers separated by periods,
# followed by another word boundary
#
# 2)
# \b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b
# 25[0-5] is 25, followed by any digit 0-5 (250,251,...,255)
# 2[0-4][0-9] is 2, followed by any digit 0-4, followed by any digit 0-9
#   (200,201,...,249)
# [01]?[0-9][0-9]? is 0 or 1 (zero or one times), followed by any digit 0-9,
#   followed by any digit 0-9 (zero or one times)
#   (0,1,...,10,11,...,199) and (00,01,...,10,11,...,199)
#   and (000,001,...,010,011,...,199)
# Those three joined with | (or) will produce any number of 1 to 3 digits
# up to 255
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers from 0-255 separated by periods,
# followed by another word boundary
#
# 3)
# \b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b
# (?: will match whatever RE is inside, but that substring cannot be
#   retrieved after the match
# (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3} is any three
#   1-to-3-digit numbers up to 255, each followed by a period
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers from 0-255 separated by periods,
# followed by another word boundary


######################################################
# Question 3:
def question3():
    """Print each sample sentence that contains all three keywords.

    A sentence that lacks any of the keywords prints None.
    """
    # ^ is the beginning of a string
    # (?= looks ahead, so the words can be found in any order
    # .*? looks for some or no characters before the specified word
    # \bmelody\b looks for melody with a word boundary on either side
    # $ is the end of a string
    # This RE looks for a whole line containing all three specified words
    # in any order
    pattern = (r'^(?=.*?\bmelody\b)(?=.*?\bsimilarity\b)'
               r'(?=.*?\bcomputer\b).*$')

    # All three words in the given order
    in_order = r'The melody has a similarity to that computer!'
    # All three words in a different order
    reordered = r'The computer has a melody that has some similarity too.'
    # Only two of the words
    incomplete = r'The melody and the computer.'

    # The first two sentences match in full; the last one has no match
    # because not all three words are present.
    for sentence in (in_order, reordered, incomplete):
        print(_matched_text(re.search(pattern, sentence, re.IGNORECASE)))