# Name: RB Axtell
# Class: CSC 250
# Contents: functions and answers for relab
# Date: February 10, 2010
import re
######################################################
# Question 1:
def question1():
    """Demonstrate backreferences by matching two nested pairs of HTML tags.

    For each sample string, prints the tuple of captured groups:
    (outer tag name, inner tag name, enclosed contents).  Returns nothing.
    """
    # Group 1 captures the outer tag name, group 2 the inner tag name and
    # group 3 the enclosed text.  The backreferences \2 and \1 require the
    # closing tags to repeat those names, innermost first.
    pattern = r'<([A-Z][A-Z0-9]*)\b[^>]*><([A-Z][A-Z0-9]*)\b[^>]*>(.*?)</\2></\1>'
    # NOTE(review): the markup inside the original sample strings was lost
    # (only the text "The spring 2010 foundations class" survived), so the
    # tags below are reconstructed placeholders -- confirm against the lab.
    samples = (
        r'<html><body>The spring 2010 foundations class</body></html>',
        r'<div><p>The spring 2010 foundations class</p></div>',
    )
    for text in samples:
        # IGNORECASE lets the [A-Z] classes accept lowercase tag names.
        match = re.search(pattern, text, re.IGNORECASE)
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print(match.groups())
######################################################
# Question 2:
# 1)
# \b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b
# \b is a word boundary
# \d{1,3} is a number of 1 to 3 digits
# \. is a literal period, escaped because . represents any character in RE
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers separated by periods,
# followed by another word boundary
# 2)
# \b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b
# 25[0-5] is 25, followed by any digit 0-5 (250,251,...,255)
# 2[0-4][0-9] is 2, followed by any digit 0-4, followed by any digit 0-9 (200,201,...,249)
# [01]?[0-9][0-9]? is an optional 0 or 1, followed by any digit 0-9, followed by an optional digit 0-9
# (0,1,...,10,11,...,199) and (00,01,...,10,11,...,199) and (000,001,...,010,011,...199)
# Those three joined with | (or) will match any number of 1 to 3 digits up to 255
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers from 0-255 separated by periods,
# followed by another word boundary
# 3)
# \b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b
# (?:...) is a non-capturing group: it matches whatever RE is inside, but that substring cannot be retrieved after the match
# (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3} is any three 1-to-3-digit numbers up to 255, each followed by a period
# This RE looks for a word boundary,
# followed by four 1-to-3-digit numbers from 0-255 separated by periods,
# followed by another word boundary
######################################################
# Question 3:
def question3():
    """Demonstrate lookaheads that require three words in any order.

    Tests three sample sentences against the pattern.  Prints the whole
    sentence when it matches, or a "No match: ..." line when it does not.
    Returns nothing.
    """
    # ^ is the beginning of the string.
    # Each (?=...) is a lookahead: it checks for its word without consuming
    # any text, so the three words may appear in any order.
    # .*? allows some or no characters before the specified word.
    # \bmelody\b looks for "melody" with a word boundary on either side.
    # .*$ then consumes the text up to the end of the string.
    # Together, the RE matches a whole line containing all three words.
    pattern = r'^(?=.*?\bmelody\b)(?=.*?\bsimilarity\b)(?=.*?\bcomputer\b).*$'
    sentences = (
        # All three words in the given order.
        r'The melody has a similarity to that computer!',
        # All three words in a different order.
        r'The computer has a melody that has some similarity too.',
        # Only two of the words, so the pattern cannot match.
        r'The melody and the computer.',
    )
    for sentence in sentences:
        match = re.search(pattern, sentence, re.IGNORECASE)
        # re.search returns None when not all three words are present, so
        # report that instead of calling .group() on None.
        # Single-argument print(...) behaves the same under Python 2 and 3.
        if match is None:
            print('No match: %s' % sentence)
        else:
            print(match.group(0))