diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 600b82908..37504ccf1 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -6,7 +6,7 @@ class AboutRegex(Koan): """ This koans are based on the Ben's book: Regular Expressions in 10 minutes. - I found this books very useful so I decided to write a koans in order to practice everything I had learned from it. + I found this book very useful so I decided to write a koans in order to practice everything I had learned from it. http://www.forta.com/books/0672325667/ """ @@ -235,4 +235,199 @@ def using_metacharacters_matching_alphanumeric_characters(self): +"1_2-34R\n" \ +"BA_-2e3" - self.assertEquals( len(re.findall(__, string)), 5, "I want to find the ids") + self.assertEquals( len(re.findall(__, string)), 5, "I want to find the ids") + + def using_metacharacters_matching_whitespaces_and_nonwhitespace(self): + """ + Lesson 3 Using metacharacters + + As with digits, there are metacharacters for whitespace characters: + \s Any whitespace character (same as [\f\n\r\t\v]) + \S Any nonwhitespace character (same as [^\f\n\r\t\v]) + Note: [\b], the backspace metacharacter, is not included in \s or excluded by \S + + """ + f = open('koans/regex_whitespaces', 'r') + string = f.read() + #TIP: The file contains some text. You have to find all whitespaces + self.assertEquals( len(re.findall(__, string)), 7, "I want to find all whitespaces") + + def matching_one_or_more_characters(self): + """ + Lesson 4 + + To match one or more instances of a character (or set), simply append a +. + + matches at least one. + + When you use + with sets, the + should be placed outside the set. 
[0-9]+ + + """ + string = "For questions about the book use support@forta.com or" \ + " hola@flopezluis.es for questions about this koans" + mails = re.findall(__, string) + self.assertEquals(mails[0],"support@forta.com", "I want to find the first email") + self.assertEquals(mails[1],"hola@flopezluis.es", "I want to find the second email") + + def matching_one_or_more_characters_second(self): + """ + Lesson 4 + + The last example matches the two addresses, but that pattern wouldn't + correctly match this address: ben.forta@forta.com + It would match forta@forta.com + + To match one or more instances of a character (or set), simply append a +. + + matches at least one. + + When you use + with sets, the + should be placed outside the set. [0-9]+ + + """ + string = "For questions about the book use ben.support@forta.com or" \ + " hola@flopez.luis.es for questions about this koans" + #TIP: you must use sets + mails = re.findall(__, string) + self.assertEquals(mails[0],"ben.support@forta.com", "I want to find the first email") + self.assertEquals(mails[1],"hola@flopez.luis.es", "I want to find the second email") + + def matching_zero_or_more_characters(self): + """ + Lesson 4 + + To match zero or more instances of a character (or set), simply append a *. + + If in the last example we had had the following text: + hello .ben@forta.com is my email address. + The pattern [\w.]+@[\w.]+\.\w+ would have matched ".ben@forta.com", so you need to + match an alphanumeric text with optional additional characters. + + """ + string = "hello .ben@forta.com is my email address." + + mails = re.findall(__, string) + self.assertEquals(mails[0],"ben@forta.com", "I want to find the email") + + def matching_zero_or_one_character(self): + """ + Lesson 4 + + To match zero or one instances of a character (or set), simply append a ?. 
+ + """ + string = "The URL is http://www.forta.com/, to connect " \ + + "securely use https://www.forta.com/ instead" + self.assertEquals(len(re.findall(__,string)), 2, "I want to find the email") + + def using_intervals_exact_intervals(self): + """ + Lesson 4 + + To specify an exact number of matches, you place that number between {}. + {3} means match three instances of the previous character or set. + + We're going to use the same text as in test_using_multiple_ranges + Remember that in that case the pattern was: + #[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f] + """ + #TIP: now you only need to find one [0-9A-Fa-f] and the interval. + string = '
' + self.assertEquals(len(re.findall(__, string)),2, "I want to find all the colors in RGB") + + def using_intervals_range_intervals(self): + """ + Lesson 4 + + Intervals may also be used to specify a range of values. + {2,4} means match a minimum of 2 and a maximum of 4 + + """ + string = "4/8/03\n" \ + +"10-6-2004\n" \ + +"2/2/2\n" \ + +"01-01-01" + + self.assertEquals(len(re.findall("__", string)),3, "I want the correct dates.") + + def using_intervals_at_least(self): + """ + Lesson 4 + + We can specify the minimum number of instances to be matched (without any maximum). + {2,} means match at least 2 instances + + """ + string = "1001: $496.80\n" \ + +"1002: $1290.69\n" \ + +"1003: $26.43\n" \ + +"1004: $613.42\n" \ + +"1005: $7.61\n" \ + +"1006: $414.90\n" \ + +"1007: $25.00" + #tip: You must match: several digits + colon + whitespace + $ + at least 3 digits + . + 2 digits (decimals) + self.assertEquals(len(re.findall("__", string)),4, "Search all orders valued at 100$ or more.") + + def using_intervals_preventing_over_mathing(self): + """ + Lesson 4 + + Consider this example. The text that follows is part of a Web page. The regular expression needs to match + any text within <B> tags. + text: This offer is not available to customers living in <B>AK</B> and <B>HI</B> + regex: <[Bb]>.*</[Bb]> + Result: <B>AK</B> and <B>HI</B> + Instead of two matches, only one was found. The .* matched everything after the first <B> until the last </B> + so that the text <B>AK</B> and <B>HI</B> was matched. + The reason for this is that metacharacters such as * and + are greedy. They look for the greatest + possible match as opposed to the smallest. + The solution is to use lazy versions of these quantifiers (they are referred to as being lazy because + they match the fewest characters instead of the most). + + Lazy quantifiers are defined by appending a ?: + *? + +? + {n,}? 
+ """ + string = "This offer is not available to customers living in AK and HI" + + self.assertEquals(len(re.findall(__, string)),2, " The regular expression needs to match any text within tags.") + + def position_mathing_word_boundaries(self): + """ + Lesson 5 + + \b is used to match the start or end of a word + + """ + string = "The cat scattered his food all over the room" + m = re.search(__, string) + self.assertTrue(m and m.group(0) and m.group(0)== 'cat', "You must match the cat") + + def position_mathing_string_boundaries(self): + """ + Lesson 5 + + String boundaries are used to match patterns at the start or end of an entire string. + The string boundary metacharacters are ^ for start of string and $ for end of string. + + ^ is one of several metacharacters that have multiple uses. It negates a set only if, + inside a set (enclosed within [ and ]), it is the first character after the opening [. + Outside of a set, and at the beginning of a pattern, ^ matches the start of string. + + This one is more difficult :) you have to type a pattern which matches the 2 expressions + """ + string = '\n' \ + + '