From 7ee6d7dd7ed8cb4d7a397ef806f79f6c933fb6d7 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Mon, 7 Feb 2011 09:38:14 +0100 Subject: [PATCH 1/9] added test for matching whitespaces --- python 2/koans/regex_whitespaces | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 python 2/koans/regex_whitespaces diff --git a/python 2/koans/regex_whitespaces b/python 2/koans/regex_whitespaces new file mode 100644 index 000000000..d832c1217 --- /dev/null +++ b/python 2/koans/regex_whitespaces @@ -0,0 +1,2 @@ +I'm Felix Lopez +I'm a developer From ac3f997f90002cdccc67d495d6a630291e4e4ad1 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Mon, 7 Feb 2011 09:38:20 +0100 Subject: [PATCH 2/9] added test for matching whitespaces --- python 2/koans/about_regex.py | 20 ++++++++++++++++++-- python 2/koans/regex_solutions.txt | 4 +++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 600b82908..d800033d4 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -6,7 +6,7 @@ class AboutRegex(Koan): """ This koans are based on the Ben's book: Regular Expressions in 10 minutes. - I found this books very useful so I decided to write a koans in order to practice everything I had learned from it. + I found this book very useful so I decided to write a koans in order to practice everything I had learned from it. 
http://www.forta.com/books/0672325667/ """ @@ -235,4 +235,20 @@ def using_metacharacters_matching_alphanumeric_characters(self): +"1_2-34R\n" \ +"BA_-2e3" - self.assertEquals( len(re.findall(__, string)), 5, "I want to find the ids") + self.assertEquals( len(re.findall(__, string)), 5, "I want to find the ids") + + def using_metacharacters_matching_whitespaces_and_nonwhitespace(self): + """ + Lesson 3 Using metacharacters + + Like with the digits you have metacharacters for specific whitespace character: + \s Any whitespace character(same as [\f\n\r\t\v]) + \S Any nonwhitespace character(same as [\f\n\r\t\v]) + Note [\b], the backspace metacharacter, is not included in \s or excluded by \S + + """ + f = open('koans/regex_whitespaces', 'r') + string = f.read() + #TIP: This text contains a text. Yo have to find all whitespaces + self.assertEquals( len(re.findall(__, string)), 7, "I want to find all whitespaces") + diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index bacd966ad..676dd7c6b 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -25,4 +25,6 @@ using_metacharacters_macthing_white_spaces: using_metacharacters_matching_digits: myArray\[\d\] using_metacharacters_matching_alphanumeric_characters: - \w\w\w-\w\w\w + \w\w\w-\w\w\w +using_metacharacters_matching_whitespaces_and_nonwhitespace: + [\s] From 8efefe79b67c850cf231b4546bab6e4a5c4994b0 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Fri, 11 Feb 2011 18:48:19 +0100 Subject: [PATCH 3/9] added matching ore or more chars --- python 2/koans/about_regex.py | 37 ++++++++++++++++++++++++++++++ python 2/koans/regex_solutions.txt | 6 ++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index d800033d4..d2561988e 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -252,3 +252,40 @@ def 
using_metacharacters_matching_whitespaces_and_nonwhitespace(self): #TIP: This text contains a text. Yo have to find all whitespaces self.assertEquals( len(re.findall(__, string)), 7, "I want to find all whitespaces") + def matching_one_or_more_characters(self): + """ + Lesson 4 + + To match one or more instances of a character (or set), simply append a +. + + matches at least one. + + When you use + with sets, the + should be placed outside the set. [0-9]+ + + """ + string = "For questions about the book use support@forta.com or" \ + " hola@flopezluis.es for questions about this koans" + mails = re.findall(__, string) + self.assertEquals(mails[0],"support@forta.com", "I want to find the first email") + self.assertEquals(mails[1],"hola@flopezluis.es", "I want to find the second email") + + def matching_one_or_more_characters_second(self): + """ + Lesson 4 + + The last example matches the two addresses but the last pattern wouldn't + correctly match this address: ben.forta@forta.com + It'd match forta@forta.com + + To match one or more instances of a character (or set), simply append a +. + + matches at least one. + + When you use + with sets, the + should be placed outside the set. 
[0-9]+ + + """ + string = "For questions about the book use ben.support@forta.com or" \ + " hola@flopez.luis.es for questions about this koans" + #TIP: you must use sets + mails = re.findall(__, string) + self.assertEquals(mails[0],"ben.support@forta.com", "I want to find the first email") + self.assertEquals(mails[1],"hola@flopez.luis.es", "I want to find the second email") + diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index 676dd7c6b..5bdddfb3c 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -27,4 +27,8 @@ using_metacharacters_matching_digits: using_metacharacters_matching_alphanumeric_characters: \w\w\w-\w\w\w using_metacharacters_matching_whitespaces_and_nonwhitespace: - [\s] + [\s] +matching_one_or_more_characters: + \w+@\w+\.\w+ +matching_one_or_more_characters_second: + [\w.]+@[\w.]+\.\w+" From d63608d36f66c39629f92d78def8afbd10ed07ce Mon Sep 17 00:00:00 2001 From: flopezluis Date: Mon, 14 Feb 2011 08:31:16 +0100 Subject: [PATCH 4/9] added zero or more and zero or one --- python 2/koans/about_regex.py | 27 +++++++++++++++++++++++++++ python 2/koans/regex_solutions.txt | 6 +++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index d2561988e..7d1527e1a 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -289,3 +289,30 @@ def matching_one_or_more_characters_second(self): self.assertEquals(mails[0],"ben.support@forta.com", "I want to find the first email") self.assertEquals(mails[1],"hola@flopez.luis.es", "I want to find the second email") + def matching_zero_or_more_characters(self): + """ + Lesson 4 + + To match zero or more instances of a character (or set), simply append a *. + + If in the last example We had had the next text: + hello .ben@forta.com is my email address. 
+ The pattern [\w.]+@[\w.]+\.\w+ would have matched ".ben@forta.com", so you need to + match an alphanumeric text with optional additional characters. + + """ + string = "hello .ben@forta.com is my email address." + + mails = re.findall(__, string) + self.assertEquals(mails[0],"ben@forta.com", "I want to find the email") + + def matching_zero_or_one_character(self): + """ + Lesson 4 + + To match zero or one instances of a character (or set), simply append a ?. + + """ + string = "The URL is http://www.forta.com/, to connect " \ + + "securely use https://www.forta.com/ instead" + self.assertEquals(len(re.findall(__,string)), 2, "I want to find the email") diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index 5bdddfb3c..5b3fc1c4a 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -31,4 +31,8 @@ using_metacharacters_matching_whitespaces_and_nonwhitespace: matching_one_or_more_characters: \w+@\w+\.\w+ matching_one_or_more_characters_second: - [\w.]+@[\w.]+\.\w+" + [\w.]+@[\w.]+\.\w+ +matching_zero_or_more_characters: + \w+[\w.]*@[\w.]+\.\w+ +matching_zero_or_one_character: + https?://[\w./]+ From a981b4237b2ed609730d72f7dbd73eda2807abaf Mon Sep 17 00:00:00 2001 From: flopezluis Date: Mon, 14 Feb 2011 19:31:26 +0100 Subject: [PATCH 5/9] added ranges --- python 2/koans/about_regex.py | 30 ++++++++++++++++++++++++++++++ python 2/koans/regex_solutions.txt | 5 ++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 7d1527e1a..532ecd563 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -316,3 +316,33 @@ def matching_zero_or_one_character(self): string = "The URL is http://www.forta.com/, to connect " \ + "securely use https://www.forta.com/ instead" self.assertEquals(len(re.findall(__,string)), 2, "I want to find the email") + + def using_intervals_exact_intervals(self): + """ + Lesson 4 + + To 
specify an exact number of matches, you place that number between {}. + {3} means match three instances of the previous character or set. + + We're going to use the same text that in test_using_multiple_ranges + Remember that in that case the pattern was: + #[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f] + """ + #TIP: now you only need to find one [0-9A-Fa-f] and the interval. + string = '' + self.assertEquals(len(re.findall(__, string)),2, "I want to find all the colors in RGB") + + def using_intervals_range_intervals(self): + """ + Lesson 4 + + Intervals may also be used to specify a range of values. + {2,4} means match a minimum of 2 and a maximum of 4 + + """ + string = "4/8/03\n" \ + +"10-6-2004\n" \ + +"2/2/2\n" \ + +"01-01-01" + + self.assertEquals(len(re.findall("__", string)),3, "I want the correct dates.") diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index 5b3fc1c4a..f5baca6f4 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -21,7 +21,8 @@ using_metacharacters_escaping: myArray\[0\] using_metacharacters_macthing_white_spaces: \n\n - IMPORTANT: Windows uses a carriage return line fedd combination used as an end-of-line marker, so you need to use \r\n. + IMPORTANT: Windows uses a carriage return line fedd combination used as an end-of-line marker, so you need to use \r\n and the pattern for both systems is + [\r]?\n[\r]?\n but ? will be covered later. 
using_metacharacters_matching_digits: myArray\[\d\] using_metacharacters_matching_alphanumeric_characters: @@ -36,3 +37,5 @@ matching_zero_or_more_characters: \w+[\w.]*@[\w.]+\.\w+ matching_zero_or_one_character: https?://[\w./]+ +using_intervals_range_intervals: + \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} From 679998ae489670c7944c540665b0b3fda9e052e0 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Thu, 17 Feb 2011 08:52:48 +0100 Subject: [PATCH 6/9] added interval at least --- python 2/koans/about_regex.py | 19 +++++++++++++++++++ python 2/koans/regex_solutions.txt | 2 ++ 2 files changed, 21 insertions(+) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 532ecd563..3a9346adc 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -346,3 +346,22 @@ def using_intervals_range_intervals(self): +"01-01-01" self.assertEquals(len(re.findall("__", string)),3, "I want the correct dates.") + + def using_intervals_at_least(self): + """ + Lesson 4 + + We can specify the minimum of instances to be matched (without any maximun). + {2,} means match at least 2 instances + + """ + string = "1001: $496.80\n" \ + +"1002: $1290.69\n" \ + +"1003: $26.43\n" \ + +"1004: $613.42\n" \ + +"1005: $7.61\n" \ + +"1006: $414.90\n" \ + +"1007: $25.00" + #tip: Yo must match: several digits + colon + whitespace + $ + at least 3 digits + . 
+ 2 digits (decimals) + self.assertEquals(len(re.findall("__", string)),4, "Search all orders valued at 100$ or more.") + diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index f5baca6f4..a519e9267 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -39,3 +39,5 @@ matching_zero_or_one_character: https?://[\w./]+ using_intervals_range_intervals: \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} +using_intervals_at_least: + \d+: \$\d{3,}\.\d{2} From dac6ccd49aaf2ca0dc2fea3c7d153d71e9fba64f Mon Sep 17 00:00:00 2001 From: flopezluis Date: Fri, 18 Feb 2011 08:56:16 +0100 Subject: [PATCH 7/9] add greedy quantifiers --- python 2/koans/about_regex.py | 25 +++++++++++++++++++++++++ python 2/koans/regex_solutions.txt | 2 ++ 2 files changed, 27 insertions(+) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 3a9346adc..a18831fc9 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -365,3 +365,28 @@ def using_intervals_at_least(self): #tip: Yo must match: several digits + colon + whitespace + $ + at least 3 digits + . + 2 digits (decimals) self.assertEquals(len(re.findall("__", string)),4, "Search all orders valued at 100$ or more.") + def using_intervals_preventing_over_mathing(self): + """ + Lesson 4 + + Consider this example. Text that follows is part of a Web Page. The regular expression needs to match + any text within tags. + text: This offer is not available to customers living in AK and HI + regex: <[Bb]>.* + Result: AK and HI + Instead of two matches, only one was found. the .* matched everything after the first until the last + so that the text AK and HI was matched. + The reason for this is that metacharacters such as * and + are greedy. They look for the greatest + possible match as opposed to the smallest. 
+ The solution is to use lazy versions of these quantifiers (they are referred to as being lazy because + they match the fewest characters instead of the most). + + Lazy quantifiers are defined by appending an ?: + *? + +? + {n,}? + """ + string = "This offer is not available to customers living in AK and HI" + + self.assertEquals(len(re.findall(__, string)),2, " The regular expression needs to match any text within tags.") + diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index a519e9267..d85e8266a 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -41,3 +41,5 @@ using_intervals_range_intervals: \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} using_intervals_at_least: \d+: \$\d{3,}\.\d{2} +using_intervals_preventing_over_mathing: + <[Bb]>.*? From 882c3d5180d1f85093fdb9b128949dface7f1932 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Mon, 7 Mar 2011 16:06:01 +0100 Subject: [PATCH 8/9] added word boundaries --- python 2/koans/about_regex.py | 10 ++++++++++ python 2/koans/regex_solutions.txt | 2 ++ 2 files changed, 12 insertions(+) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index a18831fc9..7baa6586f 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -390,3 +390,13 @@ def using_intervals_preventing_over_mathing(self): self.assertEquals(len(re.findall(__, string)),2, " The regular expression needs to match any text within tags.") + def position_mathing_word_boundaries(self): + """ + Lesson 5 + + \b is used to match the start or end of a word + + """ + string = "The cat scattered his food all over the room" + m = re.search(__, string) + self.assertTrue(m and m.group(0) and m.group(0)== 'cat', "You must match the cat") diff --git a/python 2/koans/regex_solutions.txt b/python 2/koans/regex_solutions.txt index d85e8266a..bc92c17a2 100644 --- a/python 2/koans/regex_solutions.txt +++ b/python 2/koans/regex_solutions.txt @@ -43,3 +43,5 @@ 
using_intervals_at_least: \d+: \$\d{3,}\.\d{2} using_intervals_preventing_over_mathing: <[Bb]>.*? +position_mathing_word_boundaries: + \\bcat\\b" From 165681098a05ce8d52e4b8510f64d6c988dac953 Mon Sep 17 00:00:00 2001 From: flopezluis Date: Fri, 11 Mar 2011 19:11:07 +0100 Subject: [PATCH 9/9] add string boundaries --- python 2/koans/about_regex.py | 31 ++++++++++++++++++++++++++++++ python 2/koans/regex_solutions.txt | 2 ++ 2 files changed, 33 insertions(+) diff --git a/python 2/koans/about_regex.py b/python 2/koans/about_regex.py index 7baa6586f..37504ccf1 100755 --- a/python 2/koans/about_regex.py +++ b/python 2/koans/about_regex.py @@ -400,3 +400,34 @@ def position_mathing_word_boundaries(self): string = "The cat scattered his food all over the room" m = re.search(__, string) self.assertTrue(m and m.group(0) and m.group(0)== 'cat', "You must match the cat") + + def position_mathing_string_boundaries(self): + """ + Lesson 5 + + String boundaries are used to match patterns at the start or end of an entire string. + The string boundary metacharacters are ^ for start of string and $ for end of string. + + ^ is one of several metacharacters thar has multiple uses. It negates a set only if + in a set (enclosed within [ and ]) is the first character after the opening ]. + Outside of a set, and at the beginning of a pattern, ^ matches the start of string. + + This is a more difficult :) you have to type a pattern which matches the 2 expressions + """ + string = '\n' \ + + '', "You must match the cat") + + string = 'too bad too bad \n' \ + + '\n' \ + + '.*? position_mathing_word_boundaries: \\bcat\\b" +position_mathing_string_boundaries: + ^\s*<\?xml.*\?>