root/trunk/meta/regexPredicate.d

Revision 84, 7.8 kB (checked in by pragma, 3 years ago)

Updated the bejezus out of this thing.


Currently Supported

  • character classes (including inverse char classes via [^...])
  • match one or more (+)
  • match zero or more (*)
  • match zero or one (?)
  • escape sequences
  • whitespace matching (ws chars are treated literally right now)
  • {n} and {n,m} predicates
  • ^ at the start of an expression
  • $ at the end of an expression
  • grouping via ()
  • most standard escape sequences
  • union operator (outside of parens)
Line 
1 /+
2     Copyright (c) 2005 Eric Anderton
3         
4     Permission is hereby granted, free of charge, to any person
5     obtaining a copy of this software and associated documentation
6     files (the "Software"), to deal in the Software without
7     restriction, including without limitation the rights to use,
8     copy, modify, merge, publish, distribute, sublicense, and/or
9     sell copies of the Software, and to permit persons to whom the
10     Software is furnished to do so, subject to the following
11     conditions:
12
13     The above copyright notice and this permission notice shall be
14     included in all copies or substantial portions of the Software.
15
16     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23     OTHER DEALINGS IN THE SOFTWARE.
24 +/
25 module meta.regexPredicate;
26
27 import meta.conv;
28 import meta.math;
29 import meta.strhacks;
30 import meta.string;
31
32 /* TODO: support all of these
33 exec
34
35     A regular expression method that executes a search for a match in a string. It returns an array of information.
36
37 test
38
39     A regular expression method that tests for a match in a string. It returns true or false.
40
41 match
42
43     A String method that executes a search for a match in a string. It returns an array of information or null on a mismatch.
44
45 search
46
47     A String method that tests for a match in a string. It returns the index of the match, or -1 if the search fails.
48
49 replace
50
51     A String method that executes a search for a match in a string, and replaces the matched substring with a replacement substring.
52
53 split
54
55     A String method that uses a regular expression or a fixed string to break a string into an array of substrings.
56 */
57
//match routines
// A MatchPredicate takes the text to search and returns the matched substrings.
alias char[][] function(char[] str) MatchPredicate;
// Value for "nothing matched": the default (empty) array of matches.
// Declared as a typed constant, since an alias declaration cannot carry an initializer.
const char[][] noMatch = (char[][]).init;
61
// Runs two matchers over the same input and returns the matches of the first
// followed by the matches of the second.
//   firstMatch, secondMatch - matchers called as f(str), each yielding char[][]
//   key - not referenced in the body (presumably only distinguishes template
//         instances; confirm against the code that instantiates this)
template matchUnion(alias firstMatch,alias secondMatch,char[] key){
    char[][] matchUnion(char[] str){
        char[][] head = firstMatch(str);
        char[][] tail = secondMatch(str);
        head ~= tail;
        return head;
    }
}
70
// Tries the test once, anchored at the start of the string.
// Returns a one-element array holding the matched prefix, or an empty array
// when the test fails or matches zero characters.
//   key - not referenced in the body
template matchTest(alias testPredicate,char[] key){
    char[][] matchTest(char[] str){
        char[][] found;
        int matched = testPredicate(str);
        // a failed test and a zero-length match both yield no results
        if(matched == testFail || matched <= 0) return found;
        found ~= str[0..matched];
        return found;
    }
}
82
83 /*
84 // aggressive test- tests every possible substring for matches
85 //NOTE: you probably should never use this
86 template matchAggressive(alias testPredicate,char[] key){
87     char[][] matchAggressive(char[] str){
88         char[][] results;
89         for(uint start=0; start<str.length; start++){
90             for(uint end=str.length; end>start; end--){
91                 int result = testPredicate(str[start..end]);
92                 if(result != testFail && result == end-start){
93                     results ~= str[start..result];
94                 }
95             }
96         }
97         return results;
98     }
99 }*/
100
// Tests every prefix str[0..end] of the string, longest first, and returns the
// first non-empty match as a one-element array. Returns an empty array when
// no prefix produces a match.
//   key        - not referenced in the body
//   aggressive - accepted for symmetry with matchTestFromEnd but not used here;
//                the search always stops at the first match
template matchTestFromStart(alias testPredicate,char[] key,bit aggressive=false){
    char[][] matchTestFromStart(char[] str){
        // declared outside the loop so the final return can see it
        char[][] results;
        for(uint end=str.length; end>0; end--){
            int result = testPredicate(str[0..end]);
            if(result != testFail && result > 0){
                results ~= str[0..result];
                return results;
            }
        }
        return results;
    }
}
115
// Tests every suffix str[start..$], longest first, and keeps those the test
// covers completely (the match length equals the suffix length).
// With aggressive == false the first such suffix is returned on its own;
// otherwise all of them are collected.
//   key - not referenced in the body
//TODO: refactor by reversing the string (should make for a faster match)
template matchTestFromEnd(alias testPredicate,char[] key,bit aggressive=false){
    char[][] matchTestFromEnd(char[] str){
        char[][] found;
        uint begin = 0;
        while(begin < str.length){
            char[] tail = str[begin..$];
            int matched = testPredicate(tail);
            // only accept a match that runs all the way to the string's end
            if(matched != testFail && matched == tail.length){
                found ~= tail;
                static if(!aggressive) return found;
            }
            begin++;
        }
        return found;
    }
}
131
// The test must cover the entire string: returns a one-element array holding
// the whole string when the match length equals str.length, otherwise an
// empty array.
//   key - not referenced in the body
template matchTestPerfect(alias testPredicate,char[] key){
    // The member carries the template's own name so that
    // matchTestPerfect!(...)(str) resolves like the other matchers in this module.
    char[][] matchTestPerfect(char[] str){
        char[][] results;
        int result = testPredicate(str);
        if(result != testFail && result == str.length){
            results ~= str[0..result];
        }
        return results;
    }
}
143
144 //test routines: a test returns how many leading chars of str it matched, or testFail
145 alias int function(char[] str) TestPredicate; // signature shared by the test routines
146 const int testFail = -1; // sentinel returned by a test that did not match
147
/// Empty terminal: always succeeds and consumes no characters.
template testEmpty(){
    int testEmpty(char[] str){
        // zero-length match regardless of what str holds
        const int consumed = 0;
        return consumed;
    }
}
154
/// Two consecutive tests: testSecond must match immediately after the text
/// consumed by testFirst.
/// Returns the combined number of characters consumed, or testFail when
/// either test fails.
///   key - not referenced in the body
template testUnion(alias testFirst,alias testSecond,char[] key){
    int testUnion(char[] str){
        int result = testFirst(str);
        if(result == testFail) return testFail;
        int nextResult = testSecond(str[result..$]);
        // check the SECOND test's outcome; otherwise a failing second test
        // would be added in as -1
        if(nextResult == testFail) return testFail;
        return result + nextResult;
    }
}
168
// Alternation: tries testFirst, and only when it fails tries testSecond on
// the same input. Returns whichever result was taken (testFail when both fail).
//   key - not referenced in the body
template testOr(alias testFirst,alias testSecond,char[] key){
    int testOr(char[] str){
        int first = testFirst(str);
        return (first != testFail) ? first : testSecond(str);
    }
}
178
179
// Literal text: succeeds when str begins with `text`.
// Returns text.length on success, or testFail when str is empty, shorter than
// `text`, or does not start with it.
template testText(char[] text){
    int testText(char[] str){
        if(str.length != 0 && text.length <= str.length && str[0..text.length] == text){
            return text.length;
        }
        return testFail;
    }
}
189
// One or more (+): applies testPredicate repeatedly from the start of str.
// Returns the total number of characters consumed, or testFail when str is
// empty or the first application fails.
//   key - not referenced in the body
template testOneOrMore(alias testPredicate,char[] key){
    int testOneOrMore(char[] str){
        if(str.length == 0) return testFail;
        int total = testPredicate(str);
        if(total == testFail) return testFail;
        // Iterative rather than recursive: stack use does not grow with the
        // number of repetitions.
        while(cast(uint)total < str.length){
            int step = testPredicate(str[total..$]);
            // A zero-width match makes no progress; stop instead of repeating
            // the same call forever.
            if(step == testFail || step == 0) break;
            total += step;
        }
        return total;
    }
}
204
205
// Zero or more (*): applies testPredicate repeatedly from the start of str.
// Never fails: returns the total number of characters consumed, which is 0
// when str is empty or the first application fails.
//   key - not referenced in the body
template testZeroOrMore(alias testPredicate,char[] key){
    int testZeroOrMore(char[] str){
        int total = 0;
        // Iterative rather than recursive: stack use does not grow with the
        // number of repetitions.
        while(cast(uint)total < str.length){
            int step = testPredicate(str[total..$]);
            // A zero-width match makes no progress; stop instead of repeating
            // the same call forever.
            if(step == testFail || step == 0) break;
            total += step;
        }
        return total;
    }
}
220
221
// Zero or one (?): applies testPredicate at most once.
// Never fails: returns the predicate's result when it matches, otherwise 0.
//   key - not referenced in the body
template testZeroOrOne(alias testPredicate,char[] key){
    int testZeroOrOne(char[] str){
        // Nothing left to match is the "zero occurrences" case, which succeeds
        // with an empty match (same as testZeroOrMore on empty input).
        if(str.length == 0) return 0;
        int result = testPredicate(str);
        if(result == testFail) return 0;
        return result;
    }
}
230
// Character range: succeeds when the first character of str lies between
// term1[0] and term2[0], inclusive. Only the first character of each term is
// consulted.
// Returns 1 on success, or testFail when str is empty or out of range.
template testRange(char[] term1,char[] term2){
    int testRange(char[] str){
        if(str.length != 0){
            char c = str[0];
            if(term1[0] <= c && c <= term2[0]) return 1;
        }
        return testFail;
    }
}
240
// Bounded repetition ({n} / {n,m}): applies testPredicate up to `max` times,
// each time to the text left over from the previous application.
// Returns the total number of characters consumed, or testFail when fewer
// than `min` applications succeed.
//   key - not referenced in the body
template testTimes(uint min,uint max,alias testPredicate,char[] key){
    int testTimes(char[] str){
        // On empty input, zero repetitions are acceptable only when min is 0.
        if(str.length == 0) return (min == 0) ? 0 : testFail;
        int result = 0;
        for(uint i=0; i<max; i++){
            int nextResult = testPredicate(str[result..$]);
            if(nextResult == testFail){
                if(i < min) return testFail;
                break;
            }
            result += nextResult;
        }
        return result;
    }
}
257
// Any character (.): consumes exactly one character, whatever it is.
// Returns 1, or testFail when str is empty.
template testAny(){
    int testAny(char[] str){
        //TODO: check for newline (some regexps don't test this)
        return (str.length == 0) ? testFail : 1;
    }
}
265
// Single character: succeeds when the first character of str equals ch[0].
// Only the first character of `ch` is consulted.
// Returns 1 on success, or testFail when str is empty or differs.
template testChar(char[] ch){
    int testChar(char[] str){
        return (str.length != 0 && str[0] == ch[0]) ? 1 : testFail;
    }
}
275
// Word character: succeeds when the first character of str is an ASCII
// letter, an ASCII digit, or an underscore.
// Returns 1 on success, or testFail when str is empty or starts with anything else.
template testWordChar(){
    int testWordChar(char[] str){
        if(str.length == 0) return testFail;
        char c = str[0];
        if(c == '_') return 1;
        if(c >= '0' && c <= '9') return 1;
        if(c >= 'A' && c <= 'Z') return 1;
        if(c >= 'a' && c <= 'z') return 1;
        return testFail;
    }
}
290
// Inverted single-character test: consumes one character exactly when
// testPredicate does NOT match at the start of str.
// Returns 1 on success, or testFail when str is empty or testPredicate matches.
//   key - not referenced in the body
template testCharInverse(alias testPredicate,char[] key){
    int testCharInverse(char[] str){
        // There is no character to consume on empty input. Without this guard
        // a failing inner test would report 1 char consumed, and the caller's
        // next slice of str would run past the end.
        if(str.length == 0) return testFail;
        if(testPredicate(str) == testFail) return 1;
        return testFail;
    }
}
Note: See TracBrowser for help on using the browser.