Browse code

dsm:mod_regex: add substring selection

$regex.match[n] is set to the nth substring match, with n starting at 1
($regex.match[1] is set to first substring match...)

Stefan Sayer authored on 21/07/2014 11:53:28
Showing 3 changed files
... ...
@@ -74,7 +74,7 @@ int MOD_CLS_NAME::preload() {
74 74
 }
75 75
 
76 76
 int MOD_CLS_NAME::add_regex(const string& r_name, const string& r_reg) {
77
-  if (regexes[r_name].regcomp(r_reg.c_str(), REG_NOSUB | REG_EXTENDED)) {
77
+  if (regexes[r_name].regcomp(r_reg.c_str(), /* REG_NOSUB | */ REG_EXTENDED)) {
78 78
     ERROR("compiling '%s' for regex '%s'\n", r_reg.c_str(), r_name.c_str());
79 79
     regexes.erase(r_name);
80 80
     return -1;
... ...
@@ -97,8 +97,18 @@ MATCH_CONDITION_START(SCExecRegexCondition) {
97 97
     return false;
98 98
   }
99 99
 
100
-  int res = it->second.regexec(val.c_str(), 1, NULL, 0);
100
+  regmatch_t matches[it->second.get_nsub()+1];
101
+  int res = it->second.regexec(val.c_str(), it->second.get_nsub(), matches, 0);
101 102
   // res==0 -> match
103
+
104
+  if (!res) {
105
+    for (size_t i=1;i<it->second.get_nsub()+1;i++) {
106
+      if (matches[i].rm_so < 0) continue;
107
+      sc_sess->var["regex.match["+int2str((unsigned int)i)+"]"] =
108
+	val.substr(matches[i].rm_so, matches[i].rm_eo - matches[i].rm_so);
109
+    }
110
+  }
111
+
102 112
   DBG("regex did %smatch\n", res==0?"":"not ");
103 113
   if (inv) {
104 114
     return res != 0;
... ...
@@ -131,7 +141,17 @@ EXEC_ACTION_START(SCExecRegexAction) {
131 141
     EXEC_ACTION_STOP;
132 142
   }
133 143
 
134
-  int res = it->second.regexec(val.c_str(), 1, NULL, 0);
144
+  regmatch_t matches[it->second.get_nsub()+1];
145
+  int res = it->second.regexec(val.c_str(), it->second.get_nsub()+1, matches, 0);
146
+
147
+  if (!res) {
148
+    for (size_t i=1;i<it->second.get_nsub()+1;i++) {
149
+      if (matches[i].rm_so < 0) continue;
150
+      sc_sess->var["regex.match["+int2str((unsigned int)i)+"]"] =
151
+	val.substr(matches[i].rm_so, matches[i].rm_eo - matches[i].rm_so);
152
+    }
153
+  }
154
+
135 155
   if (!res) {
136 156
     // yeah side effects
137 157
     sc_sess->var["regex.match"] = "1";
... ...
@@ -179,3 +199,7 @@ int TsRegex::regexec(const char *_string, size_t nmatch,
179 199
   m.unlock();
180 200
   return res;
181 201
 }
202
+
203
+size_t TsRegex::get_nsub() {  // number of subexpressions of the wrapped regex, or 0
204
+  return i ? reg.re_nsub : 0;  // re_nsub: POSIX count of parenthesized subexpressions; `i` presumably flags a compiled regex - TODO confirm
205
+}
... ...
@@ -46,7 +46,7 @@ class TsRegex {
46 46
   ~TsRegex();
47 47
   int regcomp(const char *regex, int cflags);
48 48
   int regexec(const char *_string, size_t nmatch, regmatch_t pmatch[], int eflags);
49
-
49
+  size_t get_nsub();
50 50
 };
51 51
 
52 52
 DECLARE_MODULE_BEGIN(MOD_CLS_NAME);
... ...
@@ -15,6 +15,8 @@ regex.compile(name, reg_ex)
15 15
 regex.match(name, match_string)
16 16
  Match match_string on regex referenced by name.
17 17
  $regex.match is set to 1 if matched, 0 if not matched.
18
+ $regex.match[n] is set to the nth substring match, with n starting at 1
19
+ ($regex.match[1] is set to first substring match...)
18 20
 
19 21
 regex.clear(name)
20 22
  Clear the regex referenced by name.
... ...
@@ -22,9 +24,8 @@ regex.clear(name)
22 24
 Conditions:
23 25
  regex.match(name, match_string)
24 26
   Match match_string on regex referenced by name.
25
-
26
-
27
+  $regex.match[n] is set to the nth substring match, with n starting at 1
28
+  ($regex.match[1] is set to first substring match...)
27 29
 
28 30
 TODO:
29
- - implement substring adressing
30 31
  - find a better way for $regex.match side-effect