00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <cassert>
00034
00035 #include "StrUtil.h"
00036
00037 using std::string;
00038 using std::ostringstream;
00039
// regcomp() cflags each cached pattern was compiled with, keyed by the
// pattern string passed to compileRegex(). Kept in step with regexCache.
00040 StringIntHMap StrUtil::cflagMap;
// Compiled regex_t objects keyed by the pattern string passed to
// compileRegex(); entries are allocated with new and released there.
00041 RegexCache StrUtil::regexCache;
00042
00043 string StrUtil::trim(const string& s) {
00044 size_t start = 0;
00045 while (isspace(s[start])) {
00046 start++;
00047 }
00048
00049 size_t end = s.size();
00050 while (end >= start && isspace(s[end - 1])) {
00051 end--;
00052 }
00053
00054 if (end < start) {
00055 return "";
00056 }
00057
00058 return s.substr(start, end - start);
00059 }
00060
00061 string StrUtil::trimFront(const string& s) {
00062 size_t start = 0;
00063 size_t sz = s.size();
00064 while (start < sz && isspace(s[start])) {
00065 start++;
00066 }
00067
00068 return s.substr(start);
00069 }
00070
00071 string StrUtil::trimBack(const string& s, char c) {
00072 size_t end = s.size();
00073 while (end > 0 && s[end - 1] == c) {
00074 end--;
00075 }
00076
00077 return s.substr(0, end);
00078 }
00079
00080 string StrUtil::toLower(string s) {
00081 for (uint i = 0; i < s.size(); i++) {
00082 s[i] = ::tolower(s[i]);
00083 }
00084 return s;
00085 }
00086
00087 string StrUtil::toUpper(string s) {
00088 for (uint i = 0; i < s.size(); i++) {
00089 s[i] = ::toupper(s[i]);
00090 }
00091 return s;
00092 }
00093
00094 size_t StrUtil::read(string& src, string& buf, size_t n) {
00095 buf.clear();
00096 if (src.size() < n) {
00097 return 0;
00098 }
00099
00100 buf = src.substr(0, n);
00101 src = src.substr(n);
00102 return n;
00103 }
00104
00105 size_t StrUtil::readLine(string& src, string& buf) {
00106 string::size_type rn = src.find("\r\n");
00107 string::size_type n = src.find("\n");
00108 size_t bytesRead = 0;
00109
00110 buf.clear();
00111
00112 if (rn != string::npos) {
00113 buf = src.substr(0, rn);
00114 bytesRead = buf.size() + 2;
00115 }
00116 else if (n != string::npos) {
00117 buf = src.substr(0, n);
00118 bytesRead = buf.size() + 1;
00119 }
00120
00121 if (bytesRead > 0) {
00122 src = src.substr(bytesRead);
00123 }
00124
00125 return bytesRead;
00126 }
00127
00128 string StrUtil::replaceAll(string s, const string& t, const string& r) {
00129 while (true) {
00130 string::size_type i = s.find(t);
00131 if (i == string::npos) {
00132 return s;
00133 }
00134 s.replace(i, t.size(), r);
00135 }
00136 }
00137
00138 bool StrUtil::matches(const string& re, const string& s, bool ignoreCase,
00139 bool matchNewline) throw (RegexException) {
00140
00141 regex_t* rex = compileRegex(re, ignoreCase, matchNewline, true);
00142
00143 int match = regexec(rex, s.c_str(), 0, 0, 0);
00144
00145 return (match == 0);
00146 }
00147
00148 StringList StrUtil::match(const string& re, const string& s, bool ignoreCase,
00149 bool matchNewline) throw (RegexException) {
00150 StringList r;
00151 match(re, s, r, ignoreCase, matchNewline);
00152 return r;
00153 }
00154
00155 bool StrUtil::match(const string& re, const string& s, StringList& r,
00156 bool ignoreCase, bool matchNewline) throw (RegexException) {
00157 regex_t* rex = compileRegex(re, ignoreCase, matchNewline, false);
00158
00159 size_t nmatch = 10;
00160 regmatch_t pmatch[nmatch];
00161 int match = regexec(rex, s.c_str(), nmatch, pmatch, 0);
00162
00163 if (match == 0) {
00164
00165
00166 size_t last;
00167 for (last = nmatch; pmatch[last - 1].rm_so == -1; last--) { }
00168
00169 for (size_t i = 1; i < last; i++) {
00170 regmatch_t offsets = pmatch[i];
00171
00172 if (pmatch[i].rm_so != -1) {
00173 r.push_back(s.substr(offsets.rm_so, offsets.rm_eo - offsets.rm_so));
00174 }
00175 else {
00176 r.push_back(string());
00177 }
00178 }
00179 return true;
00180 }
00181 ASSERT(match == REG_NOMATCH);
00182 return false;
00183 }
00184
00185 void StrUtil::throwRegexException(const string& re, int error, regex_t* rex)
00186 throw (RegexException) {
00187 if (error != 0) {
00188 size_t len = regerror(error, rex, 0, 0);
00189 char errbuf[len];
00190 regerror(error, rex, errbuf, len);
00191 string errstr = "regex error " + re + ": ";
00192 errstr.append(errbuf);
00193 throw RegexException(errstr);
00194 }
00195 }
00196
00197 regex_t* StrUtil::compileRegex(string re, bool ignoreCase, bool matchNewline,
00198 bool nosub) throw (RegexException) {
00199
00200 int cflags = REG_EXTENDED;
00201 if (nosub) {
00202 cflags |= REG_NOSUB;
00203 }
00204 if (ignoreCase) {
00205 cflags |= REG_ICASE;
00206 }
00207 if (!matchNewline) {
00208 cflags |= REG_NEWLINE;
00209 }
00210
00211
00212 if (regexCache.containsKey(re)) {
00213 if (cflagMap[re] == cflags) {
00214 return regexCache[re];
00215 }
00216 else {
00217 regex_t* delrex = regexCache[re];
00218 regfree(delrex);
00219 delete delrex;
00220 delrex = 0;
00221 regexCache.remove(re);
00222 cflagMap.erase(re);
00223 }
00224 }
00225 else if (regexCache.isFullDirty()) {
00226 string del = regexCache.getLastDirtyKey();
00227 regex_t* delrex = regexCache[del];
00228
00229 assert(delrex);
00230 regfree(delrex);
00231 cflagMap.erase(del);
00232 delete delrex;
00233 delrex = 0;
00234 }
00235
00236 string tre = translatePerlRE(re);
00237
00238 regex_t* rex = new regex_t;
00239 int error = regcomp(rex, tre.c_str(), cflags);
00240 if (error != 0) {
00241 regfree(rex);
00242 delete rex;
00243 rex = 0;
00244 throwRegexException(re, error, rex);
00245 }
00246
00247 regexCache.addDirty(re, rex);
00248 cflagMap[re] = cflags;
00249 return rex;
00250 }
00251
00252 string StrUtil::translatePerlRE(string re) {
00253 re = replaceAll(re, "\\w", "[[:alnum:]_]");
00254 re = replaceAll(re, "\\W", "[^[:alnum:]_]");
00255 re = replaceAll(re, "\\d", "[[:digit:]]");
00256 re = replaceAll(re, "\\D", "[^[:digit:]]");
00257 re = replaceAll(re, "\\s", "[[:space:]]");
00258 re = replaceAll(re, "\\S", "[^[:space:]]");
00259 return re;
00260 }
00261
00262 void StrUtil::toCStr(const StringList& l, const char* a[]) {
00263 for (size_t i = 0; i < l.size(); i++) {
00264 a[i] = l[i].c_str();
00265 }
00266 }
00267
00268 bool StrUtil::isPrintable(const string& s) {
00269 size_t i = 0;
00270 while (i < s.size() && isprint(s[i])) {
00271 i++;
00272 }
00273 return (i == s.size());
00274 }
00275
00276 StringList StrUtil::split(const string& delim, string s, bool returnEmpty) {
00277 StringList r;
00278 while (!s.empty()) {
00279 while (s.find(delim) == 0) {
00280 s = s.substr(delim.size());
00281 if (returnEmpty) {
00282 r.push_back("");
00283 }
00284 }
00285 size_t i = s.find(delim);
00286 if (i != string::npos) {
00287 r.push_back(s.substr(0, i));
00288 s = s.substr(i + delim.size());
00289 }
00290 else {
00291
00292 if (!s.empty()) {
00293 r.push_back(s);
00294 return r;
00295 }
00296 }
00297 }
00298
00299 if (returnEmpty) {
00300 r.push_back("");
00301 }
00302
00303 return r;
00304 }
00305
00306 string StrUtil::join(const string& delim, const StringList& l) {
00307 string r = "";
00308 if (!l.empty()) {
00309 size_t i = 0;
00310 while (i < l.size() - 1) {
00311 r.append(l[i]);
00312 r.append(delim);
00313 i++;
00314 }
00315 r.append(l[i]);
00316 }
00317 return r;
00318 }
00319
00320 string StrUtil::spaces(size_t w, size_t l) {
00321 if (l >= w) {
00322 return " ";
00323 }
00324 else {
00325 return string(w - l, ' ');
00326 }
00327 }
00328
00329 namespace StrUtilNamespace {
00330 StdStringList getTypeFromTemplate(const std::string& fn, const char* typeVar[]) {
00331 StdStringList ret;
00332 string find = "[with ";
00333 find += typeVar[0];
00334 find += " = ";
00335 size_t start = std::string::npos;
00336 size_t end = std::string::npos;
00337
00338 start = fn.find(find);
00339 if(start == std::string::npos) {
00340 ASSERT(0);
00341 }
00342 start = start + 6 + strlen(typeVar[0]) + 3;
00343
00344 int i = 0;
00345
00346 while(typeVar[i+1] != 0) {
00347
00348 find = ", " + std::string(typeVar[i+1]) + " = ";
00349 end = fn.find(find);
00350 if(end == std::string::npos) {
00351 ASSERT(0);
00352 }
00353 ret.push_back(fn.substr(start, end-start));
00354 start = end + 2 + strlen(typeVar[i+1]) + 3;
00355 i++;
00356 }
00357
00358 find = "]";
00359 end = fn.find(find);
00360 if(end == std::string::npos) {
00361 ASSERT(0);
00362 }
00363 ret.push_back(fn.substr(start, end-start));
00364
00365 return ret;
00366 }
00367 }
00368