ulxmlrpcpp.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002               ulxmlrpcpp.cpp  -  common stuff for xml-rpc project
00003                              -------------------
00004     begin                : Sam Apr 20 2002
00005     copyright            : (C) 2002-2007 by Ewald Arnold
00006     email                : ulxmlrpcpp@ewald-arnold.de
00007 
00008     $Id: ulxmlrpcpp.cpp 1057 2007-08-10 20:30:19Z ewald-arnold $
00009 
00010  ***************************************************************************/
00011 
00012 /**************************************************************************
00013  *
00014  * This program is free software; you can redistribute it and/or modify
00015  * it under the terms of the GNU Lesser General Public License as
00016  * published by the Free Software Foundation; either version 2 of the License,
00017  * or (at your option) any later version.
00018  *
00019  * This program is distributed in the hope that it will be useful,
00020  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00021  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00022  * GNU General Public License for more details.
00023  *
00024  * You should have received a copy of the GNU Lesser General Public License
00025  * along with this program; if not, write to the Free Software
00026  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00027  *
00028  ***************************************************************************/
00029 
00030 //#define ULXR_UNICODE_ONLY_HELPERS
00031 #define ULXR_NEED_EXPORTS
00032 #include <ulxmlrpcpp/ulxmlrpcpp.h>  // always first header
00033 
00034 #include <cctype>
00035 #include <cerrno>
00036 #include <sstream>
00037 #include <iosfwd>
00038 #include <cctype>
00039 #include <cstdlib>
00040 
00041 #include <ulxmlrpcpp/ulxr_log4j.h>
00042 #include <ulxmlrpcpp/ulxr_except.h>
00043 #include <ulxmlrpcpp/ulxr_wbxmlparse.h>
00044 #include <ulxmlrpcpp/ulxr_htmlform_handler.h>
00045 #include <ulxmlrpcpp/ulxr_tcpip_connection.h>
00046 
00047 #ifndef ULXR_OMIT_REENTRANT_PROTECTOR
00048 #include <ulxmlrpcpp/ulxr_mutex.h>
00049 #endif
00050 
00051 
00095 namespace ulxr {
00096 
00097 
00098 ULXR_API_IMPL(void) getVersion (int &major, int &minor, int &patch, bool &debug, CppString &/*info*/)
00099 {
00100   CppString s (ULXR_GET_STRING(ULXR_VERSION));
00101   CppString num;
00102 /*
00103 #ifdef ULXR_USE_INTRINSIC_VALUE_TYPES
00104   info = ulxr_i18n("Conversion from intrinsic types to ulxr::Value() is activated.\n");
00105 #else
00106   info = ulxr_i18n("Conversion from intrinsic types to ulxr::Value() is NOT activated.\n");
00107 #endif
00108 */
00109   std::size_t pos = s.find(ULXR_CHAR('.'));
00110   bool good = true;
00111   if (pos != CppString::npos)
00112   {
00113     num = s.substr(0, pos);
00114     if (num.length() == 0)
00115       good = false;
00116     major = ulxr_atoi(getLatin1(num).c_str());
00117     s.erase(0, pos+1);
00118     pos = s.find('.');
00119 
00120     if (pos != CppString::npos)
00121     {
00122       num = s.substr(0, pos);
00123       if (num.length() == 0)
00124         good = false;
00125       minor = ulxr_atoi(getLatin1(num).c_str());
00126       s.erase(0, pos+1);
00127 
00128       if (s.length() == 0)
00129         good = false;
00130       patch = ulxr_atoi(getLatin1(s).c_str());
00131     }
00132     else
00133       good = false;
00134   }
00135   else
00136     good = false;
00137 
00138   if (!good)
00139   {
00140     major = -1;
00141     minor = -1;
00142     patch = -1;
00143   }
00144 
00145 #ifdef DEBUG
00146   debug = true;
00147 #else
00148   debug = false;
00149 #endif
00150 }
00151 
00152 #if defined (ULXR_UNICODE) || defined(ULXR_UNICODE_ONLY_HELPERS)
00153 
00154 ULXR_API_IMPL(Cpp16BitString) getUnicode(const std::string &latin1)
00155 {
00156   Cpp16BitString ret;
00157   for (unsigned i = 0; i < latin1.length(); ++i)
00158     ret += (unsigned char) latin1[i];
00159 
00160   return ret;
00161 }
00162 
00163 
00164 ULXR_API_IMPL(std::string) getLatin1(const Cpp16BitString &uni)
00165 {
00166   std::string ret;
00167   for (unsigned i = 0; i < uni.length(); ++i)
00168     ret += (unsigned char) uni[i];
00169 
00170   return ret;
00171 }
00172 
00173 #endif
00174 
00175 ULXR_API_IMPL(CppString) stripWS(const CppString &s)
00176 {
00177    unsigned start = 0;
00178    while (start < s.length() && ulxr_isspace(s[start]))
00179      ++start;
00180 
00181    unsigned end = s.length();
00182    while (end > start && ulxr_isspace(s[end-1]))
00183      --end;
00184 
00185    return s.substr(start, end-start);
00186 }
00187 
00188 
00189 static ulxr::Char b64_encodetable [64] =
00190 {
00191  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
00192  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
00193  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
00194  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
00195  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
00196  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
00197  'w', 'x', 'y', 'z', '0', '1', '2', '3',
00198  '4', '5', '6', '7', '8', '9', '+', '/'
00199 };
00200 
00201 
00202 ULXR_API_IMPL(CppString) encodeBase64(const CppString &normstr, bool add_crlf)
00203 {
00204     CppString ret;
00205     unsigned len = normstr.length();
00206     unsigned idx = 0;
00207     bool hiteof = len == 0;
00208     bool just_nl = false;
00209     unsigned linelen = 0;
00210 
00211     while (!hiteof)
00212     {
00213         unsigned int igroup[3];
00214         unsigned int ogroup[4];
00215         unsigned int n;
00216         unsigned int c;
00217 
00218         igroup[0] = igroup[1] = igroup[2] = 0;
00219         for (n = 0; n < 3; n++)
00220         {
00221             if (idx >= len)
00222             {
00223                 hiteof = true;
00224                 break;
00225             }
00226 #ifndef ULXR_UNICODE
00227             c = normstr[idx++] & 0xFF;
00228 #else
00229             c = normstr[idx++];
00230             if (c > 0xFF)
00231             {
00232               std::basic_ostringstream<wchar_t> os; // std::wostringstream
00233               os << c;
00234               throw ParameterException(ApplicationError,
00235                                        ulxr_i18n(ULXR_PCHAR("encodeBase64(): Illegal character in input: #"))+ os.str());
00236             }
00237 #endif
00238             igroup[n] = c;
00239         }
00240 
00241         if (n > 0)
00242         {
00243             ogroup[0] = b64_encodetable[igroup[0] >> 2];
00244             ogroup[1] = b64_encodetable[((igroup[0] & 3) << 4) | ((igroup[1] & 0xF0) >> 4)];
00245             ogroup[2] = b64_encodetable[((igroup[1] & 0x0F) << 2) | ((igroup[2] & 0xC0) >> 6)];
00246             ogroup[3] = b64_encodetable[igroup[2] & 0x3F];
00247 /*
00248 std::cout << "igroup " << std::hex
00249    << " 0x"  << igroup[0]
00250    << " 0x"  << igroup[1]
00251    << " 0x"  << igroup[2]
00252    << std::endl;
00253 
00254 std::cout << "ogroup " << std::hex
00255    << " 0x"  << ogroup[0]
00256    << " 0x"  << ogroup[1]
00257    << " 0x"  << ogroup[2]
00258    << " 0x"  << ogroup[3]
00259    << std::endl;
00260 */
00261             // Replace characters in output stream with "=" pad
00262             // characters if fewer than three characters were
00263             // read from the end of the input stream.
00264 
00265             if (n < 3)
00266             {
00267                 ogroup[3] = '=';
00268                 if (n < 2)
00269                 {
00270                     ogroup[2] = '=';
00271                 }
00272             }
00273 
00274             just_nl = false;
00275             for (unsigned i = 0; i < 4; i++)
00276             {
00277                 ret += ogroup[i];
00278                 if (++linelen >= 72)
00279                 {
00280 
00281                   linelen = 0;
00282                   just_nl = true;
00283 
00284                   if (add_crlf)
00285                     ret += ULXR_PCHAR("\r\n");
00286                 }
00287             }
00288         }
00289     }
00290 
00291     if (!just_nl && add_crlf)
00292        ret += ULXR_PCHAR("\r\n");
00293 
00294     return ret;
00295 }
00296 
00297 
// Maps a character code (0..255) to its 6-bit base64 value.
// 0x80 marks a character that is not part of the base64 alphabet.
// The pad character '=' (0x3d) is accepted and maps to 0x00.
// The table is only ever read, so it is declared const.
static const int b64_decodetable [256] =
{
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 00 .. 07
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 08 .. 0f
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 10 .. 17
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 18 .. 1f
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 20 .. 27
  0x80,  0x80,  0x80,  0x3e,  0x80,  0x80,  0x80,  0x3f, // 28 .. 2f   '+' '/'
  0x34,  0x35,  0x36,  0x37,  0x38,  0x39,  0x3a,  0x3b, // 30 .. 37   '0' .. '7'
  0x3c,  0x3d,  0x80,  0x80,  0x80,  0x00,  0x80,  0x80, // 38 .. 3f   '8' '9' '='
  0x80,  0x00,  0x01,  0x02,  0x03,  0x04,  0x05,  0x06, // 40 .. 47   'A' .. 'G'
  0x07,  0x08,  0x09,  0x0a,  0x0b,  0x0c,  0x0d,  0x0e, // 48 .. 4f   'H' .. 'O'
  0x0f,  0x10,  0x11,  0x12,  0x13,  0x14,  0x15,  0x16, // 50 .. 57   'P' .. 'W'
  0x17,  0x18,  0x19,  0x80,  0x80,  0x80,  0x80,  0x80, // 58 .. 5f   'X' 'Y' 'Z'
  0x80,  0x1a,  0x1b,  0x1c,  0x1d,  0x1e,  0x1f,  0x20, // 60 .. 67   'a' .. 'g'
  0x21,  0x22,  0x23,  0x24,  0x25,  0x26,  0x27,  0x28, // 68 .. 6f   'h' .. 'o'
  0x29,  0x2a,  0x2b,  0x2c,  0x2d,  0x2e,  0x2f,  0x30, // 70 .. 77   'p' .. 'w'
  0x31,  0x32,  0x33,  0x80,  0x80,  0x80,  0x80,  0x80, // 78 .. 7f   'x' 'y' 'z'
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 80 .. 87
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 88 .. 8f
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 90 .. 97
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // 98 .. 9f
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // a0 .. a7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // a8 .. af
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // b0 .. b7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // b8 .. bf
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // c0 .. c7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // c8 .. cf
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // d0 .. d7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // d8 .. df
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // e0 .. e7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // e8 .. ef
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // f0 .. f7
  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80,  0x80, // f8 .. ff
};
00333 
00334 
00335 ULXR_API_IMPL(CppString) decodeBase64(const CppString &b64str, bool errcheck)
00336 {
00337     CppString ret;
00338     unsigned len = b64str.length();
00339     unsigned idx = 0;
00340 
00341     if (len == 0)
00342       return ret;
00343 
00344     while (true)
00345     {
00346         unsigned int a[4], b[4], o[3];
00347 
00348         for (unsigned j = 0; j < 4; )
00349         {
00350 //std::cout << "idx " << idx << " 0x" << std::hex << (unsigned) b64str[idx] << " " << std::dec << b64str[idx] << std::endl;
00351             if (idx >= len)
00352             {
00353                 if (errcheck && (j > 0))
00354                     throw ParameterException(ApplicationError,
00355                                              ulxr_i18n(ULXR_PCHAR("decodeBase64(): Input data is incomplete.")));
00356                 return ret;
00357             }
00358 
00359             unsigned c = (unsigned) b64str[idx++];
00360 
00361             if (c != ULXR_CHAR('\r') && c != ULXR_PCHAR('\n') && c != ULXR_PCHAR(' '))
00362             {
00363               if ((c > 0xFF) || (b64_decodetable[c] & 0x80))
00364               {
00365                   if (errcheck)
00366                   {
00367 #ifdef ULXR_UNICODE
00368                     std::basic_ostringstream<wchar_t> os; // std::wostringstream
00369 #else
00370                     std::ostringstream os;
00371 #endif
00372                     os << (unsigned) c;
00373                     throw ParameterException(ApplicationError,
00374                                              ulxr_i18n(ULXR_GET_STRING("decodeBase64(): Illegal character in input: #"))+ os.str());
00375                   }
00376                   // Ignoring errors: discard invalid character.
00377                   j--;
00378                   continue;
00379               }
00380               a[j] = c;
00381               b[j] = b64_decodetable[c];
00382               ++j;
00383             }
00384         }
00385 
00386         o[0] = 0xff & ((b[0] << 2) | (b[1] >> 4));
00387         o[1] = 0xff & ((b[1] << 4) | ((b[2]) >> 2));
00388         o[2] = 0xff & ((b[2] << 6) |  (b[3]));
00389 /*
00390 std::cout << "b " << std::hex
00391    << " 0x"  << b[0]
00392    << " 0x"  << b[1]
00393    << " 0x"  << b[2]
00394    << " 0x"  << b[3]
00395    << std::endl;
00396 
00397 std::cout << "o " << std::hex
00398    << " 0x"  << o[0]
00399    << " 0x"  << o[1]
00400    << " 0x"  << o[2]
00401    << std::endl;
00402 */
00403         int i = a[2] == ULXR_CHAR('=') ? 1 : (a[3] == ULXR_CHAR('=') ? 2 : 3);
00404 
00405         switch (i)
00406         {
00407            case 1:
00408              ret += o[0];
00409            break;
00410 
00411            case 2:
00412              ret += o[0];
00413              ret += o[1];
00414            break;
00415 
00416            case 3:
00417              ret += o[0];
00418              ret += o[1];
00419              ret += o[2];
00420            break;
00421         }
00422 
00423         if (i < 3)
00424           return ret;
00425     }
00426 }
00427 
00428 
00429 ULXR_API_IMPL(CppString) xmlEscape(const CppString &str, bool suppress_non_unicode)
00430 {
00431   CppString ret;
00432   unsigned prev = 0;
00433   unsigned len = str.length();
00434   unsigned curs = 0;
00435   const Char *pc = str.data();
00436 
00437   while (curs != len)
00438   {
00439     Char c = *pc++;
00440 
00441     if (c == ULXR_CHAR('&'))
00442     {
00443       ret += str.substr(prev, curs-prev);
00444       ret += ULXR_PCHAR("&amp;");
00445       prev = curs+1;
00446     }
00447 
00448     else if (c == ULXR_CHAR('<'))
00449     {
00450       ret += str.substr(prev, curs-prev);
00451       ret += ULXR_PCHAR("&lt;");
00452       prev = curs+1;
00453     }
00454 
00455     else if (c == ULXR_CHAR('\n'))
00456     {
00457       ret += str.substr(prev, curs-prev);
00458       ret += ULXR_PCHAR("&#xA;");
00459       prev = curs+1;
00460     }
00461 
00462     else if (c == ULXR_CHAR('\r'))
00463     {
00464       ret += str.substr(prev, curs-prev);
00465       ret += ULXR_PCHAR("&#xD;");
00466       prev = curs+1;
00467     }
00468 
00469     else if (c == ULXR_CHAR('\t'))
00470     {
00471       ret += str.substr(prev, curs-prev);
00472       ret += ULXR_PCHAR("&#x9;");
00473       prev = curs+1;
00474     }
00475 
00476     else if (suppress_non_unicode && c < 0x20) // fffe, ffff, d800 .. dfff
00477     {
00478       // ignore non-unicode
00479       prev = curs+1;
00480     }
00481 
00482     else if (c == ULXR_CHAR('\0'))
00483     {
00484       ret += str.substr(prev, curs-prev);
00485       ret += ULXR_PCHAR("&#x0;");
00486       prev = curs+1;
00487     }
00488 
00489     ++curs;
00490   }
00491   ret += str.substr(prev, curs-prev);
00492   return ret;
00493 }
00494 
00495 
00496 CppString charRefDezCont (ULXR_PCHAR("0123456789"));
00497 CppString charRefHexCont (ULXR_PCHAR("0123456789aAbBcCdDeEfF"));
00498 
00499 
00500 // When we use Unicode, resolve "#xxx;" to Unicode
00501 // otherwise resolve to utf8
00502 static void resolveCharRef(const CppString &ins, unsigned &pos, CppString &outs)
00503 {
00504 
00505   if (pos > ins.length()-3)
00506     throw ParameterException(InvalidCharacterError,
00507                              ulxr_i18n(ULXR_PCHAR("Error in xml character reference.")));
00508 
00509   pos += 2;
00510 
00511   unsigned wc = 0;
00512   if (   ins[pos] == ULXR_CHAR('x')
00513       || ins[pos] == ULXR_CHAR('X'))
00514   {
00515     ++pos;
00516     for (unsigned limit = 0;
00517             (limit < 6)
00518          && (charRefHexCont.find(ins[pos]) != CppString::npos)
00519          && (pos < ins.length());
00520          ++limit)
00521     {
00522       wc <<= 4;
00523       unsigned c = ulxr_toupper(ins[pos]);
00524       if (ulxr_isdigit(c))
00525         wc += c - ULXR_CHAR('0');
00526       else
00527         wc += c - ULXR_CHAR('A') + 0xa;
00528       ++pos;
00529     }
00530   }
00531   else
00532   {
00533     for (unsigned limit = 0;
00534             (limit < 8)
00535          && (charRefDezCont.find(ins[pos]) != CppString::npos)
00536          && (pos < ins.length());
00537          ++limit)
00538     {
00539       wc *= 10;
00540       unsigned c = ins[pos];
00541       if (ulxr_isdigit(c))
00542         wc += c - '0';
00543       else
00544         throw ParameterException(InvalidCharacterError,
00545                                  ulxr_i18n(ULXR_PCHAR("Error in xml character reference.")));
00546       ++pos;
00547     }
00548   }
00549 
00550   if (pos < ins.length()-1 && ins[pos] == ';')
00551   {
00552 #ifdef ULXR_UNICODE
00553     outs = wc;
00554 #else
00555     outs = unicodeToUtf8(wc);
00556 #endif
00557     ++pos;
00558   }
00559   else
00560     throw ParameterException(InvalidCharacterError,
00561                              ulxr_i18n(ULXR_PCHAR("Error in xml character reference.")));
00562 
00563   return;
00564 }
00565 
00566 
// Compares at most "count" characters of two C strings, using the function
// that matches the character width of the build.
#ifndef ULXR_UNICODE
#define STRNCMP(lhs, rhs, count)  strncmp(lhs, rhs, count)
#else
#define STRNCMP(lhs, rhs, count)  wcsncmp(lhs, rhs, count)
#endif
00572 
00573 
00574 ULXR_API_IMPL(CppString) xmlUnEscape(const CppString &str)
00575 {
00576   CppString ret;
00577   unsigned prev = 0;
00578   unsigned len = str.length();
00579   unsigned curs = 0;
00580   CppString s;
00581 
00582   const Char *amp = ULXR_PCHAR("&amp;");
00583   const Char *lt = ULXR_PCHAR("&lt;");
00584 
00585   while (curs < len)
00586   {
00587     const Char *pc = str.data() + curs;
00588     Char c = *pc;
00589     if (c == '&')
00590     {
00591       if (curs == len-1)
00592         throw ParameterException(InvalidCharacterError,
00593                                  ulxr_i18n(ULXR_PCHAR("Error in xml reference, \"&\" is last character.")));
00594 
00595       if (   curs < len-1
00596           && ULXR_CHAR('#') == *(pc+1))
00597       {
00598         ret += str.substr(prev, curs-prev);
00599         resolveCharRef(str, curs, s);
00600         ret += s;
00601         prev = curs;
00602       }
00603 
00604       else if (len-curs >= 5 && STRNCMP(pc, amp, 5) == 0)
00605       {
00606         ret += str.substr(prev, curs-prev);
00607         ret += ULXR_PCHAR("&");
00608         curs += 5;
00609         prev = curs;
00610       }
00611 
00612       else if (len-curs >= 4 && STRNCMP(pc, lt, 4) == 0)
00613       {
00614         ret += str.substr(prev, curs-prev);
00615         ret += ULXR_PCHAR("<");
00616         curs += 4;
00617         prev = curs;
00618       }
00619       else
00620         throw ParameterException(InvalidCharacterError,
00621                                  ulxr_i18n(ULXR_PCHAR("Error in xml reference.")));
00622     }
00623     else
00624       ++curs;
00625 }
00626   ret += str.substr(prev, curs-prev);
00627 
00628 /*
00629   CppString ret = str;
00630   std::size_t pos = 0;
00631 
00632   while ((pos = ret.find(ULXR_CHAR('&'), pos)) != CppString::npos )
00633   {
00634     if (pos == ret.length()-1)
00635       throw ParameterException(InvalidCharacterError,
00636                                ulxr_i18n(ULXR_PCHAR("Error in xml reference, \"&\" is last character.")));
00637 
00638     if (   pos < ret.length()-1
00639         && ULXR_CHAR('#') == ret[pos+1])
00640       resolveCharRef(ret, pos);
00641 
00642     else if (ret.substr(pos, 5) == ULXR_PCHAR("&amp;"))
00643     {
00644       ret.replace (pos, 5, ULXR_PCHAR("&"));
00645       pos += 1;
00646     }
00647 
00648     else if (ret.substr(pos, 4) == ULXR_PCHAR("&lt;"))
00649     {
00650       ret.replace (pos, 4, ULXR_PCHAR("<"));
00651       pos += 1;
00652     }
00653 
00654     else
00655       throw ParameterException(InvalidCharacterError,
00656                                ulxr_i18n(ULXR_PCHAR("Unrecognized entity.")));
00657   }
00658 */
00659   return ret;
00660 }
00661 
00662 
00663 static unsigned decodeUtf8Group(const Cpp8BitString &val, unsigned &i)
00664 {
00665   static const ulxr::Char* malformed = ULXR_I18N_NOOP(ULXR_PCHAR("Malformed UTF8 encoded string"));
00666 
00667   unsigned charNum = 0;
00668   unsigned short trigger = val[i];
00669   unsigned vl = val.length();
00670   if (trigger >= 0x80)
00671   {
00672     int remain = vl - i;
00673     if ((trigger & 0xE0) == 0xC0)
00674     {         // 110x xxxx
00675       if (   (remain > 1)
00676           && (val[i+1] & 0xC0) == 0x80)
00677       {
00678         charNum = ((val[i]   & 0x1F) << 6)
00679                   | (val[i+1] & 0x3F);
00680         i += 2;
00681       }
00682       else
00683       {
00684         throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00685       }
00686     }
00687 
00688     else if ((trigger & 0xF0) == 0xE0)
00689     {  // 1110 xxxx
00690       if (   (remain > 2)
00691           && ((val[i+1] & 0xC0) == 0x80)
00692           && ((val[i+2] & 0xC0) == 0x80))
00693       {
00694         charNum = ((val[i]   & 0x0F) << 12)
00695                   |((val[i+1] & 0x3F) <<  6)
00696                   | (val[i+2] & 0x3F);
00697         i += 3;
00698       }
00699       else
00700       {
00701         throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00702       }
00703     }
00704 
00705     else if ((trigger & 0xF8) == 0xF0)
00706     {   // 1111 0xxx
00707       if (   (remain > 3)
00708           && ((val[i+1] & 0xC0) == 0x80)
00709           && ((val[i+2] & 0xC0) == 0x80)
00710           && ((val[i+3] & 0xC0) == 0x80))
00711       {
00712         charNum = ((val[i]   & 0x07) << 18)
00713                   |((val[i+1] & 0x3F) << 12)
00714                   |((val[i+2] & 0x3F) <<  6)
00715                   | (val[i+3] & 0x3F);
00716         i += 4;
00717       }
00718       else
00719       {
00720         throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00721       }
00722     }
00723 
00724     else if ((trigger & 0xFC) == 0xF8)
00725     {   // 1111 10xx
00726       if (   (remain > 4)
00727           && ((val[i+1] & 0xC0) == 0x80)
00728           && ((val[i+2] & 0xC0) == 0x80)
00729           && ((val[i+3] & 0xC0) == 0x80)
00730           && ((val[i+4] & 0xC0) == 0x80))
00731       {
00732         charNum = ((val[i]   & 0x03) << 24)
00733                   |((val[i+1] & 0x3F) << 18)
00734                   |((val[i+2] & 0x3F) << 12)
00735                   |((val[i+3] & 0x3F) <<  6)
00736                   | (val[i+4] & 0x3F);
00737         i += 5;
00738       }
00739       else
00740       {
00741           throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00742       }
00743     }
00744 
00745     else if ((trigger & 0xFE) == 0xFC)
00746     {   // 1111 110x
00747       if (   (remain > 5)
00748           && ((val[i+1] & 0xC0) == 0x80)
00749           && ((val[i+2] & 0xC0) == 0x80)
00750           && ((val[i+3] & 0xC0) == 0x80)
00751           && ((val[i+4] & 0xC0) == 0x80)
00752           && ((val[i+5] & 0xC0) == 0x80))
00753       {
00754         charNum = ((val[i]   & 0x01) << 30)
00755                   |((val[i+1] & 0x3F) << 24)
00756                   |((val[i+2] & 0x3F) << 18)
00757                   |((val[i+3] & 0x3F) << 12)
00758                   |((val[i+4] & 0x3F) <<  6)
00759                   | (val[i+5] & 0x3F);
00760         i += 6;
00761       }
00762       else
00763       {
00764           throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00765       }
00766     }
00767     else
00768       throw ParameterException(ApplicationError, ulxr_i18n(malformed));
00769   }
00770 
00771   else
00772   {
00773     i += 1;
00774     charNum = trigger;
00775   }
00776   return charNum;
00777 }
00778 
00779 
00780 ULXR_API_IMPL(Cpp8BitString) utf8ToAscii(const Cpp8BitString &val)
00781 {
00782   Cpp8BitString ret;
00783 
00784   unsigned i = 0;
00785   while (i < val.length())
00786   {
00787     unsigned charNum = decodeUtf8Group(val, i);
00788 
00789     if (charNum < 0x100)
00790       ret += charNum;
00791 
00792     else
00793       throw ParameterException(ApplicationError, ulxr_i18n(ULXR_PCHAR("Ascii character too big: ")) + HtmlFormHandler::makeHexNumber(charNum));
00794   }
00795 
00796   return ret;
00797 }
00798 
00799 #if defined (ULXR_UNICODE) || defined(ULXR_UNICODE_ONLY_HELPERS)
00800 
00801 ULXR_API_IMPL(Cpp16BitString) utf8ToUnicode(const Cpp8BitString &val)
00802 {
00803  Cpp16BitString ret;
00804 
00805   unsigned i = 0;
00806   while (i < val.length())
00807   {
00808     unsigned charNum = decodeUtf8Group(val, i);
00809 
00810     if (charNum < 0x10000)
00811       ret += charNum;
00812 
00813     else if (charNum < 0x110000)
00814     {
00815 
00816       charNum -= 0x10000;
00817       ret += ((charNum >> 10) + 0xD800);
00818       ret += ((charNum & 0x3FF) + 0xDC00);
00819     }
00820 
00821     else
00822       throw ParameterException(ApplicationError, ulxr_i18n(ULXR_PCHAR("In utf8ToUnicode(), unicode character too big: ")) + HtmlFormHandler::makeHexNumber(charNum));
00823   }
00824 
00825   return ret;
00826 }
00827 
00828 
00829 ULXR_API_IMPL(Cpp8BitString) unicodeToUtf8(const Cpp16BitString &newval)
00830 {
00831   Cpp8BitString val;
00832 #ifdef __BORLANDC__
00833   val.reserve(newval.length());
00834 #endif
00835   for (unsigned i = 0; i < newval.length(); ++i)
00836     val += unicodeToUtf8(newval[i]);
00837 
00838   return val;
00839 }
00840 
00841 #endif
00842 
00843 ULXR_API_IMPL(Cpp8BitString) asciiToUtf8(const Cpp8BitString &newval)
00844 {
00845   Cpp8BitString val;
00846 #ifdef __BORLANDC__
00847   val.reserve(newval.length());
00848 #endif
00849   for (unsigned i = 0; i < newval.length(); ++i)
00850     val += unicodeToUtf8((unsigned char) newval[i]);
00851 
00852   return val;
00853 }
00854 
00855 
00856 ULXR_API_IMPL(Cpp8BitString) unicodeToUtf8(const unsigned c)
00857 {
00858   Cpp8BitString val;
00859   if (c < 0x80)
00860     val += c;
00861 
00862   else if (c < 0x800)
00863   {
00864     val += (0xC0 | (c >> 6));
00865     val += (0x80 | (c & 0x3F));
00866   }
00867 
00868 // FIXME: surrogates ??
00869   else if (c < 0x10000)
00870   {
00871     val += (0xE0 |  (c >> 12));
00872     val += (0x80 | ((c >>  6) & 0x3F));
00873     val += (0x80 |  (c & 0x3F));
00874   }
00875 /*
00876     else if (c < 0xFFFF)
00877     {
00878       val += (0xF0 | (c >> 12));
00879       val += (0x80 | ((c >> 6) & 0x3F));
00880       val += (0x80 | (c & 0x3F));
00881     }            else if (c < 0xFFFF)
00882     {
00883       val += (0xF0 | (c >> 12));
00884       val += (0x80 | ((c >> 6) & 0x3F));
00885       val += (0x80 | (c & 0x3F));
00886     }
00887 
00888     else if (c < 0xFFFF)
00889     {
00890       val += (0xF8 | (c >> 12));
00891       val += (0x80 | ((c >> 6) & 0x3F));
00892       val += (0x80 | (c & 0x3F));
00893     }
00894     else if (c < 0xFFFF)
00895     {
00896       val += (0xFC | (c >> 12));
00897       val += (0x80 | ((c >> 6) & 0x3F));
00898       val += (0x80 | (c & 0x3F));
00899     }
00900 */
00901   else
00902     throw ParameterException(ApplicationError, ulxr_i18n(ULXR_PCHAR("unicodeToUtf8(): unicode character too big: ")) + HtmlFormHandler::makeHexNumber(c));
00903 
00904   return val;
00905 }
00906 
00907 #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
00908 
00909 ULXR_API_IMPL(Cpp8BitString) convertEncoding(const Cpp8BitString &val,
00910                                           const char *to_encoding,
00911                                           const char *from_encoding)
00912 {
00913   iconv_t con;
00914   if ((ssize_t) (con = iconv_open(to_encoding, from_encoding)) < 0)
00915     throw ParameterException(UnsupportedEncodingError,
00916                              ulxr_i18n(ULXR_PCHAR("Unsupported encoding (iconv_open() reported error)")));
00917 
00918   Cpp8BitString ret;
00919   try
00920   {
00921     ret = convertEncoding(val, con);
00922   }
00923   catch(...)
00924   {
00925     iconv_close(con);
00926     throw;
00927   }
00928   iconv_close(con);
00929   return ret;
00930 }
00931 
00932 
00933 ULXR_API_IMPL(Cpp8BitString) convertEncoding(const Cpp8BitString &val, iconv_t con)
00934 {
00935   Cpp8BitString ret;
00936   char buffer[200];
00937   bool convert = true;
00938   unsigned in_offset = 0;
00939   while (convert && val.length() > in_offset)
00940   {
00941     size_t outbytes = sizeof(buffer);
00942     char *outbuf = buffer;
00943     const char *inbuf = const_cast<char*>(val.data())+in_offset;
00944     size_t inbytes = val.length()-in_offset;
00945     if ((int)iconv(con, (ICONV_CONST char**)&inbuf, &inbytes, &outbuf, &outbytes) <  0)
00946     {
00947       // a bit dangerous, output buffer must take at least a complete
00948       // character sequence, e.g. utf8 has at most 6 bytes.
00949       if (errno != E2BIG)
00950         throw ParameterException(SystemError,
00951                                  ulxr_i18n(ULXR_PCHAR("Error while converting string (iconv() reported error)")));
00952     }
00953     else
00954       convert = false;
00955 
00956     ret.append(buffer, sizeof(buffer)-outbytes);
00957     in_offset += (val.length()-in_offset)-inbytes;
00958   }
00959   return ret;
00960 }
00961 
00962 
00963 ULXR_API_IMPL(Cpp8BitString) encodingToUtf8(const Cpp8BitString &val, const char *encoding)
00964 {
00965   return convertEncoding(val, "UTF-8", encoding);
00966 }
00967 
00968 
00969 ULXR_API_IMPL(Cpp8BitString) utf8ToEncoding(const Cpp8BitString &val, const char *encoding)
00970 {
00971   return convertEncoding(val, encoding, "UTF-8");
00972 }
00973 
00974 #endif
00975 
00976 
00977 ULXR_API_IMPL(void) makeLower( CppString &str)
00978 {
00979   for (unsigned i= 0; i < str.length(); ++i)
00980 #ifdef ULXR_UNICODE
00981     if (str[i] <= 0xff)   // TODO: handle correctly ??
00982 #endif
00983       str[i] = ulxr_tolower(str[i]);
00984 }
00985 
00986 
00987 ULXR_API_IMPL(void) makeUpper( CppString &str)
00988 {
00989   for (unsigned i= 0; i < str.length(); ++i)
00990 #ifdef ULXR_UNICODE
00991     if (str[i] <= 0xff)   // TODO: handle correctly ??
00992 #endif
00993       str[i] = ulxr_toupper(str[i]);
00994 }
00995 
00996 
00997 ULXR_API_IMPL(std::string) getWbXmlExtInt(long int i)
00998 {
00999   std::string s;
01000   s = (char) WbXmlParser::wbxml_EXT_T_0;
01001   if (i <= (1 << 8))
01002     s += (unsigned char) i;
01003   else if (i <= (1 << (8+7)))
01004   {
01005     s += (unsigned char) (i >> 7) & 0x7F | 0x80;
01006     s += (unsigned char) i & 0x7F;
01007   }
01008   else if (i <= (1 << (8+7+7)))
01009   {
01010     s += (unsigned char) (i >> (7+7)) & 0x7F | 0x80;
01011     s += (unsigned char) (i >> 7) & 0x7F | 0x80;
01012     s += (unsigned char) i & 0x7F;
01013   }
01014   else if (i <= (1 << (8+7+7+7)))
01015   {
01016     s += (unsigned char) (i >> (7+7+7)) & 0x7F | 0x80;
01017     s += (unsigned char) (i >> (7+7)) & 0x7F | 0x80;
01018     s += (unsigned char) (i >> 7) & 0x7F | 0x80;
01019     s += (unsigned char) i & 0x7F;
01020   }
01021   else
01022   {
01023     s += (unsigned char) (i >> (7+7+7+7)) & 0x7F | 0x80;
01024     s += (unsigned char) (i >> (7+7+7)) & 0x7F | 0x80;
01025     s += (unsigned char) (i >> (7+7)) & 0x7F | 0x80;
01026     s += (unsigned char) (i >> 7) & 0x7F | 0x80;
01027     s += (unsigned char) i & 0x7F;
01028   }
01029   return s;
01030 }
01031 
01032 
01033 ULXR_API_IMPL(long int) wbXmlToInt(std::string &inp)
01034 {
01035   if (inp.length() < 2)
01036     throw ParameterException(ApplicationError,
01037                              ulxr_i18n(ULXR_PCHAR("wbXmlToInt(): wbxml input string shorter than 2 characters")));
01038 
01039   if (inp[0] != (char) WbXmlParser::wbxml_EXT_T_0)
01040     throw ParameterException(ApplicationError,
01041                              ulxr_i18n(ULXR_PCHAR("wbXmlToInt(): token EXT_T_0 not found at the beginning")));
01042 
01043   unsigned idx = 1;
01044   bool cont = true;
01045   long int i = 0;
01046   while (cont) // at most 5 octets
01047   {
01048     if (idx >= 5+1) // 5 * 7 = 35 bits
01049       throw ParameterException(ApplicationError,
01050                                ulxr_i18n(ULXR_PCHAR("wbXmlToInt(): multibyte sequence is too long for an <int32>")));
01051     if (idx >= inp.length())
01052       throw ParameterException(ApplicationError,
01053                                ulxr_i18n(ULXR_PCHAR("wbXmlToInt(): end of multibyte sequence not found")));
01054     i <<= 7;
01055     unsigned val = (unsigned char) inp[idx];
01056     i |= 0x7f & val;
01057     ++idx;
01058     cont = (0x80 & val) != 0; // last byte: missing bit 7
01059   }
01060   inp.erase(0, idx);
01061   return i;
01062 }
01063 
01064 
01065 ULXR_API_IMPL(std::string) getWbXmlString(const CppString &inp)
01066 {
01067   std::string s;
01068   s = (char) WbXmlParser::wbxml_STR_I;
01069 // TODO: transform in case of '\0' in string
01070 #ifdef ULXR_UNICODE
01071   s += unicodeToUtf8(inp) + '\0';
01072 #else
01073   s += inp + '\0';
01074 #endif
01075   return s;
01076 }
01077 
01078 
01079 ULXR_API_IMPL(CppString) wbXmlToString(std::string &inp)
01080 {
01081   if (inp[0] != WbXmlParser::wbxml_STR_I)
01082     throw ParameterException(ApplicationError,
01083                              ulxr_i18n(ULXR_PCHAR("wbXmlToString(): token wbxml_STR_I not found at the beginning")));
01084 
01085   if (inp.length() < 2)
01086     throw ParameterException(ApplicationError,
01087                              ulxr_i18n(ULXR_PCHAR("wbXmlToString(): wbxml input string shorter than 2 characters")));
01088 
01089   std::string s;
01090   unsigned idx = 1;
01091   bool cont = true;
01092   while (cont)
01093   {
01094     if (idx >= inp.length())
01095       throw ParameterException(ApplicationError,
01096                                ulxr_i18n(ULXR_PCHAR("wbXmlToString(): end of inline string not found")));
01097 
01098     unsigned val = (unsigned char) inp[idx];
01099     if (val == 0)
01100       cont = false;
01101     else
01102       s += char(val);
01103     ++idx;
01104   }
01105   inp.erase(0, idx);
01106 
01107 #ifdef ULXR_UNICODE
01108   return utf8ToUnicode(s);
01109 #else
01110   return s;
01111 #endif
01112 }
01113 
01114 
01115 ULXR_API_IMPL(CppString) binaryDebugOutput (const std::string &s)
01116 {
01117   CppString ret;
01118   unsigned cnt = 0;
01119   for (unsigned i = 0; i < s.length(); ++i)
01120   {
01121     ret += ulxr::HtmlFormHandler::makeHexNumber((unsigned char) s[i]) + ULXR_PCHAR(" ");
01122     if (++cnt >= 20)
01123     {
01124       ret += ULXR_PCHAR("\n");
01125       cnt = 0;
01126     }
01127   }
01128   if (cnt != 0)
01129     ret += ULXR_PCHAR("\n");
01130 
01131   return ret;
01132 }
01133 
01134 
// Global mutexes, one per guarded C library routine. They are only defined
// when ULXR_OMIT_REENTRANT_PROTECTOR is not set.
// NOTE(review): presumably needed because the named routines are not
// reentrant - only the users of randMutex and strerrorMutex are visible here.
01135 #ifndef ULXR_OMIT_REENTRANT_PROTECTOR
01136 ULXR_API_IMPL(Mutex) randMutex;           // locked by getRand() around std::rand()
01137 ULXR_API_IMPL(Mutex) strerrorMutex;       // locked by getLastErrorString() around std::strerror()
01138 ULXR_API_IMPL(Mutex) ctimeMutex;          // presumably for ctime() - users not in this part of the file
01139 ULXR_API_IMPL(Mutex) localtimeMutex;      // presumably for localtime() - users not in this part of the file
01140 ULXR_API_IMPL(Mutex) gethostbynameMutex;  // presumably for gethostbyname() - users not in this part of the file
01141 ULXR_API_IMPL(Mutex) gethostbyaddrMutex;  // presumably for gethostbyaddr() - users not in this part of the file
01142 #endif
01143 
01144 
01145 ULXR_API_IMPL(int) getRand()
01146 {
01147 #ifndef ULXR_OMIT_REENTRANT_PROTECTOR
01148   Mutex::Locker lock(randMutex);
01149 #endif
01150 
01151   return std::rand();
01152 }
01153 
01154 
01155 ULXR_API_IMPL(CppString) getLastErrorString(unsigned errornum)
01156 {
01157 #ifndef ULXR_OMIT_REENTRANT_PROTECTOR
01158   Mutex::Locker lock(strerrorMutex);
01159 #endif
01160 
01161   return ULXR_GET_STRING(std::strerror(errornum));
01162 }
01163 
01164 
// File-local Log4J state. Both pointers are default-constructed (null) and
// are assigned by intializeLog4J(); getLogger4J() hands out l4j.
01165 namespace {
01166 
// connection used by the sender below
01167 std::auto_ptr<TcpIpConnection> l4jconn;
// sender constructed with a reference to *l4jconn
01168 std::auto_ptr<Log4JSender>     l4j;
01169 
01170 }
01171 
01172 
01173 ULXR_API_IMPL(Log4JSender*) getLogger4J()
01174 {
01175   return l4j.get();
01176 }
01177 
01178 
01179 ULXR_API_IMPL(void) intializeLog4J(const std::string &appname, const CppString &loghost)
01180 {
01181   std::string s = appname;
01182   unsigned pos;
01183 #ifdef __unix__
01184   while ((pos = s.find("/")) != std::string::npos)
01185 #else
01186   while ((pos = s.find("\\")) != std::string::npos)
01187 #endif
01188     s.erase(0, pos+1);
01189   l4jconn.reset(new TcpIpConnection(false, loghost, 4448));
01190   l4j.reset(new Log4JSender(ULXR_GET_STRING(s), *l4jconn.get()));
01191 }
01192 
01193 
// File-local switch for formatted XML output: set by enableXmlPrettyPrint(),
// read by getXmlLinefeed() and getXmlIndent(). Off by default.
01194 namespace
01195 {
01196   bool pretty_xml  = false;
01197 }
01198 
01199 
01200 ULXR_API_IMPL(void) enableXmlPrettyPrint(bool pretty)
01201 {
01202   pretty_xml = pretty;
01203 }
01204 
01205 
01206 ULXR_API_IMPL(CppString) getXmlLinefeed()
01207 {
01208   static const CppString empty_LF = ULXR_PCHAR("");
01209   static const CppString normal_LF = ULXR_PCHAR("\n");
01210 
01211   if (pretty_xml)
01212     return normal_LF;
01213 
01214   else
01215     return empty_LF;
01216 }
01217 
01218 
01219 ULXR_API_IMPL(CppString) getXmlIndent(unsigned indent)
01220 {
01221   static const CppString empty_Indent = ULXR_PCHAR("");
01222 
01223   if (pretty_xml)
01224     return CppString(indent, ' ');
01225 
01226   else
01227     return empty_Indent;
01228 }
01229 
01230 
01231 }  // namespace ulxr
01232 

Generated on Sun Aug 19 20:08:57 2007 for ulxmlrpcpp by  doxygen 1.5.1