xtag.c

Go to the documentation of this file.
00001 //===========================================================================
00002 //Copyright (C) 2005 Commonwealth Scientific and Industrial Research
00003 //                   Organisation (CSIRO) Australia
00004 //
00005 //Redistribution and use in source and binary forms, with or without
00006 //modification, are permitted provided that the following conditions
00007 //are met:
00008 //
00009 //- Redistributions of source code must retain the above copyright
00010 //  notice, this list of conditions and the following disclaimer.
00011 //
00012 //- Redistributions in binary form must reproduce the above copyright
00013 //  notice, this list of conditions and the following disclaimer in the
00014 //  documentation and/or other materials provided with the distribution.
00015 //
00016 //- Neither the name of Zentaro Kavanagh nor the names of contributors 
00017 //  may be used to endorse or promote products derived from this software 
00018 //  without specific prior written permission.
00019 //
00020 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00021 //``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00022 //LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00023 //PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE ORGANISATION OR
00024 //CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00025 //EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00026 //PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00027 //PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00028 //LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00029 //NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030 //SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 //===========================================================================
00032 
00033 
00034 #include "stdafx.h"
00035 
00036 #include <ctype.h>
00037 #include <string.h>
00038 #include <stdarg.h>
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 
00042 #include "xlist.h"
00043 
/* Keep the XTAG_DEBUG printf diagnostics in this file switched off. */
#undef XTAG_DEBUG

/* File-local boolean constants; any earlier definitions are dropped. */
#undef TRUE
#undef FALSE
#define FALSE (0)
#define TRUE (!FALSE)

/*
 * Two-way minimum and maximum.  An argument may be evaluated twice, so
 * do not pass expressions that have side effects.
 */
#undef MAX
#undef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))
00057 
00058 typedef struct _XTag XTag;
00059 typedef struct _XAttribute XAttribute;
00060 typedef struct _XTagParser XTagParser;
00061 
00062 #define XTAG_INTERNAL
00063 #include "xtag.h"
00064 
00065 /*
00066  * struct _XTag is kind of a union ... it normally represents a whole
00067  * tag (and its children), but it could alternatively represent some
00068  * PCDATA. Basically, if tag->pcdata is non-NULL, interpret only it and
00069  * ignore the name, attributes and inner_tags.
00070  */
00071 struct _XTag {
00072   char * name;
00073   char * pcdata;
00074   XTag * parent;
00075   XList * attributes;
00076   XList * children;
00077   XList * current_child;
00078 };
00079 
/* One name="value" pair belonging to an element. */
struct _XAttribute {
  char *name;   /* attribute name */
  char *value;  /* attribute value as found between the quotes */
};
00084 
00085 struct _XTagParser {
00086   int valid; /* boolean */
00087   XTag * current_tag;
00088   char * start;
00089   char * end;
00090 };
00091 
/*
 * Character classes.  Each class is a single bit so that several can be
 * OR-ed together and passed to xtag_cin() and friends.  The replacement
 * lists are parenthesised so the constants keep their value inside any
 * surrounding expression (e.g. X_SLASH * 2 or X_OPENTAG + 1).
 */
#define X_NONE        (0)
#define X_WHITESPACE  (1<<0)
#define X_OPENTAG     (1<<1)
#define X_CLOSETAG    (1<<2)
#define X_DQUOTE      (1<<3)
#define X_SQUOTE      (1<<4)
#define X_EQUAL       (1<<5)
#define X_SLASH       (1<<6)
00101 
00102 static int
00103 xtag_cin (char c, int char_class)
00104 {
00105   if (char_class & X_WHITESPACE)
00106     if (isspace(c)) return TRUE;
00107 
00108   if (char_class & X_OPENTAG)
00109     if (c == '<') return TRUE;
00110 
00111   if (char_class & X_CLOSETAG)
00112     if (c == '>') return TRUE;
00113 
00114   if (char_class & X_DQUOTE)
00115     if (c == '"') return TRUE;
00116 
00117   if (char_class & X_SQUOTE)
00118     if (c == '\'') return TRUE;
00119 
00120   if (char_class & X_EQUAL)
00121     if (c == '=') return TRUE;
00122 
00123   if (char_class & X_SLASH)
00124     if (c == '/') return TRUE;
00125 
00126   return FALSE;
00127 }
00128 
00129 static int
00130 xtag_index (XTagParser * parser, int char_class)
00131 {
00132   char * s;
00133   int i;
00134 
00135   s = parser->start;
00136 
00137   for (i = 0; s[i] && s != parser->end; i++) {
00138     if (xtag_cin(s[i], char_class)) return i;
00139   }
00140 
00141   return -1;
00142 }
00143 
00144 static void
00145 xtag_skip_over (XTagParser * parser, int char_class)
00146 {
00147   char * s;
00148   int i;
00149 
00150   if (!parser->valid) return;
00151 
00152   s = (char *)parser->start;
00153 
00154   for (i = 0; s[i] && s != parser->end; i++) {
00155     if (!xtag_cin(s[i], char_class)) {
00156       parser->start = &s[i];
00157       return;
00158     }
00159   }
00160 
00161   return;
00162 }
00163 
00164 static void
00165 xtag_skip_whitespace (XTagParser * parser)
00166 {
00167   xtag_skip_over (parser, X_WHITESPACE);
00168 }
00169 
#if 0
/*
 * Currently compiled out.
 *
 * Advance parser->start to the first character that belongs to
 * char_class.  Does nothing when the parser is invalid; leaves
 * parser->start unchanged when no such character is found before a NUL
 * byte or parser->end (NULL means "no explicit bound").
 */
static void
xtag_skip_to (XTagParser * parser, int char_class)
{
  char * s;
  int i;

  if (!parser->valid) return;

  s = (char *)parser->start;

  /* Compare the position being examined (&s[i]) with the end bound, and
   * do so before reading s[i] so the byte at the bound is never read. */
  for (i = 0; &s[i] != parser->end && s[i]; i++) {
    if (xtag_cin(s[i], char_class)) {
      parser->start = &s[i];
      return;
    }
  }

  return;
}
#endif
00191 
00192 static char *
00193 xtag_slurp_to (XTagParser * parser, int good_end, int bad_end)
00194 {
00195   char * s, * ret;
00196   int xi;
00197 
00198   if (!parser->valid) return NULL;
00199 
00200   s = parser->start;
00201 
00202   xi = xtag_index (parser, good_end | bad_end);
00203 
00204   if (xi > 0 && xtag_cin (s[xi], good_end)) {
00205     ret = malloc ((xi+1) * sizeof(char));
00206     strncpy (ret, s, xi);
00207     ret[xi] = '\0';
00208     parser->start = &s[xi];
00209     return ret;
00210   }
00211 
00212   return NULL;
00213 }
00214 
00215 static int
00216 xtag_assert_and_pass (XTagParser * parser, int char_class)
00217 {
00218   char * s;
00219 
00220   if (!parser->valid) return FALSE;
00221 
00222   s = parser->start;
00223 
00224   if (!xtag_cin (s[0], char_class)) {
00225     parser->valid = FALSE;
00226     return FALSE;
00227   }
00228 
00229   parser->start = &s[1];
00230 
00231   return TRUE;
00232 }
00233 
00234 static char *
00235 xtag_slurp_quoted (XTagParser * parser)
00236 {
00237   char * s, * ret;
00238   int quote = X_DQUOTE; /* quote char to match on */
00239   int xi;
00240 
00241   if (!parser->valid) return NULL;
00242 
00243   xtag_skip_whitespace (parser);
00244 
00245   s = parser->start;
00246 
00247   if (xtag_cin (s[0], X_SQUOTE)) quote = X_SQUOTE;
00248 
00249   if (!xtag_assert_and_pass (parser, quote)) return NULL;
00250 
00251   s = parser->start;
00252 
00253   for (xi = 0; s[xi]; xi++) {
00254     if (xtag_cin (s[xi], quote)) {
00255       if (!(xi > 1 && s[xi-1] == '\\')) break;
00256     }
00257   }
00258 
00259   ret = malloc ((xi+1) * sizeof(char));
00260   strncpy (ret, s, xi);
00261   ret[xi] = '\0';
00262   parser->start = &s[xi];
00263 
00264   if (!xtag_assert_and_pass (parser, quote)) return NULL;
00265 
00266   return ret;
00267 }
00268 
00269 static XAttribute *
00270 xtag_parse_attribute (XTagParser * parser)
00271 {
00272   XAttribute * attr;
00273   char * name, * value;
00274   char * s;
00275 
00276   if (!parser->valid) return NULL;
00277 
00278   xtag_skip_whitespace (parser);
00279  
00280   name = xtag_slurp_to (parser, X_WHITESPACE | X_EQUAL, X_SLASH | X_CLOSETAG);
00281 
00282   if (name == NULL) return NULL;
00283 
00284   xtag_skip_whitespace (parser);
00285   s = parser->start;
00286 
00287   if (!xtag_assert_and_pass (parser, X_EQUAL)) {
00288 #ifdef XTAG_DEBUG
00289     printf ("xtag: attr failed EQUAL on <%s>\n", name);
00290 #endif
00291     goto err_free_name;
00292   }
00293 
00294   xtag_skip_whitespace (parser);
00295 
00296   value = xtag_slurp_quoted (parser);
00297 
00298   if (value == NULL) {
00299 #ifdef XTAG_DEBUG
00300     printf ("Got NULL quoted attribute value\n");
00301 #endif
00302     goto err_free_name;
00303   }
00304 
00305   attr = malloc (sizeof (*attr));
00306   attr->name = name;
00307   attr->value = value;
00308 
00309   return attr;
00310 
00311  err_free_name:
00312   free (name);
00313 
00314   parser->valid = FALSE;
00315 
00316   return NULL;
00317 }
00318 
00319 static XTag *
00320 xtag_parse_tag (XTagParser * parser)
00321 {
00322   XTag * tag, * inner;
00323   XAttribute * attr;
00324   char * name;
00325   char * pcdata;
00326   char * s;
00327 
00328   if (!parser->valid) return NULL;
00329 
00330   if ((pcdata = xtag_slurp_to (parser, X_OPENTAG, X_NONE)) != NULL) {
00331     tag = malloc (sizeof (*tag));
00332     tag->name = NULL;
00333     tag->pcdata = pcdata;
00334     tag->parent = parser->current_tag;
00335     tag->attributes = NULL;
00336     tag->children = NULL;
00337     tag->current_child = NULL;
00338 
00339     return tag;
00340   }
00341 
00342   s = parser->start;
00343 
00344   /* if this starts a close tag, return NULL and let the parent take it */
00345   if (xtag_cin (s[0], X_OPENTAG) && xtag_cin (s[1], X_SLASH))
00346     return NULL;
00347 
00348   if (!xtag_assert_and_pass (parser, X_OPENTAG)) return NULL;
00349 
00350   name = xtag_slurp_to (parser, X_WHITESPACE | X_SLASH | X_CLOSETAG, X_NONE);
00351 
00352   if (name == NULL) return NULL;
00353 
00354 #ifdef XTAG_DEBUG
00355   printf ("<%s ...\n", name);
00356 #endif
00357 
00358   tag = malloc (sizeof (*tag));
00359   tag->name = name;
00360   tag->pcdata = NULL;
00361   tag->parent = parser->current_tag;
00362   tag->attributes = NULL;
00363   tag->children = NULL;
00364   tag->current_child = NULL;
00365 
00366   s = parser->start;
00367 
00368   if (xtag_cin (s[0], X_WHITESPACE)) {
00369     while ((attr = xtag_parse_attribute (parser)) != NULL) {
00370       tag->attributes = xlist_append (tag->attributes, attr);
00371     }
00372   }
00373 
00374   xtag_skip_whitespace (parser);
00375 
00376   s = parser->start;
00377 
00378   if (xtag_cin (s[0], X_CLOSETAG)) {
00379     parser->current_tag = tag;
00380 
00381     xtag_assert_and_pass (parser, X_CLOSETAG);
00382 
00383     while ((inner = xtag_parse_tag (parser)) != NULL) {
00384       tag->children = xlist_append (tag->children, inner);
00385     }
00386 
00387     xtag_skip_whitespace (parser);
00388 
00389     xtag_assert_and_pass (parser, X_OPENTAG);
00390     xtag_assert_and_pass (parser, X_SLASH);
00391     name = xtag_slurp_to (parser, X_WHITESPACE | X_CLOSETAG, X_NONE);
00392     if (name) {
00393       if (name && tag->name && strcmp (name, tag->name)) {
00394 #ifdef XTAG_DEBUG
00395         printf ("got %s expected %s\n", name, tag->name);
00396 #endif
00397         parser->valid = FALSE;
00398       }
00399       free (name);
00400     }
00401 
00402     xtag_skip_whitespace (parser);
00403     xtag_assert_and_pass (parser, X_CLOSETAG);
00404 
00405   } else {
00406     xtag_assert_and_pass (parser, X_SLASH);
00407     xtag_assert_and_pass (parser, X_CLOSETAG);
00408   }
00409 
00410 
00411   return tag;
00412 }
00413 
00414 XTag *
00415 xtag_free (XTag * xtag)
00416 {
00417   XList * l;
00418   XAttribute * attr;
00419   XTag * child;
00420 
00421   if (xtag == NULL) return NULL;
00422 
00423   if (xtag->name) free (xtag->name);
00424   if (xtag->pcdata) free (xtag->pcdata);
00425 
00426   for (l = xtag->attributes; l; l = l->next) {
00427     if ((attr = (XAttribute *)l->data) != NULL) {
00428       if (attr->name) free (attr->name);
00429       if (attr->value) free (attr->value);
00430       free (attr);
00431     }
00432   }
00433   xlist_free (xtag->attributes);
00434 
00435   for (l = xtag->children; l; l = l->next) {
00436     child = (XTag *)l->data;
00437     xtag_free (child);
00438   }
00439   xlist_free (xtag->children);
00440 
00441   free (xtag);
00442 
00443   return NULL;
00444 }
00445 
00446 XTag *
00447 xtag_new_parse (const char * s, int n)
00448 {
00449   XTagParser parser;
00450   XTag * tag, * ttag, * wrapper;
00451 
00452   parser.valid = TRUE;
00453   parser.current_tag = NULL;
00454   parser.start = (char *)s;
00455 
00456   if (n == -1)
00457     parser.end = NULL;
00458   else if (n == 0)
00459     return NULL;
00460   else
00461     parser.end = (char *)&s[n];
00462 
00463   tag = xtag_parse_tag (&parser);
00464 
00465   if (!parser.valid) {
00466     xtag_free (tag);
00467     return NULL;
00468   }
00469 
00470   if ((ttag = xtag_parse_tag (&parser)) != NULL) {
00471 
00472     if (!parser.valid) {
00473       xtag_free (ttag);
00474       return tag;
00475     }
00476 
00477     wrapper = malloc (sizeof (XTag));
00478     wrapper->name = NULL;
00479     wrapper->pcdata = NULL;
00480     wrapper->parent = NULL;
00481     wrapper->attributes = NULL;
00482     wrapper->children = NULL;
00483     wrapper->current_child = NULL;
00484 
00485     wrapper->children = xlist_append (wrapper->children, tag);
00486     wrapper->children = xlist_append (wrapper->children, ttag);
00487 
00488     while ((ttag = xtag_parse_tag (&parser)) != NULL) {
00489 
00490       if (!parser.valid) {
00491         xtag_free (ttag);
00492         return wrapper;
00493       }
00494 
00495       wrapper->children = xlist_append (wrapper->children, ttag);
00496     }
00497     return wrapper;
00498   }
00499 
00500   return tag;
00501 }
00502 
00503 char *
00504 xtag_get_name (XTag * xtag)
00505 {
00506   return xtag ? xtag->name : NULL;
00507 }
00508 
00509 char *
00510 xtag_get_pcdata (XTag * xtag)
00511 {
00512   XList * l;
00513   XTag * child;
00514 
00515   if (xtag == NULL) return NULL;
00516 
00517   for (l = xtag->children; l; l = l->next) {
00518     child = (XTag *)l->data;
00519     if (child->pcdata != NULL) {
00520       return child->pcdata;
00521     }
00522   }
00523 
00524   return NULL;
00525 }
00526 
00527 char *
00528 xtag_get_attribute (XTag * xtag, char * attribute)
00529 {
00530   XList * l;
00531   XAttribute * attr;
00532 
00533   if (xtag == NULL) return NULL;
00534 
00535   for (l = xtag->attributes; l; l = l->next) {
00536     if ((attr = (XAttribute *)l->data) != NULL) {
00537       if (attr->name && attribute && !strcmp (attr->name, attribute))
00538         return attr->value;
00539     }
00540   }
00541 
00542   return NULL;
00543 }
00544 
00545 XTag *
00546 xtag_first_child (XTag * xtag, char * name)
00547 {
00548   XList * l;
00549   XTag * child;
00550 
00551   if (xtag == NULL) return NULL;
00552 
00553   if ((l = xtag->children) == NULL) return NULL;
00554 
00555   if (name == NULL) {
00556     xtag->current_child = l;
00557     return (XTag *)l->data;
00558   }
00559 
00560   for (; l; l = l->next) {
00561     child = (XTag *)l->data;
00562 
00563     if (child->name && name && !strcmp(child->name, name)) {
00564       xtag->current_child = l;
00565       return child;
00566     }
00567   }
00568 
00569   xtag->current_child = NULL;
00570 
00571   return NULL;
00572 }
00573 
00574 XTag *
00575 xtag_next_child (XTag * xtag, char * name)
00576 {
00577   XList * l;
00578   XTag * child;
00579 
00580   if (xtag == NULL) return NULL;
00581 
00582   if ((l = xtag->current_child) == NULL)
00583     return xtag_first_child (xtag, name);
00584 
00585   if ((l = l->next) == NULL)
00586     return NULL;
00587 
00588   if (name == NULL) {
00589     xtag->current_child = l;
00590     return (XTag *)l->data;
00591   }
00592 
00593   for (; l; l = l->next) {
00594     child = (XTag *)l->data;
00595 
00596     if (child->name && name && !strcmp(child->name, name)) {
00597       xtag->current_child = l;
00598       return child;
00599     }
00600   }
00601 
00602   xtag->current_child = NULL;
00603 
00604   return NULL;
00605 }
00606 
/*
 * Concatenate a variable list of strings into buf.
 *
 * buf  destination; may be NULL when n is 0.
 * n    capacity of buf in bytes; values <= 0 mean nothing is written.
 * ...  char * arguments, the last of which must be a NULL pointer.
 *
 * At most n bytes of string data are copied.  If room is left after the
 * copied data, a terminating NUL is stored there; when the data fills
 * all n bytes the output is NOT terminated.
 *
 * Returns the C99-style total length of all the strings, i.e. the
 * number of bytes that would have been written given enough room, even
 * if this is larger than n.  The terminator is not counted.
 */
static int
xtag_snprints (char * buf, int n, ...)
{
  va_list ap;
  char * s;
  int len, to_copy, total = 0;

  va_start (ap, n);

  for (s = va_arg (ap, char *); s; s = va_arg (ap, char *)) {
    len = (int) strlen (s);

    /* copy as much of this string as still fits */
    to_copy = (len < n) ? len : n;
    if (to_copy > 0) {
      memcpy (buf, s, (size_t) to_copy);
      buf += to_copy;
      n -= to_copy;
    }

    total += len;
  }

  va_end (ap);

  /* Terminate when there is space; a following call that continues at
   * this position simply overwrites the terminator. */
  if (n > 0) *buf = '\0';

  return total;
}
00638 
00639 int
00640 xtag_snprint (char * buf, int n, XTag * xtag)
00641 {
00642   int nn, written = 0;
00643   XList * l;
00644   XAttribute * attr;
00645   XTag * child;
00646 
00647 #define FORWARD(N) \
00648   buf += MIN (n, N); \
00649   n = MAX (n-N, 0);  \
00650   written += N;
00651 
00652   if (xtag == NULL) {
00653     if (n > 0) buf[0] = '\0';
00654     return 0;
00655   }
00656 
00657   if (xtag->pcdata) {
00658     nn = xtag_snprints (buf, n, xtag->pcdata, NULL);
00659     FORWARD(nn);
00660 
00661     return written;
00662   }
00663 
00664   if (xtag->name) {
00665     nn = xtag_snprints (buf, n, "<", xtag->name, NULL);
00666     FORWARD(nn);
00667 
00668     for (l = xtag->attributes; l; l = l->next) {
00669       attr = (XAttribute *)l->data;
00670       
00671       nn = xtag_snprints (buf, n, " ", attr->name, "=\"", attr->value, "\"",
00672                           NULL);
00673       FORWARD(nn);
00674     }
00675     
00676     if (xtag->children == NULL) {
00677       nn = xtag_snprints (buf, n, "/>", NULL);
00678       FORWARD(nn);
00679 
00680       return written;
00681     }
00682     
00683     nn = xtag_snprints (buf, n, ">", NULL);
00684     FORWARD(nn);
00685   }
00686 
00687   for (l = xtag->children; l; l = l->next) {
00688     child = (XTag *)l->data;
00689 
00690     nn = xtag_snprint (buf, n, child);
00691     FORWARD(nn);
00692   }
00693 
00694   if (xtag->name) {
00695     nn = xtag_snprints (buf, n, "</", xtag->name, ">", NULL);
00696     FORWARD(nn);
00697   }
00698 
00699   return written;
00700 }
00701 

Generated on Tue Feb 15 14:54:13 2005 for oggdsf by  doxygen 1.3.9