Fri Feb 13 13:56:12 UTC 2009

Author: leigh123linux

Update of /cvs/pkgs/rpms/html2text/F-10
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv23281/F-10

Modified Files:
	.cvsignore sources 
Added Files:
	200-close-files-inside-main-loop.patch 
	400-remove-builtin-http-support.patch 500-utf8-support.patch 
	510-disable-backspaces.patch 600-multiple-meta-tags.patch 
	611-recognize-input-encoding.patch 
	630-recode-output-to-locale-charset.patch 
	800-replace-zeroes-with-null.patch 
	810-fix-deprecated-conversion-warnings.patch html2text.spec 
	import.log 
Log Message:


200-close-files-inside-main-loop.patch:

--- NEW FILE 200-close-files-inside-main-loop.patch ---

Close every file right after it has been processed, not at the end of the program.
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-07-13 16:13:16.364186789 +0300
+++ html2text-1.3.2a/html2text.C	2008-07-13 16:13:36.175939260 +0300
@@ -376,6 +376,7 @@
     );
 
     if (parser.yyparse() != 0) exit(1);
+	uis.close();
   }
 
   return 0;


400-remove-builtin-http-support.patch:

--- NEW FILE 400-remove-builtin-http-support.patch ---

Remove the limited built-in HTTP support.
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 14:02:03.030954214 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 14:03:39.181744957 +0300
@@ -36,18 +36,20 @@
 
 
 #include <iostream>
+#include <fstream>
 #include <string.h>
 #include <stdlib.h>
 
 #include "html.h"
 #include "HTMLControl.h"
-#include "urlistream.h"
+//#include "urlistream.h"
 #include "format.h"
 
 #define stringify(x) stringify2(x)
 #define stringify2(x) #x
 
 /* ------------------------------------------------------------------------- */
+using std::ifstream;
 
 class MyParser : public HTMLControl {
 
@@ -55,7 +57,7 @@
   enum { PRINT_AS_ASCII, UNPARSE, SYNTAX_CHECK };
 
   MyParser(
-    urlistream &is_,
+    istream &is_,
     bool       debug_scanner_,
     bool       debug_parser_,
     ostream    &os_,
@@ -352,15 +354,14 @@
     }
 
     istream    *isp;
-    urlistream uis;
+    ifstream     uis;
 
     uis.open(input_url);
     if (!uis.is_open()) {
       std::cerr
-        << "Opening input URL \""
+        << "Cannot open input file \""
 	<< input_url
-        << "\": "
-        << uis.open_error()
+        << "\"."
         << std::endl;
       exit(1);
     }
Index: html2text-1.3.2a/HTMLControl.h
===================================================================
--- html2text-1.3.2a.orig/HTMLControl.h	2008-09-20 14:01:44.527185508 +0300
+++ html2text-1.3.2a/HTMLControl.h	2008-09-20 14:03:39.181744957 +0300
@@ -38,7 +38,6 @@
 /* ------------------------------------------------------------------------- */
 
 #include "HTMLParser.h"
-#include "urlistream.h"
 #include <istream>
 
 using std::istream;
@@ -48,7 +47,7 @@
 class HTMLControl : public HTMLParser {
 
 public:
-  HTMLControl(urlistream &is_, bool debug_scanner_, bool debug_parser_) :
+  HTMLControl(istream &is_, bool debug_scanner_, bool debug_parser_) :
     HTMLParser(),
     current_line(1),
     current_column(0),
@@ -84,7 +83,7 @@
 
   bool debug_scanner;
 
-  urlistream &is;
+  istream &is;
   int     ungotten_chars[5];
   int     number_of_ungotten_chars;
 };
Index: html2text-1.3.2a/Makefile.in
===================================================================
--- html2text-1.3.2a.orig/Makefile.in	2008-09-20 14:01:44.571185514 +0300
+++ html2text-1.3.2a/Makefile.in	2008-09-20 14:03:39.181744957 +0300
@@ -68,7 +68,7 @@
 	@echo '"/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5").';
 	@echo
 
-OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o urlistream.o Properties.o cmp_nocase.o
+OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o Properties.o cmp_nocase.o
 
 html2text : $(OBJS) $(LIBSTDCXX_LIBS)
 	$(CXX) $(LDFLAGS) $(OBJS) $(LOADLIBES) $(LDLIBS) -o $@


500-utf8-support.patch:

--- NEW FILE 500-utf8-support.patch ---

Support UTF-8 encoding when processing input.
Index: html2text-1.3.2a/Area.C
===================================================================
--- html2text-1.3.2a.orig/Area.C	2008-09-20 14:01:44.259190763 +0300
+++ html2text-1.3.2a/Area.C	2008-09-20 14:06:15.255782998 +0300
@@ -36,10 +36,13 @@
 #include <iostream>
 
 #include "Area.h"
+#include "html.h"
 #include "string.h"
 
 #define LATIN1_nbsp 160
 
+extern int use_encoding;
+
 /* ------------------------------------------------------------------------- */
 
 #define malloc_array(type, size)\
@@ -81,6 +84,53 @@
 
 /* ------------------------------------------------------------------------- */
 
+/*           utf_length() and utf_width()
+ *
+ *     A deliberately simplified way of calculating the length of a
+ *   UTF-8 string, with no error checking: only ASCII bytes and the
+ *   leading bytes of UTF-8 multibyte sequences are counted, and all
+ *   continuation bytes (10xxxxxx) are dropped. If USE_UTF8 is false,
+ *   the usual byte length is returned.               --YS
+ */
+
+size_t utf8_aux_count(char ch)
+{
+	if((ch & 0xe0) == 0xc0)
+	{
+		return 1;
+	}
+	else if((ch & 0xf0) == 0xe0)
+	{
+		return 2;
+	}
+	else if ((ch & 0xf8) == 0xf0)
+	{
+		return 3;
+	}
+	else
+	{
+		return 0;
+	}
+}
+
+unsigned int
+Line::utf_length(size_type f, size_type t) const
+{
+	/* Character count of cells [f, min(t, length_)): bytes minus UTF-8 tails. */
+	const size_type m = (t < length_ ? t : length_);
+	if (f >= m) return 0;	/* empty or inverted range; avoids unsigned wrap */
+	size_type r = m - f;
+	if (!USE_UTF8) return r;
+	for (size_type i = f; i < m; i++) {
+		size_type aux_count = utf8_aux_count(cells_[i].character);
+		/* A sequence cut off at m must not subtract bytes outside the range. */
+		if (aux_count > m - i - 1) aux_count = m - i - 1;
+		r -= aux_count;
+		i += aux_count;
+	}
+	return r;
+}
+
 void
 Line::resize(size_type l)
 {
@@ -236,6 +286,28 @@
   return *this;
 }
 
+unsigned int
+Area::utf_width()
+{
+  /* Width in characters: width_ minus the fewest UTF-8 tail bytes of any row. */
+  size_type r = width_;
+  if(USE_UTF8) { r = 0;
+    for (size_type yy = 0; yy < height_; yy++) {
+      int i = width_ - 1;
+      /* isspace() is undefined for negative arguments, so convert through
+       * unsigned char; UTF-8 bytes are negative where plain char is signed. */
+      while((i >= 0) && isspace((unsigned char) cells_[yy][i].character))
+        --i;
+      size_type aux_count_sum = 0;
+      for (; i >= 0; i--)
+        aux_count_sum += utf8_aux_count(cells_[yy][i].character);
+      size_type r1 = width_ - aux_count_sum;
+      if(r < r1) r = r1;
+    }
+  }
+  return r;
+}
+
 void
 Area::resize(size_type w, size_type h)
 {
@@ -439,7 +511,7 @@
       char c = p->character;
       char a = p->attribute;
 
-      if (c == (char) LATIN1_nbsp) c = ' ';
+      if (c == (char) LATIN1_nbsp && !USE_UTF8) c = ' ';
 
       if (a == Cell::NONE) {
         os << c;
Index: html2text-1.3.2a/Area.h
===================================================================
--- html2text-1.3.2a.orig/Area.h	2008-09-20 14:01:44.295185701 +0300
+++ html2text-1.3.2a/Area.h	2008-09-20 14:06:15.255782998 +0300
@@ -81,6 +81,8 @@
   Cell       &operator[](size_type x)       { return cells_[x]; }
   const Cell *cells() const { return cells_; }
 
+  unsigned int utf_length(size_type f, size_type t) const;
+
   void resize(size_type l);
   void enlarge(size_type l) { if (l > length_) resize(l); }
 
@@ -134,6 +136,8 @@
   Cell       *operator[](size_type y)       { return cells_[y]; }
   const Area &operator>>=(size_type rs);
 
+  unsigned int utf_width();
+
   void resize(size_type w, size_type h);
   void enlarge(size_type w, size_type h);
 
Index: html2text-1.3.2a/format.C
===================================================================
--- html2text-1.3.2a.orig/format.C	2008-09-20 14:01:44.311190459 +0300
+++ html2text-1.3.2a/format.C	2008-09-20 14:06:15.259781132 +0300
@@ -1210,6 +1210,7 @@
     }
 
     Line::size_type to = from + 1;
+    int to_from;
 
     Line::size_type lbp = (Line::size_type) -1; // "Last break position".
 
@@ -1238,18 +1239,20 @@
         to++;
       }
 
-      if (to - from > w && lbp != (Area::size_type) -1) { to = lbp; break; }
+      if (line.utf_length(from,to) > w && lbp != (Area::size_type) -1) 
+                    { to = lbp; break; }
     }
 
+    to_from = line.utf_length(from,to);
     /*
      * Copy the "from...to" range from the "line" to the bottom of the "res"
      * Area.
      */
     Area::size_type x = 0;
     Area::size_type len = to - from;
-    if (halign == Area::LEFT || len >= w) { ;                   } else
-    if (halign == Area::CENTER)           { x += (w - len) / 2; } else
-    if (halign == Area::RIGHT)            { x += w - len;       }
+    if (halign == Area::LEFT || to_from >= w) { ;                   } else
+    if (halign == Area::CENTER)           { x += (w - to_from) / 2; } else
+    if (halign == Area::RIGHT)            { x += w - to_from;       }
     res->insert(line.cells() + from, len, x, res->height());
 
     /*
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 14:03:39.181744957 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 14:06:15.259781132 +0300
@@ -150,9 +150,10 @@
   -o <file>      Redirect output into <file>\n\
   -nobs          Do not use backspaces for boldface and underlining\n\
   -ascii         Use plain ASCII for output instead of ISO-8859-1\n\
+  -utf8          Assume both terminal and input stream are in UTF-8 mode\n\
 ";
 
-int use_iso8859 = 1;
+int use_encoding = ISO8859;
 
 int
 main(int argc, char **argv)
@@ -201,7 +202,8 @@
     if (!strcmp(arg, "-width"        )) { width = atoi(argv[++i]);       } else
     if (!strcmp(arg, "-o"            )) { output_file_name = argv[++i];  } else
     if (!strcmp(arg, "-nobs"         )) { use_backspaces = false;        } else
-    if (!strcmp(arg, "-ascii"        )) { use_iso8859 = false;           } else
+    if (!strcmp(arg, "-ascii"        )) { use_encoding = ASCII;          } else
+    if (!strcmp(arg, "-utf8"         )) { use_encoding = UTF8;           } else
     {
       std::cerr
 	<< "Unrecognized command line option \""
Index: html2text-1.3.2a/html.h
===================================================================
--- html2text-1.3.2a.orig/html.h	2008-09-20 14:01:44.343193129 +0300
+++ html2text-1.3.2a/html.h	2008-09-20 14:06:15.259781132 +0300
@@ -61,6 +61,11 @@
 
 /* ------------------------------------------------------------------------- */
 
+enum {ASCII, ISO8859, UTF8};
+#define USE_ISO8859 (use_encoding == ISO8859)
+#define USE_ASCII (use_encoding == ASCII)
+#define USE_UTF8 (use_encoding == UTF8)
+
 #define LATIN1_nbsp   160
 #define LATIN1_iexcl  161
 #define LATIN1_cent   162
Index: html2text-1.3.2a/sgml.C
===================================================================
--- html2text-1.3.2a.orig/sgml.C	2008-09-20 14:01:44.391192735 +0300
+++ html2text-1.3.2a/sgml.C	2008-09-20 14:06:15.259781132 +0300
@@ -62,261 +62,280 @@
   char name[8];
   int  iso8859code;
   char *asciistr;
+  unsigned long unicode;
 } entities[] = {
-  { "AElig",   LATIN1_AElig,  "AE"         },
-  { "AMP",     0,             "&"          },
-  { "Aacute",  LATIN1_Aacute, "A'"         },
-  { "Acirc",   LATIN1_Acirc,  "A^"         },
-  { "Agrave",  LATIN1_Agrave, "A`"         },
-  { "Alpha",   0,             "A"          },
-  { "Aring",   LATIN1_Aring,  "AA"         },
-  { "Atilde",  LATIN1_Atilde, "A~"         },
-  { "Auml",    LATIN1_Auml,   "A\""        },
-  { "Beta",    0,             "B"          },
-  { "Ccedil",  LATIN1_Ccedil, "C,"         },
-  { "Chi",     0,             "H"          },
-  { "Dagger",  0,             "++"         },
-  { "Delta",   0,             "D"          },
-  { "ETH",     LATIN1_ETH,    "D-"         },
-  { "Eacute",  LATIN1_Eacute, "E'"         },
-  { "Ecirc",   LATIN1_Ecirc,  "E^"         },
-  { "Egrave",  LATIN1_Egrave, "E`"         },
-  { "Epsilon", 0,             "E"          },
-  { "Eta",     0,             "E"          },
-  { "Euml",    LATIN1_Euml,   "E\""        },
-  { "GT",      0,             ">"          },
-  { "Gamma",   0,             "G"          },
-  { "Iacute",  LATIN1_Iacute, "I'"         },
-  { "Icirc",   LATIN1_Icirc,  "I^"         },
-  { "Igrave",  LATIN1_Igrave, "I`"         },
-  { "Iota",    0,             "I"          },
-  { "Iuml",    LATIN1_Iuml,   "I\""        },
-  { "Kappa",   0,             "K"          },
-  { "LT",      0,             "<"          },
-  { "Lambda",  0,             "L"          },
-  { "Mu",      0,             "M"          },
-  { "Ntilde",  LATIN1_Ntilde, "N~"         },
-  { "Nu",      0,             "N"          },
-  { "OElig",   0,             "OE"         },
-  { "Oacute",  LATIN1_Oacute, "O'"         },
-  { "Ocirc",   LATIN1_Ocirc,  "O^"         },
-  { "Ograve",  LATIN1_Ograve, "O`"         },
-  { "Omega",   0,             "O"          },
-  { "Omicron", 0,             "O"          },
-  { "Oslash",  LATIN1_Oslash, "O/"         },
-  { "Otilde",  LATIN1_Otilde, "O~"         },
-  { "Ouml",    LATIN1_Ouml,   "O\""        },
-  { "Phi",     0,             "F"          },
-  { "Pi",      0,             "P"          },
-  { "Prime",   0,             "''"         },
-  { "Psi",     0,             "PS"         },
-  { "QUOT",    0,             "\""         },
-  { "Rho",     0,             "R"          },
-  { "Scaron",  0,             "S"          },
-  { "Sigma",   0,             "S"          },
-  { "THORN",   LATIN1_THORN,  "TH"         },
-  { "Tau",     0,             "T"          },
-  { "Theta",   0,             "TH"         },
-  { "Uacute",  LATIN1_Uacute, "U'"         },
-  { "Ucirc",   LATIN1_Ucirc,  "U^"         },
-  { "Ugrave",  LATIN1_Ugrave, "U`"         },
-  { "Upsilon", 0,             "U"          },
-  { "Uuml",    LATIN1_Uuml,   "U\""        },
-  { "Xi",      0,             "X"          },
-  { "Yacute",  LATIN1_Yacute, "Y'"         },
-  { "Yuml",    0,             "Y\""        },
-  { "Zeta",    0,             "Z"          },
-  { "aacute",  LATIN1_aacute, "a'"         },
-  { "acirc",   LATIN1_acirc,  "a^"         },
-  { "acute",   LATIN1_acute,  "'"          },
-  { "aelig",   LATIN1_aelig,  "ae"         },
-  { "agrave",  LATIN1_agrave, "a`"         },
+  { "AElig",   LATIN1_AElig,  "AE",  0x00c6},
+  { "AMP",     0,             "&",   0x0026},
+  { "Aacute",  LATIN1_Aacute, "A'",  0x00c1},
+  { "Acirc",   LATIN1_Acirc,  "A^",  0x00c2},
+  { "Agrave",  LATIN1_Agrave, "A`",  0x00c0},
+  { "Alpha",   0,             "A",   0x0391},
+  { "Aring",   LATIN1_Aring,  "AA",  0x00c5},
+  { "Atilde",  LATIN1_Atilde, "A~",  0x00c3},
+  { "Auml",    LATIN1_Auml,   "A\"", 0x00c4},
+  { "Beta",    0,             "B",   0x0392},
+  { "Ccedil",  LATIN1_Ccedil, "C,",  0x00c7},
+  { "Chi",     0,             "H",   0x03a7},
+  { "Dagger",  0,             "++",  0x2021}, /* double dagger; U+2020 is "dagger" */
+  { "Delta",   0,             "D",   0x0394},
+  { "ETH",     LATIN1_ETH,    "D-",  0x00d0},
+  { "Eacute",  LATIN1_Eacute, "E'",  0x00c9},
+  { "Ecirc",   LATIN1_Ecirc,  "E^",  0x00ca},
+  { "Egrave",  LATIN1_Egrave, "E`",  0x00c8},
+  { "Epsilon", 0,             "E",   0x0395},
+  { "Eta",     0,             "E",   0x0397},
+  { "Euml",    LATIN1_Euml,   "E\"", 0x00cb},
+  { "GT",      0,             ">",   0x003e},
+  { "Gamma",   0,             "G",   0x0393},
+  { "Iacute",  LATIN1_Iacute, "I'",  0x00cd},
+  { "Icirc",   LATIN1_Icirc,  "I^",  0x00ce},
+  { "Igrave",  LATIN1_Igrave, "I`",  0x00cc},
+  { "Iota",    0,             "I",   0x0399},
+  { "Iuml",    LATIN1_Iuml,   "I\"", 0x00cf},
+  { "Kappa",   0,             "K",   0x039a},
+  { "LT",      0,             "<",   0x003c},
+  { "Lambda",  0,             "L",   0x039b},
+  { "Mu",      0,             "M",   0x039c},
+  { "Ntilde",  LATIN1_Ntilde, "N~",  0x00d1},
+  { "Nu",      0,             "N",   0x039d},
+  { "OElig",   0,             "OE",  0x0152},
+  { "Oacute",  LATIN1_Oacute, "O'",  0x00d3},
+  { "Ocirc",   LATIN1_Ocirc,  "O^",  0x00d4},
+  { "Ograve",  LATIN1_Ograve, "O`",  0x00d2},
+  { "Omega",   0,             "O",   0x03a9},
+  { "Omicron", 0,             "O",   0x039f},
+  { "Oslash",  LATIN1_Oslash, "O/",  0x00d8},
+  { "Otilde",  LATIN1_Otilde, "O~",  0x00d5},
+  { "Ouml",    LATIN1_Ouml,   "O\"", 0x00d6},
+  { "Phi",     0,             "F",   0x03a6},
+  { "Pi",      0,             "P",   0x03a0},
+  { "Prime",   0,             "''",  0x2033}, /* double prime */
+  { "Psi",     0,             "PS",  0x03a8},
+  { "QUOT",    0,             "\"",  0x0022},
+  { "Rho",     0,             "R",   0x03a1},
+  { "Scaron",  0,             "S",   0x0160}, /* capital; U+0161 is "scaron" */
+  { "Sigma",   0,             "S",   0x03a3},
+  { "THORN",   LATIN1_THORN,  "TH",  0x00de},
+  { "Tau",     0,             "T",   0x03a4},
+  { "Theta",   0,             "TH",  0x0398},
+  { "Uacute",  LATIN1_Uacute, "U'",  0x00da},
+  { "Ucirc",   LATIN1_Ucirc,  "U^",  0x00db},
+  { "Ugrave",  LATIN1_Ugrave, "U`",  0x00d9},
+  { "Upsilon", 0,             "U",   0x03a5},
+  { "Uuml",    LATIN1_Uuml,   "U\"", 0x00dc},
+  { "Xi",      0,             "X",   0x039e},
+  { "Yacute",  LATIN1_Yacute, "Y'",  0x00dd},
+  { "Yuml",    0,             "Y\"", 0x0178},
+  { "Zeta",    0,             "Z",   0x0396},
+  { "aacute",  LATIN1_aacute, "a'",  0x00e1},
+  { "acirc",   LATIN1_acirc,  "a^",  0x00e2},
+  { "acute",   LATIN1_acute,  "'",   0x00b4},
+  { "aelig",   LATIN1_aelig,  "ae",  0x00e6},
+  { "agrave",  LATIN1_agrave, "a`",  0x00e0},
   { "alefsym", 0,             "Aleph"      },
-  { "alpha",   0,             "a"          },
+  { "alpha",   0,             "a",   0x03b1},
   { "amp",     0,             "&"          },
   { "and",     0,             "AND"        },
   { "ang",     0,             "-V"         },
   { "apos",    0,             "'"          },
-  { "aring",   LATIN1_aring,  "aa"         },
-  { "asymp",   0,             "~="         },
-  { "atilde",  LATIN1_atilde, "a~"         },
-  { "auml",    LATIN1_auml,   "a\""        },
+  { "aring",   LATIN1_aring,  "aa",  0x00e5},
+  { "asymp",   0,             "~=",  0x2248},
+  { "atilde",  LATIN1_atilde, "a~",  0x00e3},
+  { "auml",    LATIN1_auml,   "a\"", 0x00e4}, /* U+00E5 is "aring" */
   { "bdquo",   0,             "\""         },
-  { "beta",    0,             "b"          },
-  { "brvbar",  LATIN1_brvbar, "|"          },
-  { "bull",    0,             " o "        },
+  { "beta",    0,             "b",   0x03b2},
+  { "brvbar",  LATIN1_brvbar, "|",   0x00a6},
+  { "bull",    0,             " o ", 0x2022},
   { "cap",     0,             "(U"         },
-  { "ccedil",  LATIN1_ccedil, "c,"         },
-  { "cedil",   LATIN1_cedil,  ","          },
-  { "cent",    LATIN1_cent,   "-c-"        },
-  { "chi",     0,             "h"          },
-  { "circ",    0,             "^"          },
+  { "ccedil",  LATIN1_ccedil, "c,",  0x00e7},
+  { "cedil",   LATIN1_cedil,  ",",   0x00b8},
+  { "cent",    LATIN1_cent,   "-c-", 0x00a2},
+  { "chi",     0,             "h",   0x03c7},
+  { "circ",    0,             "^",   0x005e},
 //  { "clubs",   0,             "[clubs]"    },
   { "cong",    0,             "?="         },
-  { "copy",    LATIN1_copy,   "(c)"        },
+  { "copy",    LATIN1_copy,   "(c)", 0x00a9},
   { "crarr",   0,             "<-'"        },
   { "cup",     0,             ")U"         },
-  { "curren",  LATIN1_curren, "CUR"        },
+  { "curren",  LATIN1_curren, "CUR", 0x00a4},
   { "dArr",    0,             "vv"         },
-  { "dagger",  0,             "+"          },
+  { "dagger",  0,             "+",   0x2020},
   { "darr",    0,             "v"          },
-  { "deg",     LATIN1_deg,    "DEG"        },
-  { "delta",   0,             "d"          },
+  { "deg",     LATIN1_deg,    "DEG", 0x00b0},
+  { "delta",   0,             "d",   0x03b4},
 //  { "diams",   0,             "[diamonds]" },
-  { "divide",  LATIN1_divide, "/"          },
-  { "eacute",  LATIN1_eacute, "e'"         },
-  { "ecirc",   LATIN1_ecirc,  "e^"         },
-  { "egrave",  LATIN1_egrave, "e`"         },
+  { "divide",  LATIN1_divide, "/",   0x00f7},
+  { "eacute",  LATIN1_eacute, "e'",  0x00e9},
+  { "ecirc",   LATIN1_ecirc,  "e^",  0x00ea},
+  { "egrave",  LATIN1_egrave, "e`",  0x00e8},
   { "empty",   0,             "{}"         },
-  { "epsilon", 0,             "e"          },
-  { "equiv",   0,             "=="         },
-  { "eta",     0,             "e"          },
-  { "eth",     LATIN1_eth,    "d-"         },
-  { "euml",    LATIN1_euml,   "e\""        },
-  { "euro",    0,             "EUR"        },
+  { "epsilon", 0,             "e",   0x03b5},
+  { "equiv",   0,             "==",  0x2261},
+  { "eta",     0,             "e",   0x03b7},
+  { "eth",     LATIN1_eth,    "d-",  0x00f0},
+  { "euml",    LATIN1_euml,   "e\"", 0x00eb},
+  { "euro",    0,             "EUR", 0x20ac},
   { "exist",   0,             "TE"         },
   { "fnof",    0,             "f"          },
   { "forall",  0,             "FA"         },
-  { "frac12",  LATIN1_frac12, " 1/2"       },
-  { "frac14",  LATIN1_frac14, " 1/4"       },
-  { "frac34",  LATIN1_frac34, " 3/4"       },
+  { "frac12",  LATIN1_frac12, " 1/2",0x00bd},
+  { "frac14",  LATIN1_frac14, " 1/4",0x00bc},
+  { "frac34",  LATIN1_frac34, " 3/4",0x00be},
   { "frasl",   0,             "/"          },
-  { "gamma",   0,             "g"          },
-  { "ge",      0,             ">="         },
-  { "gt",      0,             ">"          },
+  { "gamma",   0,             "g",   0x03b3},
+  { "ge",      0,             ">=",  0x2265},
+  { "gt",      0,             ">",   0x003e},
   { "hArr",    0,             "<=>"        },
   { "harr",    0,             "<->"        },
 //  { "hearts",  0,             "[hearts]"   },
-  { "hellip",  0,             "..."        },
-  { "iacute",  LATIN1_iacute, "i'"         },
-  { "icirc",   LATIN1_icirc,  "i^"         },
-  { "iexcl",   LATIN1_iexcl,  "!"          },
-  { "igrave",  LATIN1_igrave, "i`"         },
+  { "hellip",  0,             "...", 0x2026},
+  { "iacute",  LATIN1_iacute, "i'",  0x00ed},
+  { "icirc",   LATIN1_icirc,  "i^",  0x00ee},
+  { "iexcl",   LATIN1_iexcl,  "!",   0x00a1},
+  { "igrave",  LATIN1_igrave, "i`",  0x00ec},
   { "image",   0,             "Im"         },
-  { "infin",   0,             "oo"         },
-  { "int",     0,             "INT"        },
-  { "iota",    0,             "i"          },
-  { "iquest",  LATIN1_iquest, "?"          },
+  { "infin",   0,             "oo",  0x221e},
+  { "int",     0,             "INT", 0x222b},
+  { "iota",    0,             "i",   0x03b9},
+  { "iquest",  LATIN1_iquest, "?",   0x00bf},
   { "isin",    0,             "(-"         },
-  { "iuml",    LATIN1_iuml,   "i\""        },
-  { "kappa",   0,             "k"          },
+  { "iuml",    LATIN1_iuml,   "i\"", 0x00ef},
+  { "kappa",   0,             "k",   0x03ba},
   { "lArr",    0,             "<="         },
-  { "lambda",  0,             "l"          },
+  { "lambda",  0,             "l",   0x03bb},
   { "lang",    0,             "</"         },
   { "laquo",   LATIN1_laquo,  "<<"         },
-  { "larr",    0,             "<-"         },
+  { "larr",    0,             "<-",  0x2190},
 //  { "lceil",   0,             "<|"         },
   { "ldquo",   0,             "\""         },
-  { "le",      0,             "<="         },
+  { "le",      0,             "<=",  0x2264},
 //  { "lfloor",  0,             "|<"         },
   { "lowast",  0,             "*"          },
   { "loz",     0,             "<>"         },
   { "lsaquo",  0,             "<"          },
   { "lsquo",   0,             "`"          },
-  { "lt",      0,             "<"          },
-  { "macr",    LATIN1_macr,   "-"          },
+  { "lt",      0,             "<",   0x003c},
+  { "macr",    LATIN1_macr,   "-",   0x00af},
   { "mdash",   0,             "--"         },
-  { "micro",   LATIN1_micro,  "my"         },
-  { "middot",  LATIN1_middot, "."          },
-  { "minus",   0,             "-"          },
-  { "mu",      0,             "m"          },
+  { "micro",   LATIN1_micro,  "my",  0x00b5},
+  { "middot",  LATIN1_middot, ".",   0x00b7},
+  { "minus",   0,             "-",   0x2212},
+  { "mu",      0,             "m",   0x03bc},
   { "nabla",   0,             "Nabla"      },
-  { "nbsp",    LATIN1_nbsp,   " "          },
+  { "nbsp",    LATIN1_nbsp,   " ",   0x00a0},
   { "ndash",   0,             "-"          },
-  { "ne",      0,             "!="         },
+  { "ne",      0,             "!=",  0x2260},
   { "ni",      0,             "-)"         },
   { "not",     LATIN1_not,    "NOT"        },
   { "notin",   0,             "!(-"        },
   { "nsub",    0,             "!(C"        },
-  { "ntilde",  LATIN1_ntilde, "n~"         },
-  { "nu",      0,             "n"          },
-  { "oacute",  LATIN1_oacute, "o'"         },
-  { "ocirc",   LATIN1_ocirc,  "o^"         },
+  { "ntilde",  LATIN1_ntilde, "n~",  0x00f1},
+  { "nu",      0,             "n",   0x03bd},
+  { "oacute",  LATIN1_oacute, "o'",  0x00f3},
+  { "ocirc",   LATIN1_ocirc,  "o^",  0x00f4},
   { "oelig",   0,             "oe"         },
-  { "ograve",  LATIN1_ograve, "o`"         },
+  { "ograve",  LATIN1_ograve, "o`",  0x00f2},
   { "oline",   LATIN1_macr,   "-"          },
-  { "omega",   0,             "o"          },
-  { "omicron", 0,             "o"          },
+  { "omega",   0,             "o",   0x03c9},
+  { "omicron", 0,             "o",   0x03bf},
   { "oplus",   0,             "(+)"        },
   { "or",      0,             "OR"         },
-  { "ordf",    LATIN1_ordf,   "-a"         },
-  { "ordm",    LATIN1_ordm,   "-o"         },
-  { "oslash",  LATIN1_oslash, "o/"         },
-  { "otilde",  LATIN1_otilde, "o~"         },
+  { "ordf",    LATIN1_ordf,   "-a",  0x00aa},
+  { "ordm",    LATIN1_ordm,   "-o",  0x00ba},
+  { "oslash",  LATIN1_oslash, "o/",  0x00f8},
+  { "otilde",  LATIN1_otilde, "o~",  0x00f5},
   { "otimes",  0,             "(x)"        },
-  { "ouml",    LATIN1_ouml,   "o\""        },
-  { "para",    LATIN1_para,   "P:"         },
-  { "part",    0,             "PART"       },
-  { "permil",  0,             " 0/00"      },
+  { "ouml",    LATIN1_ouml,   "o\"", 0x00f6},
+  { "para",    LATIN1_para,   "P:",  0x00b6},
+  { "part",    0,             "PART",0x2202},
+  { "permil",  0,             " 0/00",0x2030},
   { "perp",    0,             "-T"         },
-  { "phi",     0,             "f"          },
-  { "pi",      0,             "p"          },
+  { "phi",     0,             "f",   0x03c6},
+  { "pi",      0,             "p",   0x03c0},
   { "piv",     0,             "Pi"         },
-  { "plusmn",  LATIN1_plusmn, "+/-"        },
-  { "pound",   LATIN1_pound,  "-L-"        },
+  { "plusmn",  LATIN1_plusmn, "+/-", 0x00b1},
+  { "pound",   LATIN1_pound,  "-L-", 0x00a3},
   { "prime",   0,             "'"          },
-  { "prod",    0,             "PROD"       },
+  { "prod",    0,             "PROD",0x220f},
   { "prop",    0,             "0("         },
-  { "psi",     0,             "ps"         },
+  { "psi",     0,             "ps",  0x03c8},
   { "quot",    0,             "\""         },
   { "rArr",    0,             "=>"         },
-  { "radic",   0,             "SQRT"       },
+  { "radic",   0,             "SQRT",0x221a},
   { "rang",    0,             "/>"         },
   { "raquo",   LATIN1_raquo,  ">>"         },
-  { "rarr",    0,             "->"         },
+  { "rarr",    0,             "->",  0x2192},
 //  { "rceil",   0,             ">|"         },
   { "rdquo",   0,             "\""         },
   { "real",    0,             "Re"         },
-  { "reg",     LATIN1_reg,    "(R)"        },
+  { "reg",     LATIN1_reg,    "(R)", 0x00ae},
 //  { "rfloor",  0,             "|>"         },
-  { "rho",     0,             "r"          },
+  { "rho",     0,             "r",   0x03c1},
   { "rsaquo",  0,             ">"          },
   { "rsquo",   0,             "'"          },
   { "sbquo",   0,             "'"          },
-  { "scaron",  0,             "s"          },
+  { "scaron",  0,             "s",   0x0161},
   { "sdot",    0,             "DOT"        },
-  { "sect",    LATIN1_sect,   "S:"         },
+  { "sect",    LATIN1_sect,   "S:",  0x00a7},
   { "shy",     LATIN1_shy,    ""           },
-  { "sigma",   0,             "s"          },
-  { "sigmaf",  0,             "s"          },
+  { "sigma",   0,             "s",   0x03c3},
+  { "sigmaf",  0,             "s",   0x03c2},
   { "sim",     0,             "~"          },
 //  { "spades",  0,             "[spades]"   },
   { "sub",     0,             "(C"         },
   { "sube",    0,             "(_"         },
-  { "sum",     0,             "SUM"        },
+  { "sum",     0,             "SUM", 0x2211},
   { "sup",     0,             ")C"         },
-  { "sup1",    LATIN1_sup1,   "^1"         },
-  { "sup2",    LATIN1_sup2,   "^2"         },
-  { "sup3",    LATIN1_sup3,   "^3"         },
+  { "sup1",    LATIN1_sup1,   "^1",  0x00b9},
+  { "sup2",    LATIN1_sup2,   "^2",  0x00b2},
+  { "sup3",    LATIN1_sup3,   "^3",  0x00b3},
   { "supe",    0,             ")_"         },
-  { "szlig",   LATIN1_szlig,  "ss"         },
-  { "tau",     0,             "t"          },
+  { "szlig",   LATIN1_szlig,  "ss",  0x00df},
+  { "tau",     0,             "t",   0x03c4},
   { "there4",  0,             ".:"         },
-  { "theta",   0,             "th"         },
-  { "thorn",   LATIN1_thorn,  "th"         },
-  { "tilde",   0,             "~"          },
-  { "times",   LATIN1_times,  "x"          },
-  { "trade",   0,             "[TM]"       },
+  { "theta",   0,             "th",  0x03b8},
+  { "thorn",   LATIN1_thorn,  "th",  0x00fe},
+  { "tilde",   0,             "~",   0x02dc},
+  { "times",   LATIN1_times,  "x",   0x00d7},
+  { "trade",   0,             "[TM]",0x2122},
   { "uArr",    0,             "^^"         },
-  { "uacute",  LATIN1_uacute, "u'"         },
+  { "uacute",  LATIN1_uacute, "u'",  0x00fa},
   { "uarr",    0,             "^"          },
-  { "ucirc",   LATIN1_ucirc,  "u^"         },
-  { "ugrave",  LATIN1_ugrave, "u`"         },
-  { "uml",     LATIN1_uml,    "\""         },
-  { "upsilon", 0,             "u"          },
-  { "uuml",    LATIN1_uuml,   "u\""        },
+  { "ucirc",   LATIN1_ucirc,  "u^",  0x00fb},
+  { "ugrave",  LATIN1_ugrave, "u`",  0x00f9},
+  { "uml",     LATIN1_uml,    "\"",  0x00a8},
+  { "upsilon", 0,             "u",   0x03c5},
+  { "uuml",    LATIN1_uuml,   "u\"", 0x00fc},
   { "weierp",  0,             "P"          },
-  { "xi",      0,             "x"          },
-  { "yacute",  LATIN1_yacute, "y'"         },
-  { "yen",     LATIN1_yen,    "YEN"        },
-  { "yuml",    LATIN1_yuml,   "y\""        },
-  { "zeta",    0,             "z"          },
+  { "xi",      0,             "x",   0x03be},
+  { "yacute",  LATIN1_yacute, "y'",  0x00fd},
+  { "yen",     LATIN1_yen,    "YEN", 0x00a5},
+  { "yuml",    LATIN1_yuml,   "y\"", 0x00ff},
+  { "zeta",    0,             "z",   0x03b6},
 };
 
-extern int use_iso8859;
+extern int use_encoding;
 
 /* ------------------------------------------------------------------------- */
 
+char ubuf[5];   /* a 4-byte UTF-8 sequence plus the terminating NUL */
+
+/* Encode code point x as NUL-terminated UTF-8 in the static buffer ubuf,
+ * which the next call overwrites.  Values beyond U+10FFFF are not valid
+ * Unicode and are emitted as U+FFFD. */
+char *mkutf(unsigned long x)
+{
+  static const unsigned char lead[] = { 0x00, 0xc0, 0xe0, 0xf0 };
+  if (x > 0x10ffff) x = 0xfffd;
+  /* Number of trailing 10xxxxxx bytes that follow the lead byte. */
+  int tail = (x < 0x80) ? 0 : (x < 0x800) ? 1 : (x < 0x10000) ? 2 : 3;
+  memset(ubuf, 0, sizeof ubuf);
+  for (int i = tail; i > 0; --i, x >>= 6)
+    ubuf[i] = (char) (0x80 | (x & 0x3f));
+  ubuf[0] = (char) (lead[tail] | x);
+  return ubuf;
+}
+
 void
 replace_sgml_entities(string *s)
 {
@@ -330,9 +349,9 @@
      */
     while (j < l && s->at(j) != '&') ++j;
     /*
-     * We could convert high-bit chars to "é" here if use_iso8859
-     * is off, then let them be translated or not.  Is the purpose of
-     * !use_iso8859 to allow SGML entities to be seen, or to strongly
+     * We could convert high-bit chars to "é" here if USE_ASCII
+     * is on, then let them be translated or not.  Is the purpose of
+     * USE_ASCII to allow SGML entities to be seen, or to strongly
      * filter against high-ASCII chars that might blow up a terminal
      * that doesn't speak ISO8859?  For the moment, "allow SGML entities
      * to be seen" -- no filtering here.
@@ -370,7 +389,11 @@
           if (!isdigit(c)) break;
           x = 10 * x + c - '0';
         }
-        if (use_iso8859 || (x < 128)) {
+        if (USE_UTF8) {
+          s->replace(beg, j - beg, mkutf(x));
+          j = beg + 1;
+        }
+        else if (USE_ISO8859 && (x < 256) || USE_ASCII && (x < 128)) {
         s->replace(beg, j - beg, 1, (char) x);
         j = beg + 1;
         } else {
@@ -408,13 +431,17 @@
         (int (*)(const void *, const void *)) strcmp
       );
       if (entity != NULL) {
-        if (use_iso8859 && entity->iso8859code) {
+        if (USE_ISO8859 && entity->iso8859code) {
           s->replace(beg, j - beg, 1, (char) entity->iso8859code);
           j = beg + 1;
-        } else if (entity->asciistr) {
+        } else if (USE_ASCII && entity->asciistr) {
           s->replace(beg, j - beg, entity->asciistr);
         j = beg + 1;
         } /* else don't replace it at all, we don't have a translation */
+        else if(USE_UTF8 && entity->unicode) {
+        s->replace(beg, j - beg, mkutf(entity->unicode));
+        j = beg + 1;
+        }
       }
     } else {
       ;                         /* EXTENSION: Allow literal '&' sometimes. */
Index: html2text-1.3.2a/table.C
===================================================================
--- html2text-1.3.2a.orig/table.C	2008-09-20 14:01:44.415186916 +0300
+++ html2text-1.3.2a/table.C	2008-09-20 14:06:15.259781132 +0300
@@ -175,7 +175,7 @@
           - (*number_of_columns_return - 1) * (column_spacing + 0),
           Area::LEFT // Yields better results than "p->halign"!
         ));
-	p->width = tmp.get() ? tmp->width() : 0;
+	p->width = tmp.get() ? tmp->utf_width() : 0;
       }
       p->minimized = false;
 
@@ -308,7 +308,7 @@
 	left_of_column + old_column_width - 1,
 	Area::LEFT // Yields better results than "lc.halign"!
       ));
-      w = tmp->width();
+      w = tmp->utf_width();
       if (w >= left_of_column + old_column_width) lc.minimized = true;
     }
     if (w > left_of_column + new_column_width) {


510-disable-backspaces.patch:

--- NEW FILE 510-disable-backspaces.patch ---

Don't use backspaces.
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 00:36:07.709605411 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 00:50:20.938879802 +0300
@@ -188,7 +188,7 @@
   const char *style            = "compact";
   int        width             = 79;
   const char *output_file_name = "-";
-  bool       use_backspaces    = true;
+  bool       use_backspaces    = false;
 
   int i;
   for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) {


600-multiple-meta-tags.patch:

--- NEW FILE 600-multiple-meta-tags.patch ---

Recognize all <meta> tags, not just one.
Index: html2text-1.3.2a/HTMLParser.C
===================================================================
--- html2text-1.3.2a.orig/HTMLParser.C	2008-09-15 21:11:00.658490953 +0300
+++ html2text-1.3.2a/HTMLParser.C	2008-09-15 21:16:14.078550975 +0300
@@ -911,28 +911,28 @@
 #if YY_HTMLParser_DEBUG != 0
 static const short yyrline[] = { 0,
    273,   304,   309,   312,   315,   319,   322,   326,   329,   333,
-   336,   339,   342,   345,   353,   361,   365,   368,   373,   376,
-   379,   384,   392,   396,   399,   407,   415,   420,   423,   426,
-   431,   442,   446,   454,   458,   461,   466,   471,   475,   478,
-   481,   487,   493,   499,   505,   510,   519,   520,   527,   527,
-   534,   534,   541,   541,   550,   554,   557,   563,   570,   575,
-   582,   591,   600,   604,   607,   611,   617,   623,   631,   637,
-   645,   650,   653,   658,   662,   665,   670,   678,   686,   690,
-   693,   701,   705,   708,   714,   721,   727,   737,   742,   747,
-   749,   750,   751,   752,   753,   761,   763,   764,   765,   766,
-   767,   768,   769,   770,   773,   775,   776,   777,   778,   779,
-   780,   781,   784,   795,   800,   808,   814,   819,   824,   832,
-   836,   840,   848,   852,   855,   861,   867,   873,   881,   886,
-   891,   901,   903,   904,   905,   906,   907,   910,   912,   913,
-   914,   915,   916,   921,   921,   922,   922,   923,   923,   924,
-   924,   926,   926,   927,   927,   929,   929,   930,   930,   931,
-   931,   932,   932,   933,   933,   934,   934,   935,   935,   936,
-   936,   937,   937,   938,   938,   939,   939,   940,   940,   941,
-   941,   942,   942,   943,   943,   944,   944,   945,   945,   946,
-   946,   947,   947,   948,   948,   949,   949,   950,   950,   951,
-   951,   952,   952,   953,   953,   954,   954,   955,   955,   956,
-   956,   957,   957,   958,   958,   959,   959,   960,   960,   961,
-   961,   963,   963
+   336,   339,   344,   347,   355,   363,   367,   370,   375,   378,
+   381,   386,   394,   398,   401,   409,   417,   422,   425,   428,
+   433,   444,   448,   456,   460,   463,   468,   473,   477,   480,
+   483,   489,   495,   501,   507,   512,   521,   522,   529,   529,
+   536,   536,   543,   543,   552,   556,   559,   565,   572,   577,
+   584,   593,   602,   606,   609,   613,   619,   625,   633,   639,
+   647,   652,   655,   660,   664,   667,   672,   680,   688,   692,
+   695,   703,   707,   710,   716,   723,   729,   739,   744,   749,
+   751,   752,   753,   754,   755,   763,   765,   766,   767,   768,
+   769,   770,   771,   772,   775,   777,   778,   779,   780,   781,
+   782,   783,   786,   797,   802,   810,   816,   821,   826,   834,
+   838,   842,   850,   854,   857,   863,   869,   875,   883,   888,
+   893,   903,   905,   906,   907,   908,   909,   912,   914,   915,
+   916,   917,   918,   923,   923,   924,   924,   925,   925,   926,
+   926,   928,   928,   929,   929,   931,   931,   932,   932,   933,
+   933,   934,   934,   935,   935,   936,   936,   937,   937,   938,
+   938,   939,   939,   940,   940,   941,   941,   942,   942,   943,
+   943,   944,   944,   945,   945,   946,   946,   947,   947,   948,
+   948,   949,   949,   950,   950,   951,   951,   952,   952,   953,
+   953,   954,   954,   955,   955,   956,   956,   957,   957,   958,
+   958,   959,   959,   960,   960,   961,   961,   962,   962,   963,
+   963,   965,   965
 };
 
 static const char * const yytname[] = {   "$","error","$illegal.","DOCTYPE",
@@ -2044,17 +2044,19 @@
 case 12:
 #line 339 "HTMLParser.y"
 {
-    (yyval.document = yyvsp[-1].document)->head.meta_attributes.reset(yyvsp[0].tag_attributes);
+    auto_ptr<Meta> s(new Meta);
+    s->attributes.reset(yyvsp[0].tag_attributes);
+    (yyval.document = yyvsp[-1].document)->head.metas.push_back(s);
   ;
     break;}
 case 13:
-#line 342 "HTMLParser.y"
+#line 344 "HTMLParser.y"
 {
     (yyval.document = yyvsp[-1].document)->head.link_attributes.reset(yyvsp[0].tag_attributes);
   ;
     break;}
 case 14:
-#line 345 "HTMLParser.y"
+#line 347 "HTMLParser.y"
 {
     auto_ptr<Script> s(new Script);
     s->attributes.reset(yyvsp[0].tag_attributes);
@@ -2065,7 +2067,7 @@
   ;
     break;}
 case 15:
-#line 353 "HTMLParser.y"
+#line 355 "HTMLParser.y"
 {
     auto_ptr<Style> s(new Style);
     s->attributes.reset(yyvsp[0].tag_attributes);
@@ -2076,20 +2078,20 @@
   ;
     break;}
 case 16:
-#line 361 "HTMLParser.y"
+#line 363 "HTMLParser.y"
 {
     delete yyvsp[0].tag_attributes;
     yyval.document = yyvsp[-1].document;
   ;
     break;}
 case 17:
-#line 365 "HTMLParser.y"
+#line 367 "HTMLParser.y"
 {
     yyval.document = yyvsp[-1].document;
   ;
     break;}
 case 18:
-#line 368 "HTMLParser.y"
+#line 370 "HTMLParser.y"
 {
     Paragraph *p = new Paragraph;
     p->texts.reset(yyvsp[0].element_list);
@@ -2097,25 +2099,25 @@
   ;
     break;}
 case 19:
-#line 373 "HTMLParser.y"
+#line 375 "HTMLParser.y"
 {
     (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].heading));
   ;
     break;}
 case 20:
-#line 376 "HTMLParser.y"
+#line 378 "HTMLParser.y"
 {
     (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 21:
-#line 379 "HTMLParser.y"
+#line 381 "HTMLParser.y"
 {
     (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].address));
   ;
     break;}
 case 22:
-#line 385 "HTMLParser.y"
+#line 387 "HTMLParser.y"
 {
     yyval.pcdata = new PCData;
     yyval.pcdata->text = *yyvsp[0].strinG;
@@ -2123,19 +2125,19 @@
   ;
     break;}
 case 23:
-#line 393 "HTMLParser.y"
+#line 395 "HTMLParser.y"
 {
     yyval.element_list = new list<auto_ptr<Element> >;
   ;
     break;}
 case 24:
-#line 396 "HTMLParser.y"
+#line 398 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list;
   ;
     break;}
 case 25:
-#line 399 "HTMLParser.y"
+#line 401 "HTMLParser.y"
 {
     auto_ptr<Script> s(new Script);
     s->attributes.reset(yyvsp[0].tag_attributes);
@@ -2146,7 +2148,7 @@
   ;
     break;}
 case 26:
-#line 407 "HTMLParser.y"
+#line 409 "HTMLParser.y"
 {
     auto_ptr<Style> s(new Style);
     s->attributes.reset(yyvsp[0].tag_attributes);
@@ -2157,7 +2159,7 @@
   ;
     break;}
 case 27:
-#line 415 "HTMLParser.y"
+#line 417 "HTMLParser.y"
 {
     Paragraph *p = new Paragraph;
     p->texts = auto_ptr<list<auto_ptr<Element> > >(yyvsp[0].element_list);
@@ -2165,25 +2167,25 @@
   ;
     break;}
 case 28:
-#line 420 "HTMLParser.y"
+#line 422 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].heading));
   ;
     break;}
 case 29:
-#line 423 "HTMLParser.y"
+#line 425 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 30:
-#line 426 "HTMLParser.y"
+#line 428 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].address));
   ;
     break;}
 case 31:
-#line 432 "HTMLParser.y"
+#line 434 "HTMLParser.y"
 {
             /* EXTENSION: Allow paragraph content in heading, not only texts */
     if (yyvsp[-2].heading->level != yyvsp[0].inT) {
@@ -2194,13 +2196,13 @@
   ;
     break;}
 case 32:
-#line 443 "HTMLParser.y"
+#line 445 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].element;
   ;
     break;}
 case 33:
-#line 446 "HTMLParser.y"
+#line 448 "HTMLParser.y"
 {
     Paragraph *p = new Paragraph;
     p->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2209,19 +2211,19 @@
   ;
     break;}
 case 34:
-#line 455 "HTMLParser.y"
+#line 457 "HTMLParser.y"
 {
     yyval.element_list = new list<auto_ptr<Element> >;
   ;
     break;}
 case 35:
-#line 458 "HTMLParser.y"
+#line 460 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list;
   ;
     break;}
 case 36:
-#line 461 "HTMLParser.y"
+#line 463 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list;
     yyval.element_list->splice(yyval.element_list->end(), *yyvsp[0].element_list);
@@ -2229,31 +2231,31 @@
   ;
     break;}
 case 37:
-#line 466 "HTMLParser.y"
+#line 468 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 38:
-#line 472 "HTMLParser.y"
+#line 474 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].element;
   ;
     break;}
 case 39:
-#line 475 "HTMLParser.y"
+#line 477 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].preformatted;
   ;
     break;}
 case 40:
-#line 478 "HTMLParser.y"
+#line 480 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].definition_list;
   ;
     break;}
 case 41:
-#line 481 "HTMLParser.y"
+#line 483 "HTMLParser.y"
 {
     Division *p = new Division;
     p->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2262,7 +2264,7 @@
   ;
     break;}
 case 42:
-#line 487 "HTMLParser.y"
+#line 489 "HTMLParser.y"
 {
     Center *p = new Center;
     delete yyvsp[-2].tag_attributes;       // CENTER has no attributes.
@@ -2271,7 +2273,7 @@
   ;
     break;}
 case 43:
-#line 493 "HTMLParser.y"
+#line 495 "HTMLParser.y"
 {
     delete yyvsp[-2].tag_attributes; // BLOCKQUOTE has no attributes!
     BlockQuote *bq = new BlockQuote;
@@ -2280,7 +2282,7 @@
   ;
     break;}
 case 44:
-#line 499 "HTMLParser.y"
+#line 501 "HTMLParser.y"
 {
     Form *f = new Form;
     f->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2289,7 +2291,7 @@
   ;
     break;}
 case 45:
-#line 505 "HTMLParser.y"
+#line 507 "HTMLParser.y"
 {
     HorizontalRule *h = new HorizontalRule;
     h->attributes.reset(yyvsp[0].tag_attributes);
@@ -2297,7 +2299,7 @@
   ;
     break;}
 case 46:
-#line 510 "HTMLParser.y"
+#line 512 "HTMLParser.y"
 {
     Table *t = new Table;
     t->attributes.reset(yyvsp[-3].tag_attributes);
@@ -2307,11 +2309,11 @@
   ;
     break;}
 case 47:
-#line 520 "HTMLParser.y"
+#line 522 "HTMLParser.y"
 { ++list_nesting; ;
     break;}
 case 48:
-#line 520 "HTMLParser.y"
+#line 522 "HTMLParser.y"
 {
     OrderedList *ol = new OrderedList;
     ol->attributes.reset(yyvsp[-3].tag_attributes);
@@ -2321,11 +2323,11 @@
   ;
     break;}
 case 49:
-#line 527 "HTMLParser.y"
+#line 529 "HTMLParser.y"
 { ++list_nesting; ;
     break;}
 case 50:
-#line 527 "HTMLParser.y"
+#line 529 "HTMLParser.y"
 {
     UnorderedList *ul = new UnorderedList;
     ul->attributes.reset(yyvsp[-3].tag_attributes);
@@ -2335,11 +2337,11 @@
   ;
     break;}
 case 51:
-#line 534 "HTMLParser.y"
+#line 536 "HTMLParser.y"
 { ++list_nesting; ;
     break;}
 case 52:
-#line 534 "HTMLParser.y"
+#line 536 "HTMLParser.y"
 {
     Dir *d = new Dir;
     d->attributes.reset(yyvsp[-3].tag_attributes);
@@ -2349,11 +2351,11 @@
   ;
     break;}
 case 53:
-#line 541 "HTMLParser.y"
+#line 543 "HTMLParser.y"
 { ++list_nesting; ;
     break;}
 case 54:
-#line 541 "HTMLParser.y"
+#line 543 "HTMLParser.y"
 {
     Menu *m = new Menu;
     m->attributes.reset(yyvsp[-3].tag_attributes);
@@ -2363,26 +2365,26 @@
   ;
     break;}
 case 55:
-#line 551 "HTMLParser.y"
+#line 553 "HTMLParser.y"
 {
     yyval.list_items = 0;
   ;
     break;}
 case 56:
-#line 554 "HTMLParser.y"
+#line 556 "HTMLParser.y"
 {
     yyval.list_items = yyvsp[-1].list_items;
   ;
     break;}
 case 57:
-#line 557 "HTMLParser.y"
+#line 559 "HTMLParser.y"
 {
     yyval.list_items = yyvsp[-1].list_items ? yyvsp[-1].list_items : new list<auto_ptr<ListItem> >;
     yyval.list_items->push_back(auto_ptr<ListItem>(yyvsp[0].list_item));
   ;
     break;}
 case 58:
-#line 564 "HTMLParser.y"
+#line 566 "HTMLParser.y"
 {
     ListNormalItem *lni = new ListNormalItem;
     lni->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2391,7 +2393,7 @@
   ;
     break;}
 case 59:
-#line 570 "HTMLParser.y"
+#line 572 "HTMLParser.y"
 {   /* EXTENSION: Handle a "block" in a list as an indented block. */
     ListBlockItem *lbi = new ListBlockItem;
     lbi->block.reset(yyvsp[0].element);
@@ -2399,7 +2401,7 @@
   ;
     break;}
 case 60:
-#line 575 "HTMLParser.y"
+#line 577 "HTMLParser.y"
 {              /* EXTENSION: Treat "texts" in a list as an "<LI>". */
     ListNormalItem *lni = new ListNormalItem;
     lni->flow.reset(yyvsp[0].element_list);
@@ -2407,7 +2409,7 @@
   ;
     break;}
 case 61:
-#line 585 "HTMLParser.y"
+#line 587 "HTMLParser.y"
 {
     delete yyvsp[-4].tag_attributes;
     delete yyvsp[-3].element_list; /* Kludge */
@@ -2415,7 +2417,7 @@
   ;
     break;}
 case 62:
-#line 591 "HTMLParser.y"
+#line 593 "HTMLParser.y"
 {
     DefinitionList *dl = new DefinitionList;
     dl->attributes.reset(yyvsp[-4].tag_attributes);
@@ -2425,33 +2427,33 @@
   ;
     break;}
 case 63:
-#line 601 "HTMLParser.y"
+#line 603 "HTMLParser.y"
 {
     yyval.definition_list_item_list = 0;
   ;
     break;}
 case 64:
-#line 604 "HTMLParser.y"
+#line 606 "HTMLParser.y"
 {
     yyval.definition_list_item_list = yyvsp[0].definition_list_item_list;
   ;
     break;}
 case 65:
-#line 607 "HTMLParser.y"
+#line 609 "HTMLParser.y"
 {
     yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >;
     yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_name));
   ;
     break;}
 case 66:
-#line 611 "HTMLParser.y"
+#line 613 "HTMLParser.y"
 {
     yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >;
     yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_definition));
   ;
     break;}
 case 67:
-#line 618 "HTMLParser.y"
+#line 620 "HTMLParser.y"
 {      /* EXTENSION: Allow "flow" instead of "texts" */
     delete yyvsp[-2].tag_attributes;
     yyval.term_name = new TermName;
@@ -2459,7 +2461,7 @@
   ;
     break;}
 case 68:
-#line 623 "HTMLParser.y"
+#line 625 "HTMLParser.y"
 {/* EXTENSION: Ignore <P> after </DT> */
     delete yyvsp[-4].tag_attributes;
     delete yyvsp[-1].tag_attributes;
@@ -2468,7 +2470,7 @@
   ;
     break;}
 case 69:
-#line 632 "HTMLParser.y"
+#line 634 "HTMLParser.y"
 {
     delete yyvsp[-2].tag_attributes;
     yyval.term_definition = new TermDefinition;
@@ -2476,7 +2478,7 @@
   ;
     break;}
 case 70:
-#line 637 "HTMLParser.y"
+#line 639 "HTMLParser.y"
 {/* EXTENSION: Ignore <P> after </DD> */
     delete yyvsp[-4].tag_attributes;
     delete yyvsp[-1].tag_attributes;
@@ -2485,44 +2487,44 @@
   ;
     break;}
 case 71:
-#line 646 "HTMLParser.y"
+#line 648 "HTMLParser.y"
 {
     yyval.element_list = new list<auto_ptr<Element> >;
     yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 72:
-#line 650 "HTMLParser.y"
+#line 652 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list;
   ;
     break;}
 case 73:
-#line 653 "HTMLParser.y"
+#line 655 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 74:
-#line 659 "HTMLParser.y"
+#line 661 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].element;
   ;
     break;}
 case 75:
-#line 662 "HTMLParser.y"
+#line 664 "HTMLParser.y"
 {          /* EXTENSION: Allow headings in "flow", i.e. in lists */
     yyval.element = yyvsp[0].heading;
   ;
     break;}
 case 76:
-#line 665 "HTMLParser.y"
+#line 667 "HTMLParser.y"
 {
     yyval.element = yyvsp[0].element;
   ;
     break;}
 case 77:
-#line 671 "HTMLParser.y"
+#line 673 "HTMLParser.y"
 {
     yyval.preformatted = new Preformatted;
     yyval.preformatted->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2530,7 +2532,7 @@
   ;
     break;}
 case 78:
-#line 679 "HTMLParser.y"
+#line 681 "HTMLParser.y"
 {
     yyval.caption = new Caption;
     yyval.caption->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2538,19 +2540,19 @@
   ;
     break;}
 case 79:
-#line 687 "HTMLParser.y"
+#line 689 "HTMLParser.y"
 {
     yyval.table_rows = new list<auto_ptr<TableRow> >;
   ;
     break;}
 case 80:
-#line 690 "HTMLParser.y"
+#line 692 "HTMLParser.y"
 {
     yyval.table_rows = yyvsp[-1].table_rows;
   ;
     break;}
 case 81:
-#line 693 "HTMLParser.y"
+#line 695 "HTMLParser.y"
 {
     TableRow *tr = new TableRow;
     tr->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2559,19 +2561,19 @@
   ;
     break;}
 case 82:
-#line 702 "HTMLParser.y"
+#line 704 "HTMLParser.y"
 {
     yyval.table_cells = new list<auto_ptr<TableCell> >;
   ;
     break;}
 case 83:
-#line 705 "HTMLParser.y"
+#line 707 "HTMLParser.y"
 {
     yyval.table_cells = yyvsp[-1].table_cells;
   ;
     break;}
 case 84:
-#line 708 "HTMLParser.y"
+#line 710 "HTMLParser.y"
 {
     TableCell *tc = new TableCell;
     tc->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2580,7 +2582,7 @@
   ;
     break;}
 case 85:
-#line 714 "HTMLParser.y"
+#line 716 "HTMLParser.y"
 {
                             /* EXTENSION: Allow "</TD>" in place of "</TH>". */
     TableHeadingCell *thc = new TableHeadingCell;
@@ -2590,14 +2592,14 @@
   ;
     break;}
 case 86:
-#line 721 "HTMLParser.y"
+#line 723 "HTMLParser.y"
 {    /* EXTENSION: Ignore <INPUT> between table cells. */
     delete yyvsp[0].tag_attributes;
     yyval.table_cells = yyvsp[-1].table_cells;
   ;
     break;}
 case 87:
-#line 728 "HTMLParser.y"
+#line 730 "HTMLParser.y"
 { /* Should be "address_content"... */
     delete yyvsp[-2].tag_attributes;
     yyval.address = new Address;
@@ -2605,40 +2607,40 @@
   ;
     break;}
 case 88:
-#line 738 "HTMLParser.y"
+#line 740 "HTMLParser.y"
 {
     yyval.element_list = new list<auto_ptr<Element> >;
     yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 89:
-#line 742 "HTMLParser.y"
+#line 744 "HTMLParser.y"
 {
     (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 90:
-#line 748 "HTMLParser.y"
+#line 750 "HTMLParser.y"
 { yyval.element = yyvsp[-1].pcdata; ;
     break;}
 case 91:
-#line 749 "HTMLParser.y"
+#line 751 "HTMLParser.y"
 { yyval.element = yyvsp[-1].element; ;
     break;}
 case 92:
-#line 750 "HTMLParser.y"
+#line 752 "HTMLParser.y"
 { yyval.element = yyvsp[-1].element; ;
     break;}
 case 93:
-#line 751 "HTMLParser.y"
+#line 753 "HTMLParser.y"
 { yyval.element = yyvsp[-1].element; ;
     break;}
 case 94:
-#line 752 "HTMLParser.y"
+#line 754 "HTMLParser.y"
 { yyval.element = yyvsp[-1].element; ;
     break;}
 case 95:
-#line 753 "HTMLParser.y"
+#line 755 "HTMLParser.y"
 { /* EXTENSION: NS 1.1 / IE 2.0 */
     NoBreak *nb = new NoBreak;
     delete yyvsp[-3].tag_attributes;
@@ -2647,75 +2649,75 @@
   ;
     break;}
 case 96:
-#line 762 "HTMLParser.y"
+#line 764 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(TT,     yyvsp[-1].element_list); ;
     break;}
 case 97:
-#line 763 "HTMLParser.y"
+#line 765 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(I,      yyvsp[-1].element_list); ;
     break;}
 case 98:
-#line 764 "HTMLParser.y"
+#line 766 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(B,      yyvsp[-1].element_list); ;
     break;}
 case 99:
-#line 765 "HTMLParser.y"
+#line 767 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(U,      yyvsp[-1].element_list); ;
     break;}
 case 100:
-#line 766 "HTMLParser.y"
+#line 768 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(STRIKE, yyvsp[-1].element_list); ;
     break;}
 case 101:
-#line 767 "HTMLParser.y"
+#line 769 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(BIG,    yyvsp[-1].element_list); ;
     break;}
 case 102:
-#line 768 "HTMLParser.y"
+#line 770 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SMALL,  yyvsp[-1].element_list); ;
     break;}
 case 103:
-#line 769 "HTMLParser.y"
+#line 771 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUB,    yyvsp[-1].element_list); ;
     break;}
 case 104:
-#line 770 "HTMLParser.y"
+#line 772 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUP,    yyvsp[-1].element_list); ;
     break;}
 case 105:
-#line 774 "HTMLParser.y"
+#line 776 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(EM,     yyvsp[-1].element_list); ;
     break;}
 case 106:
-#line 775 "HTMLParser.y"
+#line 777 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(STRONG, yyvsp[-1].element_list); ;
     break;}
 case 107:
-#line 776 "HTMLParser.y"
+#line 778 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(DFN,    yyvsp[-1].element_list); ;
     break;}
 case 108:
-#line 777 "HTMLParser.y"
+#line 779 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CODE,   yyvsp[-1].element_list); ;
     break;}
 case 109:
-#line 778 "HTMLParser.y"
+#line 780 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(SAMP,   yyvsp[-1].element_list); ;
     break;}
 case 110:
-#line 779 "HTMLParser.y"
+#line 781 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(KBD,    yyvsp[-1].element_list); ;
     break;}
 case 111:
-#line 780 "HTMLParser.y"
+#line 782 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(VAR,    yyvsp[-1].element_list); ;
     break;}
 case 112:
-#line 781 "HTMLParser.y"
+#line 783 "HTMLParser.y"
 { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CITE,   yyvsp[-1].element_list); ;
     break;}
 case 113:
-#line 788 "HTMLParser.y"
+#line 790 "HTMLParser.y"
 {
     delete yyvsp[-2].tag_attributes;
     Anchor *a = new Anchor;
@@ -2725,7 +2727,7 @@
   ;
     break;}
 case 114:
-#line 795 "HTMLParser.y"
+#line 797 "HTMLParser.y"
 {
     Image *i = new Image;
     i->attributes.reset(yyvsp[0].tag_attributes);
@@ -2733,7 +2735,7 @@
   ;
     break;}
 case 115:
-#line 800 "HTMLParser.y"
+#line 802 "HTMLParser.y"
 {
     Applet *a = new Applet;
     a->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2742,7 +2744,7 @@
   ;
     break;}
 case 116:
-#line 808 "HTMLParser.y"
+#line 810 "HTMLParser.y"
 {
     Font2 *f2 = new Font2;
     f2->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2751,7 +2753,7 @@
   ;
     break;}
 case 117:
-#line 814 "HTMLParser.y"
+#line 816 "HTMLParser.y"
 {
     BaseFont *bf = new BaseFont;
     bf->attributes.reset(yyvsp[0].tag_attributes);
@@ -2759,7 +2761,7 @@
   ;
     break;}
 case 118:
-#line 819 "HTMLParser.y"
+#line 821 "HTMLParser.y"
 {
     LineBreak *lb = new LineBreak;
     lb->attributes.reset(yyvsp[0].tag_attributes);
@@ -2767,7 +2769,7 @@
   ;
     break;}
 case 119:
-#line 824 "HTMLParser.y"
+#line 826 "HTMLParser.y"
 {
     Map *m = new Map;
     m->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2776,20 +2778,20 @@
   ;
     break;}
 case 120:
-#line 833 "HTMLParser.y"
+#line 835 "HTMLParser.y"
 {
     yyval.element_list = 0;
   ;
     break;}
 case 121:
-#line 836 "HTMLParser.y"
+#line 838 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >;
     yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element));
   ;
     break;}
 case 122:
-#line 840 "HTMLParser.y"
+#line 842 "HTMLParser.y"
 {
     yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >;
     Param *p = new Param;
@@ -2798,26 +2800,26 @@
   ;
     break;}
 case 123:
-#line 849 "HTMLParser.y"
+#line 851 "HTMLParser.y"
 {
     yyval.tag_attributes_list = 0;
   ;
     break;}
 case 124:
-#line 852 "HTMLParser.y"
+#line 854 "HTMLParser.y"
 {
     yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list;
   ;
     break;}
 case 125:
-#line 855 "HTMLParser.y"
+#line 857 "HTMLParser.y"
 {
     yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list ? yyvsp[-1].tag_attributes_list : new list<auto_ptr<list<TagAttribute> > >;
     yyval.tag_attributes_list->push_back(auto_ptr<list<TagAttribute> >(yyvsp[0].tag_attributes));
   ;
     break;}
 case 126:
-#line 862 "HTMLParser.y"
+#line 864 "HTMLParser.y"
 {
     Input *i = new Input;
     i->attributes.reset(yyvsp[0].tag_attributes);
@@ -2825,7 +2827,7 @@
   ;
     break;}
 case 127:
-#line 867 "HTMLParser.y"
+#line 869 "HTMLParser.y"
 {
     Select *s = new Select;
     s->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2834,7 +2836,7 @@
   ;
     break;}
 case 128:
-#line 873 "HTMLParser.y"
+#line 875 "HTMLParser.y"
 {
     TextArea *ta = new TextArea;
     ta->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2843,20 +2845,20 @@
   ;
     break;}
 case 129:
-#line 882 "HTMLParser.y"
+#line 884 "HTMLParser.y"
 {
     yyval.option_list = new list<auto_ptr<Option> >;
     yyval.option_list->push_back(auto_ptr<Option>(yyvsp[0].option));
   ;
     break;}
 case 130:
-#line 886 "HTMLParser.y"
+#line 888 "HTMLParser.y"
 {
     (yyval.option_list = yyvsp[-1].option_list)->push_back(auto_ptr<Option>(yyvsp[0].option));
   ;
     break;}
 case 131:
-#line 892 "HTMLParser.y"
+#line 894 "HTMLParser.y"
 {
     yyval.option = new Option;
     yyval.option->attributes.reset(yyvsp[-2].tag_attributes);
@@ -2864,99 +2866,99 @@
   ;
     break;}
 case 132:
-#line 902 "HTMLParser.y"
+#line 904 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 1; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 133:
-#line 903 "HTMLParser.y"
+#line 905 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 2; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 134:
-#line 904 "HTMLParser.y"
+#line 906 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 3; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 135:
-#line 905 "HTMLParser.y"
+#line 907 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 4; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 136:
-#line 906 "HTMLParser.y"
+#line 908 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 5; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 137:
-#line 907 "HTMLParser.y"
+#line 909 "HTMLParser.y"
 { yyval.heading = new Heading; yyval.heading->level = 6; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ;
     break;}
 case 138:
-#line 911 "HTMLParser.y"
+#line 913 "HTMLParser.y"
 { yyval.inT = 1; ;
     break;}
 case 139:
-#line 912 "HTMLParser.y"
+#line 914 "HTMLParser.y"
 { yyval.inT = 2; ;
     break;}
 case 140:
-#line 913 "HTMLParser.y"
+#line 915 "HTMLParser.y"
 { yyval.inT = 3; ;
     break;}
 case 141:
-#line 914 "HTMLParser.y"
+#line 916 "HTMLParser.y"
 { yyval.inT = 4; ;
     break;}
 case 142:
-#line 915 "HTMLParser.y"
+#line 917 "HTMLParser.y"
 { yyval.inT = 5; ;
     break;}
 case 143:
-#line 916 "HTMLParser.y"
+#line 918 "HTMLParser.y"
 { yyval.inT = 6; ;
     break;}
 case 144:
-#line 921 "HTMLParser.y"
+#line 923 "HTMLParser.y"
 { yyval.pcdata = 0; ;
     break;}
 case 145:
-#line 921 "HTMLParser.y"
+#line 923 "HTMLParser.y"
 { yyval.pcdata = yyvsp[0].pcdata; ;
     break;}
 case 146:
-#line 922 "HTMLParser.y"
+#line 924 "HTMLParser.y"
 { yyval.caption = 0; ;
     break;}
 case 147:
-#line 922 "HTMLParser.y"
+#line 924 "HTMLParser.y"
 { yyval.caption = yyvsp[0].caption; ;
     break;}
 case 148:
-#line 923 "HTMLParser.y"
+#line 925 "HTMLParser.y"
 { yyval.element_list = 0; ;
     break;}
 case 149:
-#line 923 "HTMLParser.y"
+#line 925 "HTMLParser.y"
 { yyval.element_list = yyvsp[0].element_list; ;
     break;}
 case 150:
-#line 924 "HTMLParser.y"
+#line 926 "HTMLParser.y"
 { yyval.element_list = 0; ;
     break;}
 case 151:
-#line 924 "HTMLParser.y"
+#line 926 "HTMLParser.y"
 { yyval.element_list = yyvsp[0].element_list; ;
     break;}
 case 152:
-#line 926 "HTMLParser.y"
+#line 928 "HTMLParser.y"
 { yyval.tag_attributes = 0; ;
     break;}
 case 153:
-#line 926 "HTMLParser.y"
+#line 928 "HTMLParser.y"
 { yyval.tag_attributes = yyvsp[0].tag_attributes; ;
     break;}
 case 154:
-#line 927 "HTMLParser.y"
+#line 929 "HTMLParser.y"
 { yyval.tag_attributes = 0; ;
     break;}
 case 155:
-#line 927 "HTMLParser.y"
+#line 929 "HTMLParser.y"
 { yyval.tag_attributes = yyvsp[0].tag_attributes; ;
     break;}
 }
@@ -3158,7 +3160,7 @@
 /* END */
 
 /* #line 891 "/usr/local/lib/bison.cc" */
-#line 965 "HTMLParser.y"
+#line 967 "HTMLParser.y"
  /* } */
 
 /*
Index: html2text-1.3.2a/HTMLParser.y
===================================================================
--- html2text-1.3.2a.orig/HTMLParser.y	2008-09-15 21:11:00.710491137 +0300
+++ html2text-1.3.2a/HTMLParser.y	2008-09-15 21:11:02.082471934 +0300
@@ -337,7 +337,9 @@
     ($$ = $1)->head.base_attributes.reset($2);
   }
   | document_ META {
-    ($$ = $1)->head.meta_attributes.reset($2);
+    auto_ptr<Meta> s(new Meta);
+    s->attributes.reset($2);
+    ($$ = $1)->head.metas.push_back(s);
   }
   | document_ LINK {
     ($$ = $1)->head.link_attributes.reset($2);
Index: html2text-1.3.2a/html.C
===================================================================
--- html2text-1.3.2a.orig/html.C	2008-09-15 21:11:00.726492472 +0300
+++ html2text-1.3.2a/html.C	2008-09-15 21:11:02.086473001 +0300
@@ -68,6 +68,7 @@
 static pack(DefinitionListItem)
 static pack(Script)
 static pack(Style)
+static pack(Meta)
 
 #undef pack
 
@@ -131,12 +132,18 @@
   if (base_attributes.get()) os << "<BASE" << base_attributes << ">" << std::endl;
   foreach(scripts, os, separator);
   foreach(styles, os, separator);
-  if (meta_attributes.get()) os << "<META" << meta_attributes << ">" << std::endl;
+  foreach(metas, os, separator);
   if (link_attributes.get()) os << "<LINK" << link_attributes << ">" << std::endl;
   os << "</HEAD>" << separator;
 }
 
 void
+Meta::unparse(ostream &os, ostream_manipulator separator) const
+{
+  os << "<META" << attributes << ">" << separator;
+}
+
+void
 Script::unparse(ostream &os, ostream_manipulator separator) const
 {
   os
Index: html2text-1.3.2a/html.h
===================================================================
--- html2text-1.3.2a.orig/html.h	2008-09-15 21:11:02.010473259 +0300
+++ html2text-1.3.2a/html.h	2008-09-15 21:11:02.086473001 +0300
@@ -436,13 +436,19 @@
   void unparse(ostream &, ostream_manipulator separator) const;
 };
 
+struct Meta { // One <META> tag of the document head; Head::metas keeps one per tag.
+  auto_ptr<list<TagAttribute> > attributes;    // HTTP-EQUIV NAME CONTENT
+  // Writes "<META", the attributes, ">" and then the separator to the stream.
+  void unparse(ostream &, ostream_manipulator separator) const;
+};
+
 struct Head {
   auto_ptr<PCData>              title;
   auto_ptr<list<TagAttribute> > isindex_attributes; // PROMPT
   auto_ptr<list<TagAttribute> > base_attributes;    // HREF
   list<auto_ptr<Script> >       scripts;
   list<auto_ptr<Style> >        styles;
-  auto_ptr<list<TagAttribute> > meta_attributes;    // HTTP-EQUIV NAME CONTENT
+  list<auto_ptr<Meta> >         metas;
   auto_ptr<list<TagAttribute> > link_attributes;    // HREF REL REV TITLE
 
   void unparse(ostream &, ostream_manipulator separator) const;


611-recognize-input-encoding.patch:

--- NEW FILE 611-recognize-input-encoding.patch ---

Recode input according to 'meta http-equiv' in html document.
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 14:06:46.787386246 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 14:09:32.009308515 +0300
@@ -37,9 +37,15 @@
 
 #include <iostream>
 #include <fstream>
+#include <sstream>
+#include <algorithm>
+#include <iterator>
 #include <string.h>
 #include <stdlib.h>
 
+#include <iconv.h>
+#include <errno.h>
+
 #include "html.h"
 #include "HTMLControl.h"
 //#include "urlistream.h"
@@ -50,11 +56,15 @@
 
 /* ------------------------------------------------------------------------- */
 using std::ifstream;
+using std::stringstream;
+using std::istream_iterator;
+using std::ostream_iterator;
 
 class MyParser : public HTMLControl {
 
 public:
   enum { PRINT_AS_ASCII, UNPARSE, SYNTAX_CHECK };
+  string meta_encoding;
 
   MyParser(
     istream &is_,
@@ -106,6 +116,23 @@
 /*virtual*/ void
 MyParser::process(const Document &document)
 {
+  list<auto_ptr<Meta> >::const_iterator i;
+  for(i = document.head.metas.begin(); i != document.head.metas.end(); ++i) {
+    bool exists = false;
+    get_attribute(i->get()->attributes.get(), "http-equiv", &exists);
+    if (exists) {
+      string content = get_attribute(i->get()->attributes.get(), "content", "");
+	  char to_find[] = "charset=";
+	  string::size_type found_pos = content.find(to_find);
+	  if (found_pos != string::npos)
+	  {
+        this->meta_encoding = content.substr(found_pos + sizeof(to_find) - 1);
+	    //std::cerr << this->meta_encoding << std::endl;
+	  }
+      break;
+    }
+  }
+
   switch (mode) {
 
   case PRINT_AS_ASCII:
@@ -126,6 +153,70 @@
   }
 }
 
+/*
+ * Recode the whole contents of "stream" from "from_encoding" to
+ * "to_encoding" using iconv(3). On success the stream buffer is replaced by
+ * the converted text and true is returned; on failure a diagnostic is
+ * written to std::cerr, the buffer contents are not replaced, and
+ * false is returned.
+ */
+bool recode(stringstream& stream, const char* to_encoding, const char* from_encoding)
+{
+	iconv_t iconv_handle = iconv_open(to_encoding, from_encoding);
+	if (iconv_handle == iconv_t(-1))
+	{
+		if (errno == EINVAL)
+		{
+			std::cerr << "Recoding from '" << from_encoding
+				<< "' to '" << to_encoding << "' is not available." << std::endl;
+			std::cerr << "Check that '" << from_encoding
+				<< "' is a valid encoding." << std::endl;
+		}
+		else
+		{
+			std::cerr << "Error: cannot setup recoding." << std::endl;
+		}
+		return false;
+	}
+
+	stream.seekg(0);
+	const string input_string = stream.str();
+	size_t input_size = input_string.size();
+	// Copy by length, not strcpy(): the document may contain NUL bytes.
+	char* const orig_raw_input = new char[input_size+1];
+	memcpy(orig_raw_input, input_string.data(), input_size);
+	char* raw_input = orig_raw_input;
+	size_t max_output_size = input_size * 4; // maximum possible overhead
+	char* const orig_raw_output = new char[max_output_size+1];
+	char* raw_output = orig_raw_output;
+
+	const size_t iconv_value =
+		iconv(iconv_handle, &raw_input, &input_size, &raw_output, &max_output_size);
+	// Save errno now: the cleanup calls below are allowed to overwrite it.
+	const int iconv_errno = errno;
+
+	if (iconv_value != (size_t)-1)
+	{
+		// iconv() advanced raw_output past the last byte it produced.
+		stream.str(string(orig_raw_output, raw_output - orig_raw_output));
+	}
+
+	// Nothing may dereference raw_input/raw_output after this point.
+	delete [] orig_raw_input;
+	delete [] orig_raw_output;
+	iconv_close(iconv_handle);
+
+	if (iconv_value == (size_t)-1)
+	{
+		std::cerr << "Input recoding failed due to "
+			<< (iconv_errno == EILSEQ ? "invalid input sequence." : "unknown reason.")
+			<< std::endl;
+		return false;
+	}
+	return true;
+}
+
 /* ------------------------------------------------------------------------- */
 
 static const char *usage = "\
@@ -151,6 +242,7 @@
   -nobs          Do not use backspaces for boldface and underlining\n\
   -ascii         Use plain ASCII for output instead of ISO-8859-1\n\
   -utf8          Assume both terminal and input stream are in UTF-8 mode\n\
+  -nometa        Don't try to recode input using 'meta' tag\n\
 ";
 
 int use_encoding = ISO8859;
@@ -188,6 +280,7 @@
   int        width             = 79;
   const char *output_file_name = "-";
   bool       use_backspaces    = false;
+  bool       use_meta          = true;
 
   int i;
   for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) {
@@ -204,6 +297,7 @@
     if (!strcmp(arg, "-nobs"         )) { use_backspaces = false;        } else
     if (!strcmp(arg, "-ascii"        )) { use_encoding = ASCII;          } else
     if (!strcmp(arg, "-utf8"         )) { use_encoding = UTF8;           } else
+    if (!strcmp(arg, "-nometa"       )) { use_meta = false;              } else
     {
       std::cerr
 	<< "Unrecognized command line option \""
@@ -356,30 +450,117 @@
     }
 
     istream    *isp;
-    ifstream     uis;
+    istream    *uis;
+	ifstream* infile = NULL;
+	stringstream input_stream;
+
+	if (strcmp(input_url, "-") == 0)
+	{
+		uis = &std::cin;
+	}
+	else
+	{
+		infile = new ifstream(input_url);
+		if (!infile->is_open())
+		{
+		  delete infile;
+		  std::cerr
+			<< "Cannot open input file \""
+			<< input_url
+			<< "\"."
+			<< std::endl;
+		  exit(1);
+		}
+		uis = infile;
+    }
 
-    uis.open(input_url);
-    if (!uis.is_open()) {
-      std::cerr
-        << "Cannot open input file \""
-	<< input_url
-        << "\"."
-        << std::endl;
-      exit(1);
+	*uis >> std::noskipws;
+	std::copy(istream_iterator<char>(*uis), istream_iterator<char>(), ostream_iterator<char>(input_stream));
+
+	if (infile)
+	{
+		infile->close();
+		delete infile;
+	}
+
+	string from_encoding;
+	if (use_meta)
+	{
+		std::ofstream fake_osp("/dev/null");
+		// fake parsing to determine meta
+		MyParser parser(
+		  input_stream,
+		  debug_scanner,
+		  debug_parser,
+		  fake_osp,
+		  mode,
+		  width,
+		  input_url
+        );
+		if (parser.yyparse() != 0) exit(1);
+
+		from_encoding = parser.meta_encoding;
+
+		// don't need to debug twice ...
+		debug_scanner = false;
+		debug_parser = false;
+
+		/*
+		 * It will be good to show warning in this case. But there are too many
+		 * html documents without encoding info, so this branch is commented by
+		 * now.
+		if (parser.meta_encoding.empty())
+		{
+			std::cerr << "Warning: cannot determine encoding from html file." << std::endl;
+			std::cerr << "To remove this warning, use '-nometa' option with, optionally, '-utf8' or '-ascii' options" << std::endl;
+			std::cerr << "to process file \"" << input_url << "\"." << std::endl;
+		}
+		*/
+	}
+	if (from_encoding.empty()) // -nometa supplied or no appropriate tag
+	{
+		if (use_encoding == UTF8)
+		{
+			from_encoding = "UTF-8";
+		}
+		else if (use_encoding == ASCII)
+		{
+			from_encoding = "ASCII";
+		}
+		else
+		{
+			from_encoding = "ISO_8859-1";
+		}
+	}
+
+	// recode input
+	bool result = recode(input_stream, "UTF-8", from_encoding.data());
+	if (!result)
+	{
+		continue;
+	}
+
+    if (number_of_input_urls != 1) {
+      *osp << "###### " << input_url << " ######" << std::endl;
     }
 
-    MyParser parser(
-      uis,
-      debug_scanner,
-      debug_parser,
-      *osp,
-      mode,
-      width,
-      input_url
-    );
+	// real parsing now always process UTF-8
+	use_encoding = UTF8;
 
+	// real parsing
+	input_stream.clear();
+	input_stream.seekg(0);
+	MyParser parser(
+	  input_stream,
+	  debug_scanner,
+	  debug_parser,
+	  *osp,
+	  mode,
+	  width,
+	  input_url
+	);
     if (parser.yyparse() != 0) exit(1);
-	uis.close();
+
   }
 
   return 0;


630-recode-output-to-locale-charset.patch:

--- NEW FILE 630-recode-output-to-locale-charset.patch ---

Convert output to user's locale charset.
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 14:09:32.009308515 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 14:09:40.133207014 +0300
@@ -45,6 +45,7 @@
 
 #include <iconv.h>
 #include <errno.h>
+#include <langinfo.h>
 
 #include "html.h"
 #include "HTMLControl.h"
@@ -59,6 +60,7 @@
 using std::stringstream;
 using std::istream_iterator;
 using std::ostream_iterator;
+using std::noskipws;
 
 class MyParser : public HTMLControl {
 
@@ -474,7 +476,7 @@
 		uis = infile;
     }
 
-	*uis >> std::noskipws;
+	*uis >> noskipws;
 	std::copy(istream_iterator<char>(*uis), istream_iterator<char>(), ostream_iterator<char>(input_stream));
 
 	if (infile)
@@ -547,6 +549,8 @@
 	// real parsing now always process UTF-8
 	use_encoding = UTF8;
 
+	stringstream output_stream;
+
 	// real parsing
 	input_stream.clear();
 	input_stream.seekg(0);
@@ -554,13 +558,28 @@
 	  input_stream,
 	  debug_scanner,
 	  debug_parser,
-	  *osp,
+	  output_stream,
 	  mode,
 	  width,
 	  input_url
 	);
     if (parser.yyparse() != 0) exit(1);
 
+	// recode output
+	setlocale(LC_CTYPE,"");
+	char output_encoding[64];
+	strcpy(output_encoding, nl_langinfo(CODESET));
+	strcat(output_encoding, "//translit");
+
+	result = recode(output_stream, output_encoding, "UTF-8");
+	if (!result)
+	{
+		continue;
+	}
+	output_stream.clear();
+	output_stream.seekg(0);
+	output_stream >> noskipws;
+	std::copy(istream_iterator<char>(output_stream), istream_iterator<char>(), ostream_iterator<char>(*osp));
   }
 
   return 0;


800-replace-zeroes-with-null.patch:

--- NEW FILE 800-replace-zeroes-with-null.patch ---

Correctly specify NULLs for 64-bit architectures.
unchanged:
Index: html2text-1.3.2a/format.C
===================================================================
--- html2text-1.3.2a.orig/format.C	2008-09-15 20:30:26.413093842 +0300
+++ html2text-1.3.2a/format.C	2008-09-15 20:30:35.692976620 +0300
@@ -560,7 +560,7 @@
     "LEFT",   Area::LEFT,
     "CENTER", Area::CENTER,
     "RIGHT",  Area::RIGHT,
-    0
+    NULL
   );
 
   static char cell_attributes[7];
@@ -682,7 +682,7 @@
     "LEFT",   Area::LEFT,
     "CENTER", Area::CENTER,
     "RIGHT",  Area::RIGHT,
-    0
+    NULL
   );
 
   static BlockFormat bf("P");
@@ -752,7 +752,7 @@
       "LEFT",   Area::LEFT,
       "MIDDLE", Area::CENTER,
       "RIGHT",  Area::RIGHT,
-      0
+      NULL
     );
     Area *a = ::format(content.get(), w, halign);
     if (a) return a;
@@ -802,7 +802,7 @@
     "LEFT",   Area::LEFT,
     "CENTER", Area::CENTER,
     "RIGHT",  Area::RIGHT,
-    0
+    NULL
   ));
 }
 
@@ -1635,7 +1635,7 @@
     "A",         UPPER_ALPHA,
     "i",         LOWER_ROMAN,
     "I",         UPPER_ROMAN,
-    0
+    NULL
   );
 }
 
Index: html2text-1.3.2a/table.C
===================================================================
--- html2text-1.3.2a.orig/table.C	2008-09-15 20:30:26.417093442 +0300
+++ html2text-1.3.2a/table.C	2008-09-15 20:30:35.692976620 +0300
@@ -122,14 +122,14 @@
       "LEFT",   Area::LEFT,
       "CENTER", Area::CENTER,
       "RIGHT",  Area::RIGHT,
-      0
+      NULL
     );
     int row_valign = get_attribute(
       row.attributes.get(), "VALIGN", Area::MIDDLE,
       "TOP",    Area::LEFT,
       "MIDDLE", Area::MIDDLE,
       "BOTTOM", Area::BOTTOM,
-      0
+      NULL
     );
 
     const list<auto_ptr<TableCell> >           &cl(*row.cells);
@@ -158,14 +158,14 @@
         "LEFT",   Area::LEFT,
         "CENTER", Area::CENTER,
         "RIGHT",  Area::RIGHT,
-        0
+        NULL
       );
       p->valign    = get_attribute(
         cell.attributes.get(), "VALIGN", row_valign,
         "TOP",    Area::TOP,
         "MIDDLE", Area::MIDDLE,
         "BOTTOM", Area::BOTTOM,
-        0
+        NULL
       );
       {
 	auto_ptr<Area> tmp(cell.format(
@@ -386,7 +386,7 @@
     "LEFT",   Area::LEFT,
     "CENTER", Area::CENTER,
     "RIGHT",  Area::RIGHT,
-    0
+    NULL
   );
 
   // <TABLE>          => default => no border


810-fix-deprecated-conversion-warnings.patch:

--- NEW FILE 810-fix-deprecated-conversion-warnings.patch ---

Substituted 'char*' with 'const char*' in needed places to avoid
'deprecated conversion from string constant to ‘char*’' warnings.
Index: html2text-1.3.2a/sgml.C
===================================================================
--- html2text-1.3.2a.orig/sgml.C	2008-09-20 14:06:15.259781132 +0300
+++ html2text-1.3.2a/sgml.C	2008-09-20 14:09:53.589039389 +0300
@@ -61,7 +61,7 @@
 static const struct TextToInt {
   char name[8];
   int  iso8859code;
-  char *asciistr;
+  const char *asciistr;
   unsigned long unicode;
 } entities[] = {
   { "AElig",   LATIN1_AElig,  "AE",  0x00c6},
Index: html2text-1.3.2a/HTMLParser.h
===================================================================
--- html2text-1.3.2a.orig/HTMLParser.h	2008-09-20 13:59:22.124974404 +0300
+++ html2text-1.3.2a/HTMLParser.h	2008-09-20 14:09:53.589039389 +0300
@@ -487,7 +487,7 @@
  /* decl const */
 public:
  int YY_HTMLParser_PARSE(YY_HTMLParser_PARSE_PARAM);
- virtual void YY_HTMLParser_ERROR(char *) YY_HTMLParser_ERROR_BODY;
+ virtual void YY_HTMLParser_ERROR(const char *) YY_HTMLParser_ERROR_BODY;
 #ifdef YY_HTMLParser_PURE
 #ifdef YY_HTMLParser_LSP_NEEDED
  virtual int  YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY;
Index: html2text-1.3.2a/html2text.C
===================================================================
--- html2text-1.3.2a.orig/html2text.C	2008-09-20 14:09:40.133207014 +0300
+++ html2text-1.3.2a/html2text.C	2008-09-20 14:09:53.589039389 +0300
@@ -85,7 +85,7 @@
   {}
 
 private:
-  /*virtual*/ void yyerror(char *);
+  /*virtual*/ void yyerror(const char *);
   /*virtual*/ void process(const Document &);
 
   ostream &os;
@@ -95,7 +95,7 @@
 };
 
 /*virtual*/ void
-MyParser::yyerror(char *p)
+MyParser::yyerror(const char *p)
 {
 
   /*
Index: html2text-1.3.2a/HTMLParser.C
===================================================================
--- html2text-1.3.2a.orig/HTMLParser.C	2008-09-20 14:08:49.593844329 +0300
+++ html2text-1.3.2a/HTMLParser.C	2008-09-20 14:09:53.593036056 +0300
@@ -600,7 +600,7 @@
  /* decl const */
 public:
  int YY_HTMLParser_PARSE (YY_HTMLParser_PARSE_PARAM);
- virtual void YY_HTMLParser_ERROR(char *msg) YY_HTMLParser_ERROR_BODY;
+ virtual void YY_HTMLParser_ERROR(const char *msg) YY_HTMLParser_ERROR_BODY;
 #ifdef YY_HTMLParser_PURE
 #ifdef YY_HTMLParser_LSP_NEEDED
  virtual int  YY_HTMLParser_LEX (YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY;



--- NEW FILE html2text.spec ---
Name:           html2text
Version:        1.3.2a
Release:        4%{?dist}
Summary:        HTML-to-text converter

Group:          Applications/Text
License:        GPL+
URL:            http://www.mbayer.de/html2text/
Source0:        ftp://ftp.ibiblio.org/pub/linux/apps/www/converters/%{name}-%{version}.tar.gz
# patches from http://patch-tracking.debian.net/package/html2text/1.3.2a-10
#
# http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=381184
# Close every file after processing, not at the end of program.
Patch0:         200-close-files-inside-main-loop.patch
# http://bugs.donarmstrong.com/cgi-bin/bugreport.cgi?bug=285378
# Remove limited built-in http support.
Patch1:         400-remove-builtin-http-support.patch
# http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=178684
# Support UTF-8 encoding when processing input.
Patch2:         500-utf8-support.patch
# Don't use backspaces.
Patch3:         510-disable-backspaces.patch
# http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=496226
# Recognize all <meta> tags, not just one.
Patch4:         600-multiple-meta-tags.patch
# Recode input according to 'meta http-equiv' in html document.
Patch5:         611-recognize-input-encoding.patch
# http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=498797
# Convert output to user's locale charset.
Patch6:         630-recode-output-to-locale-charset.patch
# Correctly specify NULLs for 64-bit architectures.
Patch7:         800-replace-zeroes-with-null.patch
# Substituted 'char*' with 'const char*' in needed places to avoid
# 'deprecated conversion from string constant to ‘char*’' warnings.
Patch8:         810-fix-deprecated-conversion-warnings.patch
BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)


%description
html2text is a command line utility that converts HTML documents into
plain text.
Each HTML document is read from standard input or from a local file,
and formatted into a stream of plain text characters that is written
to standard output or into an output-file. The program preserves the
original positions of table fields and accepts also syntactically
incorrect input, attempting to interpret it "reasonably". The rendering
is largely customisable through an RC file.


%prep
%setup -q
%patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1


%build
%configure
make %{?_smp_mflags} DEBUG="$RPM_OPT_FLAGS"


%install
rm -rf $RPM_BUILD_ROOT
mkdir -p $RPM_BUILD_ROOT%{_bindir} $RPM_BUILD_ROOT%{_mandir}/{man1,man5}
rm -rf __dist_docs
mkdir -p __dist_docs
for file in README  ; do
  iconv -f latin1 -t utf8 $file -o $file.new
  mv -f $file.new $file
done
for file in html2text.1.gz html2textrc.5.gz; do
  basefile=`basename $file .gz`
  gunzip -c $file > __dist_docs/$basefile
  touch -r $file __dist_docs/$basefile
done
install -m0644 -p __dist_docs/html2text.1  $RPM_BUILD_ROOT%{_mandir}/man1
install -m0644 -p __dist_docs/html2textrc.5  $RPM_BUILD_ROOT%{_mandir}/man5
install -m0755 html2text $RPM_BUILD_ROOT%{_bindir}


%clean
rm -rf $RPM_BUILD_ROOT


%files
%defattr(-,root,root,-)
%doc README CHANGES COPYING TODO CREDITS KNOWN_BUGS RELEASE_NOTES
%{_bindir}/html2text
%{_mandir}/man1/html2text.1*
%{_mandir}/man5/html2textrc.5*


%changelog
* Wed Feb 11 2009  Leigh Scott <leigh123linux at googlemail.com> - 1.3.2a-4
- Rebuild using patches from Debian patch tracking system 

* Thu Jul 31 2008 Leigh Scott <leigh123linux at googlemail.com> - 1.3.2a-3
- Convert README to UTF-8, change license to GPL+ and fix compiler flags

* Tue Jul 29 2008 Leigh Scott <leigh123linux at googlemail.com> - 1.3.2a-2
- Rebuild & patch sgml

* Thu Jul 19 2007 Patrice Dumas <pertusus at free.fr> 1.3.2a-1
- initial packaging


--- NEW FILE import.log ---
html2text-1_3_2a-4_fc10:F-10:html2text-1.3.2a-4.fc10.src.rpm:1234533394


Index: .cvsignore
===================================================================
RCS file: /cvs/pkgs/rpms/html2text/F-10/.cvsignore,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- .cvsignore	13 Feb 2009 06:40:08 -0000	1.1
+++ .cvsignore	13 Feb 2009 13:56:11 -0000	1.2
@@ -0,0 +1 @@
+html2text-1.3.2a.tar.gz


Index: sources
===================================================================
RCS file: /cvs/pkgs/rpms/html2text/F-10/sources,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sources	13 Feb 2009 06:40:08 -0000	1.1
+++ sources	13 Feb 2009 13:56:12 -0000	1.2
@@ -0,0 +1 @@
+6097fe07b948e142315749e6620c9cfc  html2text-1.3.2a.tar.gz