rpms/sed/devel sed-4.1.5-utf8performance.patch, NONE, 1.1 sed.spec, 1.36, 1.37 sed-4.1.5-str_append.patch, 1.1, NONE

fedora-cvs-commits at redhat.com fedora-cvs-commits at redhat.com
Thu Aug 3 13:36:26 UTC 2006


Author: pmachata

Update of /cvs/dist/rpms/sed/devel
In directory cvs.devel.redhat.com:/tmp/cvs-serv17618/devel

Modified Files:
	sed.spec 
Added Files:
	sed-4.1.5-utf8performance.patch 
Removed Files:
	sed-4.1.5-str_append.patch 
Log Message:
- remove superfluous multibyte processing in str_append for UTF-8
  encoding (thanks Paolo Bonzini, #177246)


sed-4.1.5-utf8performance.patch:
 execute.c |   35 ++++++++++++++++++-----------------
 mbcs.c    |   25 +++++++++++++++++++++++++
 sed.h     |    1 +
 3 files changed, 44 insertions(+), 17 deletions(-)

--- NEW FILE sed-4.1.5-utf8performance.patch ---
* looking for bonzini at gnu.org--2004b/sed--stable--4.1--patch-69 to compare with
* comparing to bonzini at gnu.org--2004b/sed--stable--4.1--patch-69
M  sed/mbcs.c
M  sed/sed.h
M  sed/execute.c

* modified files

--- orig/sed/execute.c
+++ mod/sed/execute.c
@@ -235,25 +235,26 @@ str_append(to, string, length)
   to->length = new_length;
 
 #ifdef HAVE_MBRTOWC
-  if (mb_cur_max == 1)
-    return;
-
-  while (length)
-    {
-      int n = MBRLEN (string, length, &to->mbstate);
+  if (mb_cur_max > 1 && !is_utf8)
+    while (length)
+      {
+        size_t n = MBRLEN (string, length, &to->mbstate);
 
-      /* An invalid sequence is treated like a singlebyte character. */
-      if (n == -1)
-	{
-	  memset (&to->mbstate, 0, sizeof (to->mbstate));
-	  n = 1;
-	}
+        /* An invalid sequence is treated like a singlebyte character. */
+        if (n == (size_t) -1)
+	  {
+	    memset (&to->mbstate, 0, sizeof (to->mbstate));
+	    n = 1;
+	  }
 
-      if (n > 0)
-	length -= n;
-      else
-	break;
-    }
+        if (n > 0)
+	  {
+	    string += n;
+	    length -= n;
+	  }
+        else
+	  break;
+      }
 #endif
 }
 


--- orig/sed/mbcs.c
+++ mod/sed/mbcs.c
@@ -18,7 +18,12 @@
 #include "sed.h"
 #include <stdlib.h>
 
+#ifdef HAVE_LANGINFO_CODESET
+#include <langinfo.h>
+#endif
+
 int mb_cur_max;
+bool is_utf8;
 
 #ifdef HAVE_MBRTOWC
 /* Add a byte to the multibyte character represented by the state
@@ -47,6 +52,26 @@ int brlen (ch, cur_stat)
 void
 initialize_mbcs ()
 {
+  /* For UTF-8, we know that the encoding is stateless.  */
+  const char *codeset_name;
+
+#ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+#else
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+#endif
+
+  is_utf8 = (strcasecmp (codeset_name, "UTF-8") == 0
+	     || strcasecmp (codeset_name, "UTF8") == 0);
+
 #ifdef HAVE_MBRTOWC
   mb_cur_max = MB_CUR_MAX;
 #else


--- orig/sed/sed.h
+++ mod/sed/sed.h
@@ -233,6 +233,7 @@ extern bool use_extended_syntax_p;
 
 /* Declarations for multibyte character sets.  */
 extern int mb_cur_max;
+extern bool is_utf8;
 
 #ifdef HAVE_MBRTOWC
 #ifdef HAVE_BTOWC





Index: sed.spec
===================================================================
RCS file: /cvs/dist/rpms/sed/devel/sed.spec,v
retrieving revision 1.36
retrieving revision 1.37
diff -u -r1.36 -r1.37
--- sed.spec	2 Aug 2006 11:47:08 -0000	1.36
+++ sed.spec	3 Aug 2006 13:36:23 -0000	1.37
@@ -10,8 +10,8 @@
 Group: Applications/Text
 Source0: ftp://ftp.gnu.org/pub/gnu/sed/sed-%{version}.tar.gz
 Source1: http://sed.sourceforge.net/sedfaq.txt
-Patch0: sed-4.1.5-bz185374.patch
-Patch1: sed-4.1.5-str_append.patch
+Patch0: sed-4.1.5-utf8performance.patch
+Patch1: sed-4.1.5-bz185374.patch
 Prereq: /sbin/install-info
 Prefix: %{_prefix}
 Buildroot: %{_tmppath}/%{name}-root
@@ -67,8 +67,9 @@
 %{_mandir}/man*/*
 
 %changelog
-* Wed Aug  2 2006 Petr Machata <pmachata at redhat.com> - 4.1.5-4
-- remove superfluous multibyte processing in str_append (#177246)
+* Thu Aug  3 2006 Petr Machata <pmachata at redhat.com> - 4.1.5-4
+- remove superfluous multibyte processing in str_append for UTF-8
+  encoding (thanks Paolo Bonzini, #177246)
 
 * Mon Jul 17 2006 Petr Machata <pmachata at redhat.com> - 4.1.5-3
 - use dist tag


--- sed-4.1.5-str_append.patch DELETED ---




More information about the fedora-cvs-commits mailing list