HTTrack Website Copier
Free software offline browser - FORUM
Subject: Re: Can't follow meta refresh (v3.33-16)
Author: Xavier Roche
Date: 12/03/2005 12:11
 
Here's the diff:

diff -rudb httrack-3.39.53.orig/src/htsparse.c httrack-3.39.53/src/htsparse.c
--- httrack-3.39.53.orig/src/htsparse.c 2005-11-19 14:51:25.000000000 +0100
+++ httrack-3.39.53/src/htsparse.c      2005-12-03 12:08:50.811480978 +0100
@@ -709,6 +709,24 @@
             intag_start=adr; intag_start_valid=1;
             codebase[0]='\0';    // effacer éventuel codebase

+                                               /* Meta ? */
+                                               if (check_tag(intag_start, "meta")) {
+                                                       int pos;
+                                                       // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+                                                       if ((pos = rech_tageq_all(adr, "http-equiv"))) {
+                                                               const char* token = NULL;
+                                                               int len = rech_endtoken(adr + pos, &token);
+                                                               if (len > 0) {
+                                                                       if (strfield(token, "content-type")) {
+                                                                               intag_ctype=1;
+                                                                       }
+                                                                       else if (strfield(token, "refresh")) {
+                                                                               intag_ctype=2;
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+
             if (opt->getmode & 1) {  // sauver html
               p=strfield(adr,"</html");
               if (p==0) p=strfield(adr,"<head>");
@@ -1204,26 +1222,6 @@
                         }
                       }

-                      // charset meta tags
-                      if (p==0) {
-                        if ((intag_start_valid) && check_tag(intag_start,"meta")) {
-                          int pos;
-                          // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
-                          if ((pos=rech_tageq(adr, "http-equiv"))) {
-                            const char* token = NULL;
-                            int len = rech_endtoken(adr + pos, &token);
-                            if (len > 0) {
-                              if (strfield(token, "content-type")) {
-                                intag_ctype=1;
-                              }
-                              else if (strfield(token, "refresh")) {
-                                intag_ctype=2;
-                              }
-                            }
-                          }
-                        }
-                      }
-
                       // entrée dans une applet javascript
                       /*if (!inscript) {  // sinon on est dans un obj.write("..
                       if (p==0)
diff -rudb httrack-3.39.53.orig/src/htstools.c httrack-3.39.53/src/htstools.c
--- httrack-3.39.53.orig/src/htstools.c 2005-11-19 14:51:26.000000000 +0100
+++ httrack-3.39.53/src/htstools.c      2005-12-03 12:08:50.814480140 +0100
@@ -492,6 +492,40 @@
   return 0;
 }

+HTS_INLINE int rech_tageq_all(const char* adr, const char* s) {
+       int p;
+       char quot = 0;
+       const char *token = NULL;
+       int s_len = (int) strlen(s);
+       if (adr == NULL) {
+               return 0;
+       }
+       for(p = 0 ; adr[p] != 0 ; p++) {
+               if (quot == 0) {
+                       if (adr[p] == '"' || adr[p] == '\'' ) {
+                               quot = adr[p];
+                       } else if (adr[p] == '=' || is_realspace(adr[p]) ) {
+                               token = NULL;
+                       } else if (adr[p] == '>') {
+                               break;
+                       } else {                                /* note: bogus for bogus foo = bar */
+                               if (token == NULL) {
+                                       if (strncasecmp(&adr[p], s, s_len) == 0
+                                               && (is_realspace(adr[p + s_len]) || adr[p + s_len] == '=')
+                                               ) {
+                                               for( p += s_len ; is_realspace(adr[p]) || adr[p] == '=' ; p++ );
+                                               return p;
+                                       }
+                                       token = &adr[p];
+                               }
+                       }
+               } else if (adr[p] == quot) {
+                       quot = 0;
+               }
+       }
+  return 0;
+}
+
 HTS_INLINE int rech_endtoken(const char* adr, const char** start) {
   char quote = '\0';
   int length = 0;
diff -rudb httrack-3.39.53.orig/src/htstools.h httrack-3.39.53/src/htstools.h
--- httrack-3.39.53.orig/src/htstools.h 2004-12-11 11:00:51.000000000 +0100
+++ httrack-3.39.53/src/htstools.h      2005-12-03 11:41:50.000000000 +0100
@@ -87,6 +87,7 @@
 void longfile_to_83(int mode,char* n83,char* save);
 HTS_INLINE int __rech_tageq(const char* adr,const char* s);
 HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);
+HTS_INLINE int rech_tageq_all(const char* adr, const char* s);
 #define rech_tageq(adr,s) \
   ( \
     ( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
 
Reply Create subthread


All articles

Subject Author Date
Re: Can't follow meta refresh (v3.33-16)

12/03/2005 12:06
Re: Can't follow meta refresh (v3.33-16)

12/03/2005 12:11




2

Created with FORUM 2.0.11