| Here's the diff:
diff -rudb httrack-3.39.53.orig/src/htsparse.c httrack-3.39.53/src/htsparse.c
--- httrack-3.39.53.orig/src/htsparse.c 2005-11-19 14:51:25.000000000 +0100
+++ httrack-3.39.53/src/htsparse.c 2005-12-03 12:08:50.811480978 +0100
@@ -709,6 +709,24 @@
intag_start=adr; intag_start_valid=1;
codebase[0]='\0'; // effacer éventuel codebase
+ /* Meta ? */
+ if (check_tag(intag_start,
"meta")) {
+ int pos;
+ // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+ if ((pos =
rech_tageq_all(adr, "http-equiv"))) {
+ const char*
token = NULL;
+ int len =
rech_endtoken(adr + pos, &token);
+ if (len > 0)
{
+ if
(strfield(token, "content-type")) {
+
intag_ctype=1;
+ }
+ else
if (strfield(token, "refresh")) {
+
intag_ctype=2;
+ }
+ }
+ }
+ }
+
if (opt->getmode & 1) { // sauver html
p=strfield(adr,"</html");
if (p==0) p=strfield(adr,"<head>");
@@ -1204,26 +1222,6 @@
}
}
- // charset meta tags
- if (p==0) {
- if ((intag_start_valid) &&
check_tag(intag_start,"meta")) {
- int pos;
- // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
- if ((pos=rech_tageq(adr, "http-equiv"))) {
- const char* token = NULL;
- int len = rech_endtoken(adr + pos, &token);
- if (len > 0) {
- if (strfield(token, "content-type")) {
- intag_ctype=1;
- }
- else if (strfield(token, "refresh")) {
- intag_ctype=2;
- }
- }
- }
- }
- }
-
// entrée dans une applet javascript
/*if (!inscript) { // sinon on est dans un
obj.write("..
if (p==0)
diff -rudb httrack-3.39.53.orig/src/htstools.c httrack-3.39.53/src/htstools.c
--- httrack-3.39.53.orig/src/htstools.c 2005-11-19 14:51:26.000000000 +0100
+++ httrack-3.39.53/src/htstools.c 2005-12-03 12:08:50.814480140 +0100
@@ -492,6 +492,40 @@
return 0;
}
+HTS_INLINE int rech_tageq_all(const char* adr, const char* s) {
+ int p;
+ char quot = 0;
+ const char *token = NULL;
+ int s_len = (int) strlen(s);
+ if (adr == NULL) {
+ return 0;
+ }
+ for(p = 0 ; adr[p] != 0 ; p++) {
+ if (quot == 0) {
+ if (adr[p] == '"' || adr[p] == '\'' ) {
+ quot = adr[p];
+ } else if (adr[p] == '=' || is_realspace(adr[p]) ) {
+ token = NULL;
+ } else if (adr[p] == '>') {
+ break;
+ } else { /* note: bogus for bogus foo = bar */
+ if (token == NULL) {
+ if (strncasecmp(&adr[p], s, s_len) ==
0
+ && (is_realspace(adr[p +
s_len]) || adr[p + s_len] == '=')
+ ) {
+ for( p += s_len ;
is_realspace(adr[p]) || adr[p] == '=' ; p++ );
+ return p;
+ }
+ token = &adr[p];
+ }
+ }
+ } else if (adr[p] == quot) {
+ quot = 0;
+ }
+ }
+ return 0;
+}
+
HTS_INLINE int rech_endtoken(const char* adr, const char** start) {
char quote = '\0';
int length = 0;
diff -rudb httrack-3.39.53.orig/src/htstools.h httrack-3.39.53/src/htstools.h
--- httrack-3.39.53.orig/src/htstools.h 2004-12-11 11:00:51.000000000 +0100
+++ httrack-3.39.53/src/htstools.h 2005-12-03 11:41:50.000000000 +0100
@@ -87,6 +87,7 @@
void longfile_to_83(int mode,char* n83,char* save);
HTS_INLINE int __rech_tageq(const char* adr,const char* s);
HTS_INLINE int __rech_tageqbegdigits(const char* adr,const char* s);
+HTS_INLINE int rech_tageq_all(const char* adr, const char* s);
#define rech_tageq(adr,s) \
( \
( (*((adr)-1)=='<') || (is_space(*((adr)-1))) ) ? \
| |