1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-01-31 04:41:23 -08:00

* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with

the Gnus CVS.

	* mm-util.el (mm-mime-mule-charset-alist): Move down and call
	mm-coding-system-p. Don't correct it only in XEmacs.
	(mm-charset-to-coding-system): Use mm-coding-system-p and
	mm-get-coding-system-list.
	(mm-emacs-mule, mm-mule4-p): New.
	(mm-enable-multibyte, mm-disable-multibyte,
	mm-enable-multibyte-mule4, mm-disable-multibyte-mule4,
	mm-with-unibyte-current-buffer,
	mm-with-unibyte-current-buffer-mule4): Use them.
	(mm-find-mime-charset-region): Treat iso-2022-jp.

	From  Dave Love  <fx@gnu.org>:

	* mm-util.el (mm-mime-mule-charset-alist): Make it correct by
	construction.
	(mm-charset-synonym-alist): Remove windows-125[02].  Make other
	entries conditional on not having a coding system defined for
	them.
	(mm-mule-charset-to-mime-charset): Use
	find-coding-systems-for-charsets if defined.
	(mm-charset-to-coding-system): Don't use
	mm-get-coding-system-list.  Look in mm-charset-synonym-alist
	later.  Add last resort search of coding systems.
	(mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
	(mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
	Mule 4.
	(mm-find-mime-charset-region): Re-write.
	(mm-with-unibyte-current-buffer): Restore buffer as well as
	multibyteness.
This commit is contained in:
ShengHuo ZHU 2001-10-31 04:16:51 +00:00
parent bf9bb76fe5
commit 95fa1ff74a
5 changed files with 737 additions and 441 deletions

View file

@ -1,3 +1,38 @@
2001-10-30 ShengHuo ZHU <zsh@cs.rochester.edu>
* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with
the Gnus CVS.
* mm-util.el (mm-mime-mule-charset-alist): Move down and call
mm-coding-system-p. Don't correct it only in XEmacs.
(mm-charset-to-coding-system): Use mm-coding-system-p and
mm-get-coding-system-list.
(mm-emacs-mule, mm-mule4-p): New.
(mm-enable-multibyte, mm-disable-multibyte,
mm-enable-multibyte-mule4, mm-disable-multibyte-mule4,
mm-with-unibyte-current-buffer,
mm-with-unibyte-current-buffer-mule4): Use them.
(mm-find-mime-charset-region): Treat iso-2022-jp.
From Dave Love <fx@gnu.org>:
* mm-util.el (mm-mime-mule-charset-alist): Make it correct by
construction.
(mm-charset-synonym-alist): Remove windows-125[02]. Make other
entries conditional on not having a coding system defined for
them.
(mm-mule-charset-to-mime-charset): Use
find-coding-systems-for-charsets if defined.
(mm-charset-to-coding-system): Don't use
mm-get-coding-system-list. Look in mm-charset-synonym-alist
later. Add last resort search of coding systems.
(mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
(mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
Mule 4.
(mm-find-mime-charset-region): Re-write.
(mm-with-unibyte-current-buffer): Restore buffer as well as
multibyteness.
2001-10-30 Simon Josefsson <jas@extundo.com>
* nnimap.el (nnimap-date-days-ago): Defeat locale.

View file

@ -1,4 +1,4 @@
;;; mm-util.el --- utility functions for MIME things
;;; mm-util.el --- Utility functions for Mule and low level things
;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@ -27,11 +27,145 @@
(eval-when-compile (require 'cl))
(require 'mail-prsvr)
(eval-and-compile
(mapcar
(lambda (elem)
(let ((nfunc (intern (format "mm-%s" (car elem)))))
(if (fboundp (car elem))
(defalias nfunc (car elem))
(defalias nfunc (cdr elem)))))
'((decode-coding-string . (lambda (s a) s))
(encode-coding-string . (lambda (s a) s))
(encode-coding-region . ignore)
(coding-system-list . ignore)
(decode-coding-region . ignore)
(char-int . identity)
(device-type . ignore)
(coding-system-equal . equal)
(annotationp . ignore)
(set-buffer-file-coding-system . ignore)
(make-char
. (lambda (charset int)
(int-to-char int)))
(read-charset
. (lambda (prompt)
"Return a charset."
(intern
(completing-read
prompt
(mapcar (lambda (e) (list (symbol-name (car e))))
mm-mime-mule-charset-alist)
nil t))))
(subst-char-in-string
. (lambda (from to string) ;; stolen (and renamed) from nnheader.el
"Replace characters in STRING from FROM to TO."
(let ((string (substring string 0)) ;Copy string.
(len (length string))
(idx 0))
;; Replace all occurrences of FROM with TO.
(while (< idx len)
(when (= (aref string idx) from)
(aset string idx to))
(setq idx (1+ idx)))
string)))
(string-as-unibyte . identity)
(string-as-multibyte . identity)
(multibyte-string-p . ignore))))
(eval-and-compile
(defalias 'mm-char-or-char-int-p
(cond
((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
((fboundp 'char-valid-p) 'char-valid-p)
(t 'identity))))
(eval-and-compile
(defalias 'mm-read-coding-system
(cond
((fboundp 'read-coding-system)
(if (and (featurep 'xemacs)
(<= (string-to-number emacs-version) 21.1))
(lambda (prompt &optional default-coding-system)
(read-coding-system prompt))
'read-coding-system))
(t (lambda (prompt &optional default-coding-system)
"Prompt the user for a coding system."
(completing-read
prompt (mapcar (lambda (s) (list (symbol-name (car s))))
mm-mime-mule-charset-alist)))))))
(defvar mm-coding-system-list nil)
(defun mm-get-coding-system-list ()
"Get the coding system list."
(or mm-coding-system-list
(setq mm-coding-system-list (mm-coding-system-list))))
(defun mm-coding-system-p (sym)
"Return non-nil if SYM is a coding system."
(or (and (fboundp 'coding-system-p) (coding-system-p sym))
(memq sym (mm-get-coding-system-list))))
(defvar mm-charset-synonym-alist
`(
;; Perfectly fine? A valid MIME name, anyhow.
,(unless (mm-coding-system-p 'big5)
'(big5 . cn-big5))
;; Not in XEmacs, but it's not a proper MIME charset anyhow.
,(unless (mm-coding-system-p 'x-ctext)
'(x-ctext . ctext))
;; Apparently not defined in Emacs 20, but is a valid MIME name.
,(unless (mm-coding-system-p 'gb2312)
'(gb2312 . cn-gb-2312))
;; Windows-1252 is actually a superset of Latin-1. See also
;; `gnus-article-dumbquotes-map'.
;;,(unless (mm-coding-system-p 'windows-1252)
; should be defined eventually
;; '(windows-1252 . iso-8859-1))
;; ISO-8859-15 is very similar to ISO-8859-1.
;;,(unless (mm-coding-system-p 'iso-8859-15) ; Emacs 21 defines it.
;; '(iso-8859-15 . iso-8859-1))
;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
;; Outlook users in Czech republic. Use this to allow reading of their
;; e-mails. cp1250 should be defined by M-x codepage-setup.
;;,(unless (mm-coding-system-p 'windows-1250)
; should be defined eventually
;; '(windows-1250 . cp1250))
)
"A mapping from invalid charset names to the real charset names.")
(defvar mm-binary-coding-system
(cond
((mm-coding-system-p 'binary) 'binary)
((mm-coding-system-p 'no-conversion) 'no-conversion)
(t nil))
"100% binary coding system.")
(defvar mm-text-coding-system
(or (if (memq system-type '(windows-nt ms-dos ms-windows))
(and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
(and (mm-coding-system-p 'raw-text) 'raw-text))
mm-binary-coding-system)
"Text-safe coding system (For removing ^M).")
(defvar mm-text-coding-system-for-write nil
"Text coding system for write.")
(defvar mm-auto-save-coding-system
(cond
((mm-coding-system-p 'emacs-mule)
(if (memq system-type '(windows-nt ms-dos ms-windows))
(if (mm-coding-system-p 'emacs-mule-dos)
'emacs-mule-dos mm-binary-coding-system)
'emacs-mule))
((mm-coding-system-p 'escape-quoted) 'escape-quoted)
(t mm-binary-coding-system))
"Coding system of auto save file.")
(defvar mm-universal-coding-system mm-auto-save-coding-system
"The universal Coding system.")
;; Fixme: some of the cars here aren't valid MIME charsets. That
;; should only matter with XEmacs, though.
(defvar mm-mime-mule-charset-alist
`((us-ascii ascii)
(iso-8859-1 latin-iso8859-1)
@ -40,7 +174,7 @@
(iso-8859-4 latin-iso8859-4)
(iso-8859-5 cyrillic-iso8859-5)
;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
;; charset is koi8-r, not iso-8859-5.
(koi8-r cyrillic-iso8859-5 gnus-koi8-r)
(iso-8859-6 arabic-iso8859-6)
@ -76,108 +210,32 @@
chinese-cns11643-3 chinese-cns11643-4
chinese-cns11643-5 chinese-cns11643-6
chinese-cns11643-7)
;; utf-8 comes either from Mule-UCS or Mule 5+.
,@(if (mm-coding-system-p 'utf-8)
(list (cons 'utf-8 (delete 'ascii
(coding-system-get
'mule-utf-8
'safe-charsets))))))
,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
(charsetp 'unicode-a)
(not (mm-coding-system-p 'mule-utf-8)))
'(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
;; If we have utf-8 we're in Mule 5+.
(append '(utf-8)
(delete 'ascii
(coding-system-get 'mule-utf-8 'safe-charsets)))))
"Alist of MIME-charset/MULE-charsets.")
(eval-and-compile
(mapcar
(lambda (elem)
(let ((nfunc (intern (format "mm-%s" (car elem)))))
(if (fboundp (car elem))
(defalias nfunc (car elem))
(defalias nfunc (cdr elem)))))
'((decode-coding-string . (lambda (s a) s))
(encode-coding-string . (lambda (s a) s))
(encode-coding-region . ignore)
(coding-system-list . ignore)
(decode-coding-region . ignore)
(char-int . identity)
(device-type . ignore)
(coding-system-equal . equal)
(annotationp . ignore)
(set-buffer-file-coding-system . ignore)
(make-char
. (lambda (charset int)
(int-to-char int)))
(read-coding-system
. (lambda (prompt)
"Prompt the user for a coding system."
(completing-read
prompt (mapcar (lambda (s) (list (symbol-name (car s))))
mm-mime-mule-charset-alist))))
(read-charset
. (lambda (prompt)
"Return a charset."
(intern
(completing-read
prompt
(mapcar (lambda (e) (list (symbol-name (car e))))
mm-mime-mule-charset-alist)
nil t))))
(string-as-unibyte . identity)
(multibyte-string-p . ignore)
)))
(eval-and-compile
(defalias 'mm-char-or-char-int-p
(cond
((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
((fboundp 'char-valid-p) 'char-valid-p)
(t 'identity))))
(defvar mm-coding-system-list nil)
(defun mm-get-coding-system-list ()
"Get the coding system list."
(or mm-coding-system-list
(setq mm-coding-system-list (mm-coding-system-list))))
(defvar mm-charset-synonym-alist
`((big5 . cn-big5)
(gb2312 . cn-gb-2312)
;; Windows-1252 is actually a superset of Latin-1. See also
;; `gnus-article-dumbquotes-map'.
,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually
'(windows-1252 . iso-8859-1))
;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
;; Outlook users in Czech republic. Use this to allow reading of their
;; e-mails. cp1250 should be defined by M-x codepage-setup.
,(unless (mm-coding-system-p 'windows-1250) ; should be defined eventually
'(windows-1250 . cp1250))
(x-ctext . ctext))
"A mapping from invalid charset names to the real charset names.")
(defvar mm-binary-coding-system
(cond
((mm-coding-system-p 'binary) 'binary)
((mm-coding-system-p 'no-conversion) 'no-conversion)
(t nil))
"100% binary coding system.")
(defvar mm-text-coding-system
(or (if (memq system-type '(windows-nt ms-dos ms-windows))
(and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
(and (mm-coding-system-p 'raw-text) 'raw-text))
mm-binary-coding-system)
"Text-safe coding system (For removing ^M).")
(defvar mm-text-coding-system-for-write nil
"Text coding system for write.")
(defvar mm-auto-save-coding-system
(cond
((mm-coding-system-p 'emacs-mule)
(if (memq system-type '(windows-nt ms-dos ms-windows))
(if (mm-coding-system-p 'emacs-mule-dos)
'emacs-mule-dos mm-binary-coding-system)
'emacs-mule))
((mm-coding-system-p 'escape-quoted) 'escape-quoted)
(t mm-binary-coding-system))
"Coding system of auto save file.")
;; Correct by construction, but should be unnecessary:
;; XEmacs hates it.
(when (and (not (featurep 'xemacs))
(fboundp 'coding-system-list)
(fboundp 'sort-coding-systems))
(setq mm-mime-mule-charset-alist
(apply
'nconc
(mapcar
(lambda (cs)
(when (and (coding-system-get cs 'mime-charset)
(not (eq t (coding-system-get cs 'safe-charsets))))
(list (cons (coding-system-get cs 'mime-charset)
(delq 'ascii
(coding-system-get cs 'safe-charsets))))))
(sort-coding-systems (coding-system-list 'base-only))))))
;;; Internal variables:
@ -185,14 +243,21 @@
(defun mm-mule-charset-to-mime-charset (charset)
"Return the MIME charset corresponding to the given Mule CHARSET."
(let ((alist mm-mime-mule-charset-alist)
out)
(while alist
(when (memq charset (cdar alist))
(setq out (caar alist)
alist nil))
(pop alist))
out))
(if (fboundp 'find-coding-systems-for-charsets)
(let (mime)
(dolist (cs (find-coding-systems-for-charsets (list charset)))
(unless mime
(when cs
(setq mime (coding-system-get cs 'mime-charset)))))
mime)
(let ((alist mm-mime-mule-charset-alist)
out)
(while alist
(when (memq charset (cdar alist))
(setq out (caar alist)
alist nil))
(pop alist))
out)))
(defun mm-charset-to-coding-system (charset &optional lbt)
"Return coding-system corresponding to CHARSET.
@ -201,9 +266,6 @@ If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system."
(when (stringp charset)
(setq charset (intern (downcase charset))))
(setq charset
(or (cdr (assq charset mm-charset-synonym-alist))
charset))
(when lbt
(setq charset (intern (format "%s-%s" charset lbt))))
(cond
@ -215,58 +277,73 @@ used as the line break code type of the coding system."
'ascii)
;; Check to see whether we can handle this charset. (This depends
;; on there being some coding system matching each `mime-charset'
;; coding sysytem property defined, as there should be.)
((memq charset (mm-get-coding-system-list))
;; property defined, as there should be.)
((and (mm-coding-system-p charset)
;;; Doing this would potentially weed out incorrect charsets.
;;; charset
;;; (eq charset (coding-system-get charset 'mime-charset))
)
charset)
;; Nope.
(t
nil)))
;; Translate invalid charsets.
((mm-coding-system-p (setq charset
(cdr (assq charset
mm-charset-synonym-alist))))
charset)
;; Last resort: search the coding system list for entries which
;; have the right mime-charset in case the canonical name isn't
;; defined (though it should be).
((let (cs)
;; mm-get-coding-system-list returns a list of cs without lbt.
;; Do we need -lbt?
(dolist (c (mm-get-coding-system-list))
(if (and (null cs)
(eq charset (coding-system-get c 'mime-charset)))
(setq cs c)))
cs))))
(if (fboundp 'subst-char-in-string)
(defsubst mm-replace-chars-in-string (string from to)
(subst-char-in-string from to string))
(defun mm-replace-chars-in-string (string from to)
"Replace characters in STRING from FROM to TO."
(let ((string (substring string 0)) ;Copy string.
(len (length string))
(idx 0))
;; Replace all occurrences of FROM with TO.
(while (< idx len)
(when (= (aref string idx) from)
(aset string idx to))
(setq idx (1+ idx)))
string)))
(defsubst mm-replace-chars-in-string (string from to)
(mm-subst-char-in-string from to string))
(defsubst mm-enable-multibyte ()
"Set the multibyte flag of the current buffer.
(eval-and-compile
(defvar mm-emacs-mule (and (not (featurep 'xemacs))
(boundp 'default-enable-multibyte-characters)
default-enable-multibyte-characters
(fboundp 'set-buffer-multibyte))
"Emacs mule.")
(defvar mm-mule4-p (and mm-emacs-mule
(fboundp 'charsetp)
(not (charsetp 'eight-bit-control)))
"Mule version 4.")
(if mm-emacs-mule
(defun mm-enable-multibyte ()
"Set the multibyte flag of the current buffer.
Only do this if the default value of `enable-multibyte-characters' is
non-nil. This is a no-op in XEmacs."
(when (and (fboundp 'set-buffer-multibyte)
(boundp 'enable-multibyte-characters)
(default-value 'enable-multibyte-characters))
(set-buffer-multibyte t)))
(set-buffer-multibyte t))
(defalias 'mm-enable-multibyte 'ignore))
(defsubst mm-disable-multibyte ()
"Unset the multibyte flag of in the current buffer.
(if mm-emacs-mule
(defun mm-disable-multibyte ()
"Unset the multibyte flag of in the current buffer.
This is a no-op in XEmacs."
(when (fboundp 'set-buffer-multibyte)
(set-buffer-multibyte nil)))
(set-buffer-multibyte nil))
(defalias 'mm-disable-multibyte 'ignore))
(defsubst mm-enable-multibyte-mule4 ()
"Enable multibyte in the current buffer.
(if mm-mule4-p
(defun mm-enable-multibyte-mule4 ()
"Enable multibyte in the current buffer.
Only used in Emacs Mule 4."
(when (and (fboundp 'set-buffer-multibyte)
(boundp 'enable-multibyte-characters)
(default-value 'enable-multibyte-characters)
(not (charsetp 'eight-bit-control)))
(set-buffer-multibyte t)))
(defsubst mm-disable-multibyte-mule4 ()
"Disable multibyte in the current buffer.
(set-buffer-multibyte t))
(defalias 'mm-enable-multibyte-mule4 'ignore))
(if mm-mule4-p
(defun mm-disable-multibyte-mule4 ()
"Disable multibyte in the current buffer.
Only used in Emacs Mule 4."
(when (and (fboundp 'set-buffer-multibyte)
(not (charsetp 'eight-bit-control)))
(set-buffer-multibyte nil)))
(set-buffer-multibyte nil))
(defalias 'mm-disable-multibyte-mule4 'ignore)))
(defun mm-preferred-coding-system (charset)
;; A typo in some Emacs versions.
@ -294,10 +371,10 @@ If the charset is `composition', return the actual one."
(progn
(setq mail-parse-mule-charset
(and (boundp 'current-language-environment)
(car (last
(assq 'charset
(assoc current-language-environment
language-info-alist))))))
(car (last
(assq 'charset
(assoc current-language-environment
language-info-alist))))))
(if (or (not mail-parse-mule-charset)
(eq mail-parse-mule-charset 'ascii))
(setq mail-parse-mule-charset
@ -309,6 +386,8 @@ If the charset is `composition', return the actual one."
(defun mm-mime-charset (charset)
"Return the MIME charset corresponding to the given Mule CHARSET."
(if (eq charset 'unknown)
(error "The message contains non-printable characters, please use attachment"))
(if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
;; This exists in Emacs 20.
(or
@ -317,6 +396,7 @@ If the charset is `composition', return the actual one."
(mm-preferred-coding-system charset) 'mime-charset))
(and (eq charset 'ascii)
'us-ascii)
(mm-preferred-coding-system charset)
(mm-mule-charset-to-mime-charset charset))
;; This is for XEmacs.
(mm-mule-charset-to-mime-charset charset)))
@ -330,21 +410,8 @@ If the charset is `composition', return the actual one."
(setq result (cons head result)))
(nreverse result)))
(defun mm-find-mime-charset-region (b e)
"Return the MIME charsets needed to encode the region between B and E."
(let ((charsets (mapcar 'mm-mime-charset
(delq 'ascii
(mm-find-charset-region b e)))))
(when (memq 'iso-2022-jp-2 charsets)
(setq charsets (delq 'iso-2022-jp charsets)))
(setq charsets (mm-delete-duplicates charsets))
(if (and (> (length charsets) 1)
(fboundp 'find-coding-systems-region)
(let ((cs (find-coding-systems-region b e)))
(or (memq 'utf-8 cs) (memq 'mule-utf-8 cs))))
'(utf-8)
charsets)))
;; It's not clear whether this is supposed to mean the global or local
;; setting. I think it's used inconsistently. -- fx
(defsubst mm-multibyte-p ()
"Say whether multibyte is enabled."
(if (and (not (featurep 'xemacs))
@ -352,6 +419,39 @@ If the charset is `composition', return the actual one."
enable-multibyte-characters
(featurep 'mule)))
(defun mm-find-mime-charset-region (b e)
"Return the MIME charsets needed to encode the region between B and E.
Nil means ASCII, a single-element list represents an appropriate MIME
charset, and a longer list means no appropriate charset."
;; The return possibilities of this function are a mess...
(or (and
(mm-multibyte-p)
(fboundp 'find-coding-systems-region)
;; Find the mime-charset of the most preferred coding
;; system that has one.
(let ((systems (find-coding-systems-region b e))
result)
;; Fixme: The `mime-charset' (`x-ctext') of `compound-text'
;; is not in the IANA list.
(setq systems (delq 'compound-text systems))
(unless (equal systems '(undecided))
(while systems
(let ((cs (coding-system-get (pop systems) 'mime-charset)))
(if cs
(setq systems nil
result (list cs))))))
result))
;; Otherwise we're not multibyte, XEmacs or a single coding
;; system won't cover it.
(let ((charsets
(mm-delete-duplicates
(mapcar 'mm-mime-charset
(delq 'ascii
(mm-find-charset-region b e))))))
(if (memq 'iso-2022-jp-2 charsets)
(delq 'iso-2022-jp charsets)
charsets))))
(defmacro mm-with-unibyte-buffer (&rest forms)
"Create a temporary buffer, and evaluate FORMS there like `progn'.
Use unibyte mode for this."
@ -364,15 +464,18 @@ Use unibyte mode for this."
"Evaluate FORMS with current current buffer temporarily made unibyte.
Also bind `default-enable-multibyte-characters' to nil.
Equivalent to `progn' in XEmacs"
(let ((multibyte (make-symbol "multibyte")))
`(if (fboundp 'set-buffer-multibyte)
(let ((,multibyte enable-multibyte-characters))
(let ((multibyte (make-symbol "multibyte"))
(buffer (make-symbol "buffer")))
`(if mm-emacs-mule
(let ((,multibyte enable-multibyte-characters)
(,buffer (current-buffer)))
(unwind-protect
(let (default-enable-multibyte-characters)
(set-buffer-multibyte nil)
,@forms)
(set-buffer ,buffer)
(set-buffer-multibyte ,multibyte)))
(progn
(let (default-enable-multibyte-characters)
,@forms))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
@ -380,22 +483,19 @@ Equivalent to `progn' in XEmacs"
(defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms)
"Evaluate FORMS there like `progn' in current buffer.
Mule4 only."
(let ((multibyte (make-symbol "multibyte")))
`(if (or (featurep 'xemacs)
(not (fboundp 'set-buffer-multibyte))
(charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only.
(progn
,@forms)
(let ((,multibyte (default-value 'enable-multibyte-characters)))
(unwind-protect
(let ((buffer-file-coding-system mm-binary-coding-system)
(coding-system-for-read mm-binary-coding-system)
(coding-system-for-write mm-binary-coding-system))
(set-buffer-multibyte nil)
(setq-default enable-multibyte-characters nil)
,@forms)
(setq-default enable-multibyte-characters ,multibyte)
(set-buffer-multibyte ,multibyte))))))
(let ((multibyte (make-symbol "multibyte"))
(buffer (make-symbol "buffer")))
`(if mm-mule4-p
(let ((,multibyte enable-multibyte-characters)
(,buffer (current-buffer)))
(unwind-protect
(let (default-enable-multibyte-characters)
(set-buffer-multibyte nil)
,@forms)
(set-buffer ,buffer)
(set-buffer-multibyte ,multibyte)))
(let (default-enable-multibyte-characters)
,@forms))))
(put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body))
@ -410,9 +510,14 @@ Mule4 only."
"Return a list of Emacs charsets in the region B to E."
(cond
((and (mm-multibyte-p)
(fboundp 'find-charset-region))
(fboundp 'find-charset-region))
;; Remove composition since the base charsets have been included.
(delq 'composition (find-charset-region b e)))
;; Remove eight-bit-*, treat them as ascii.
(let ((css (find-charset-region b e)))
(mapcar (lambda (cs) (setq css (delq cs css)))
'(composition eight-bit-control eight-bit-graphic
control-1))
css))
(t
;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
(save-excursion
@ -425,8 +530,8 @@ Mule4 only."
(let (charset)
(setq charset
(and (boundp 'current-language-environment)
(car (last (assq 'charset
(assoc current-language-environment
(car (last (assq 'charset
(assoc current-language-environment
language-info-alist))))))
(if (eq charset 'ascii) (setq charset nil))
(or charset
@ -476,15 +581,15 @@ If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers.
(auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
(default-major-mode 'fundamental-mode)
(enable-local-variables nil)
(after-insert-file-functions nil)
(after-insert-file-functions nil)
(enable-local-eval nil)
(find-file-hooks nil)
(inhibit-file-name-operation (if inhibit
(inhibit-file-name-operation (if inhibit
'insert-file-contents
inhibit-file-name-operation))
(inhibit-file-name-handlers
(if inhibit
(append mm-inhibit-file-name-handlers
(append mm-inhibit-file-name-handlers
inhibit-file-name-handlers)
inhibit-file-name-handlers)))
(insert-file-contents filename visit beg end replace)))
@ -497,37 +602,47 @@ saying what text to write.
Optional fourth argument specifies the coding system to use when
encoding the file.
If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
(let ((coding-system-for-write
(or codesys mm-text-coding-system-for-write
(let ((coding-system-for-write
(or codesys mm-text-coding-system-for-write
mm-text-coding-system))
(inhibit-file-name-operation (if inhibit
(inhibit-file-name-operation (if inhibit
'append-to-file
inhibit-file-name-operation))
(inhibit-file-name-handlers
(if inhibit
(append mm-inhibit-file-name-handlers
(append mm-inhibit-file-name-handlers
inhibit-file-name-handlers)
inhibit-file-name-handlers)))
(append-to-file start end filename)))
(defun mm-write-region (start end filename &optional append visit lockname
(defun mm-write-region (start end filename &optional append visit lockname
coding-system inhibit)
"Like `write-region'.
If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
(let ((coding-system-for-write
(or coding-system mm-text-coding-system-for-write
(let ((coding-system-for-write
(or coding-system mm-text-coding-system-for-write
mm-text-coding-system))
(inhibit-file-name-operation (if inhibit
(inhibit-file-name-operation (if inhibit
'write-region
inhibit-file-name-operation))
(inhibit-file-name-handlers
(if inhibit
(append mm-inhibit-file-name-handlers
(append mm-inhibit-file-name-handlers
inhibit-file-name-handlers)
inhibit-file-name-handlers)))
(write-region start end filename append visit lockname)))
(defun mm-image-load-path (&optional package)
(let (dir result)
(dolist (path load-path (nreverse result))
(if (file-directory-p
(setq dir (concat (file-name-directory
(directory-file-name path))
"etc/" (or package "gnus/"))))
(push dir result))
(push path result))))
(provide 'mm-util)
;;; mm-util.el ends here

View file

@ -1,5 +1,5 @@
;;; nnslashdot.el --- interfacing with Slashdot
;; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: news
@ -57,6 +57,9 @@
"http://slashdot.org/article.pl?sid=%s&mode=nocomment"
"Where nnslashdot will fetch the article from.")
(defvoo nnslashdot-backslash-url "http://slashdot.org/slashdot.xml"
"Where nnslashdot will fetch the stories from.")
(defvoo nnslashdot-threshold -1
"The article threshold.")
@ -86,19 +89,17 @@
(nnslashdot-possibly-change-server group server)
(condition-case why
(unless gnus-nov-is-evil
(if nnslashdot-threaded
(nnslashdot-threaded-retrieve-headers articles group)
(nnslashdot-sane-retrieve-headers articles group)))
(nnslashdot-retrieve-headers-1 articles group))
(search-failed (nnslashdot-lose why))))
(deffoo nnslashdot-threaded-retrieve-headers (articles group)
(let ((last (car (last articles)))
(did nil)
(start 1)
(sid (caddr (assoc group nnslashdot-groups)))
(first-comments t)
(startats '(1))
headers article subject score from date lines parent point s)
(deffoo nnslashdot-retrieve-headers-1 (articles group)
(let* ((last (car (last articles)))
(start (if nnslashdot-threaded 1 (pop articles)))
(entry (assoc group nnslashdot-groups))
(sid (nth 2 entry))
(first-comments t)
headers article subject score from date lines parent point cid
s startats changed)
(save-excursion
(set-buffer nnslashdot-buffer)
(let ((case-fold-search t))
@ -107,10 +108,10 @@
(nnweb-insert (format nnslashdot-article-url
(nnslashdot-sid-strip sid)) t)
(goto-char (point-min))
(search-forward "Posted by ")
(when (looking-at "<a[^>]+>\\([^<]+\\)")
(setq from (nnweb-decode-entities-string (match-string 1))))
(search-forward " on ")
(re-search-forward "Posted by[ \t\r\n]+")
(when (looking-at "\\(<a[^>]+>\\)?[ \t\r\n]*\\([^<\r\n]+\\)")
(setq from (nnweb-decode-entities-string (match-string 2))))
(search-forward "on ")
(setq date (nnslashdot-date-to-date
(buffer-substring (point) (1- (search-forward "<")))))
(setq lines (/ (- (point)
@ -123,16 +124,16 @@
1 group from date
(concat "<" (nnslashdot-sid-strip sid) "%1@slashdot>")
"" 0 lines nil nil))
headers))
(while (and (setq start (pop startats))
(< start last))
headers)
(setq start (if nnslashdot-threaded 2 (pop articles))))
(while (and start (<= start last))
(setq point (goto-char (point-max)))
(nnweb-insert
(format nnslashdot-comments-url
(nnslashdot-sid-strip sid)
nnslashdot-threshold 0 start)
nnslashdot-threshold 0 (- start 2))
t)
(when first-comments
(when (and nnslashdot-threaded first-comments)
(setq first-comments nil)
(goto-char (point-max))
(while (re-search-backward "startat=\\([0-9]+\\)" nil t)
@ -140,58 +141,68 @@
(unless (memq s startats)
(push s startats)))
(setq startats (sort startats '<)))
(setq article (if (and article (< start article)) article start))
(goto-char point)
(while (re-search-forward
"<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))"
nil t)
(setq article (string-to-number (match-string 1))
(setq cid (match-string 1)
subject (match-string 3)
score (match-string 5))
(unless (assq article (nth 4 entry))
(setcar (nthcdr 4 entry) (cons (cons article cid) (nth 4 entry)))
(setq changed t))
(when (string-match "^Re: *" subject)
(setq subject (concat "Re: " (substring subject (match-end 0)))))
(setq subject (nnweb-decode-entities-string subject))
(forward-line 1)
(setq subject (nnweb-decode-entities-string subject))
(search-forward "<BR>")
(if (looking-at
"by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))")
"by[ \t\n]+<a[^>]+>\\([^<]+\\)</a>[ \t\n]*(\\(<[^>]+>\\)*\\([^<>)]+\\))")
(progn
(goto-char (- (match-end 0) 5))
(setq from (concat
(setq from (concat
(nnweb-decode-entities-string (match-string 1))
" <" (match-string 2) ">")))
" <" (match-string 3) ">")))
(setq from "")
(when (looking-at "by \\(.+\\) on ")
(when (looking-at "by \\([^<>]*\\) on ")
(goto-char (- (match-end 0) 5))
(setq from (nnweb-decode-entities-string (match-string 1)))))
(search-forward " on ")
(setq date
(nnslashdot-date-to-date
(buffer-substring (point) (progn (end-of-line) (point)))))
(setq lines (/ (abs (- (search-forward "<td ")
(buffer-substring (point) (progn (skip-chars-forward "^()<>\n\r") (point)))))
(setq lines (/ (abs (- (search-forward "<td")
(search-forward "</td>")))
70))
(forward-line 4)
(setq parent
(if (looking-at ".*cid=\\([0-9]+\\)")
(match-string 1)
nil))
(setq did t)
(if (not
(re-search-forward ".*cid=\\([0-9]+\\)\">Parent</A>" nil t))
(setq parent nil)
(setq parent (match-string 1))
(when (string= parent "0")
(setq parent nil)))
(push
(cons
(1+ article)
article
(make-full-mail-header
(1+ article)
article
(concat subject " (" score ")")
from date
(concat "<" (nnslashdot-sid-strip sid) "%"
(number-to-string (1+ article))
"@slashdot>")
(concat "<" (nnslashdot-sid-strip sid) "%" cid "@slashdot>")
(if parent
(concat "<" (nnslashdot-sid-strip sid) "%"
(number-to-string (1+ (string-to-number parent)))
"@slashdot>")
(concat "<" (nnslashdot-sid-strip sid) "%"
parent "@slashdot>")
"")
0 lines nil nil))
headers)))))
headers)
(while (and articles (<= (car articles) article))
(pop articles))
(setq article (1+ article)))
(if nnslashdot-threaded
(progn
(setq start (pop startats))
(if start (setq start (+ start 2))))
(setq start (pop articles))))))
(if changed (nnslashdot-write-groups))
(setq nnslashdot-headers (sort headers 'car-less-than-car))
(save-excursion
(set-buffer nntp-server-buffer)
@ -201,108 +212,6 @@
(nnheader-insert-nov (cdr header)))))
'nov))
(deffoo nnslashdot-sane-retrieve-headers (articles group)
(let ((last (car (last articles)))
(did nil)
(start (max (1- (car articles)) 1))
(sid (caddr (assoc group nnslashdot-groups)))
headers article subject score from date lines parent point)
(save-excursion
(set-buffer nnslashdot-buffer)
(erase-buffer)
(when (= start 1)
(nnweb-insert (format nnslashdot-article-url
(nnslashdot-sid-strip sid)) t)
(goto-char (point-min))
(search-forward "Posted by ")
(when (looking-at "<a[^>]+>\\([^<]+\\)")
(setq from (nnweb-decode-entities-string (match-string 1))))
(search-forward " on ")
(setq date (nnslashdot-date-to-date
(buffer-substring (point) (1- (search-forward "<")))))
(forward-line 2)
(setq lines (count-lines (point)
(re-search-forward
"A href=\"\\(http://slashdot.org\\)?/article")))
(push
(cons
1
(make-full-mail-header
1 group from date (concat "<" (nnslashdot-sid-strip sid)
"%1@slashdot>")
"" 0 lines nil nil))
headers))
(while (or (not article)
(and did
(< article last)))
(when article
(setq start (1+ article)))
(setq point (goto-char (point-max)))
(nnweb-insert
(format nnslashdot-comments-url (nnslashdot-sid-strip sid)
nnslashdot-threshold 4 start)
t)
(goto-char point)
(while (re-search-forward
"<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))"
nil t)
(setq article (string-to-number (match-string 1))
subject (match-string 3)
score (match-string 5))
(when (string-match "^Re: *" subject)
(setq subject (concat "Re: " (substring subject (match-end 0)))))
(setq subject (nnweb-decode-entities-string subject))
(forward-line 1)
(if (looking-at
"by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))")
(progn
(goto-char (- (match-end 0) 5))
(setq from (concat
(nnweb-decode-entities-string (match-string 1))
" <" (match-string 2) ">")))
(setq from "")
(when (looking-at "by \\(.+\\) on ")
(goto-char (- (match-end 0) 5))
(setq from (nnweb-decode-entities-string (match-string 1)))))
(search-forward " on ")
(setq date
(nnslashdot-date-to-date
(buffer-substring (point) (progn (end-of-line) (point)))))
(setq lines (/ (abs (- (search-forward "<td ")
(search-forward "</td>")))
70))
(forward-line 2)
(setq parent
(if (looking-at ".*cid=\\([0-9]+\\)")
(match-string 1)
nil))
(setq did t)
(push
(cons
(1+ article)
(make-full-mail-header
(1+ article) (concat subject " (" score ")")
from date
(concat "<" (nnslashdot-sid-strip sid) "%"
(number-to-string (1+ article))
"@slashdot>")
(if parent
(concat "<" (nnslashdot-sid-strip sid) "%"
(number-to-string (1+ (string-to-number parent)))
"@slashdot>")
"")
0 lines nil nil))
headers))))
(setq nnslashdot-headers
(sort headers (lambda (s1 s2) (< (car s1) (car s2)))))
(save-excursion
(set-buffer nntp-server-buffer)
(erase-buffer)
(mm-with-unibyte-current-buffer
(dolist (header nnslashdot-headers)
(nnheader-insert-nov (cdr header)))))
'nov))
(deffoo nnslashdot-request-group (group &optional server dont-check)
(nnslashdot-possibly-change-server nil server)
(let ((elem (assoc group nnslashdot-groups)))
@ -325,7 +234,7 @@
(deffoo nnslashdot-request-article (article &optional group server buffer)
(nnslashdot-possibly-change-server group server)
(let (contents)
(let (contents cid)
(condition-case why
(save-excursion
(set-buffer nnslashdot-buffer)
@ -333,23 +242,32 @@
(goto-char (point-min))
(when (and (stringp article)
(string-match "%\\([0-9]+\\)@" article))
(setq article (string-to-number (match-string 1 article))))
(setq cid (match-string 1 article))
(let ((map (nth 4 (assoc group nnslashdot-groups))))
(while map
(if (equal (cdar map) cid)
(setq article (caar map)
map nil)
(setq map (cdr map))))))
(when (numberp article)
(if (= article 1)
(progn
(re-search-forward "Posted by *<[^>]+>[^>]*<[^>]+> *on ")
(re-search-forward
"Posted by")
(search-forward "<BR>")
(setq contents
(buffer-substring
(point)
(progn
(re-search-forward
"<p>.*A href=\"\\(http://slashdot.org\\)?/article")
"&lt;&nbsp;[ \t\r\n]*<A HREF=\"\\(\\(http:\\)?//slashdot\\.org\\)?/article")
(match-beginning 0)))))
(search-forward (format "<a name=\"%d\">" (1- article)))
(setq cid (cdr (assq article
(nth 4 (assoc group nnslashdot-groups)))))
(search-forward (format "<a name=\"%s\">" cid))
(setq contents
(buffer-substring
(re-search-forward "<td[^>]+>")
(re-search-forward "<td[^>]*>")
(search-forward "</td>")))))))
(search-failed (nnslashdot-lose why)))
@ -384,10 +302,10 @@
(let ((number 0)
sid elem description articles gname)
(condition-case why
;; First we do the Ultramode to get info on all the latest groups.
(progn
;; First we do the Ultramode to get info on all the latest groups.
(progn
(mm-with-unibyte-buffer
(nnweb-insert "http://slashdot.org/slashdot.xml" t)
(nnweb-insert nnslashdot-backslash-url t)
(goto-char (point-min))
(while (search-forward "<story>" nil t)
(narrow-to-region (point) (search-forward "</story>"))
@ -404,7 +322,8 @@
(setq gname (concat description " (" sid ")"))
(if (setq elem (assoc gname nnslashdot-groups))
(setcar (cdr elem) articles)
(push (list gname articles sid) nnslashdot-groups))
(push (list gname articles sid (current-time) nil)
nnslashdot-groups))
(goto-char (point-max))
(widen)))
;; Then do the older groups.
@ -425,13 +344,14 @@
(setq gname (concat description " (" sid ")"))
(if (setq elem (assoc gname nnslashdot-groups))
(setcar (cdr elem) articles)
(push (list gname articles sid) nnslashdot-groups)))))
(push (list gname articles sid (current-time) nil)
nnslashdot-groups)))))
(incf number 30)))
(search-failed (nnslashdot-lose why)))
(nnslashdot-write-groups)
(nnslashdot-generate-active)
t))
(deffoo nnslashdot-request-newgroups (date &optional server)
(nnslashdot-possibly-change-server nil server)
(nnslashdot-generate-active)
@ -496,6 +416,24 @@
(setq nnslashdot-headers nil
nnslashdot-groups nil))
(deffoo nnslashdot-request-expire-articles
(articles group &optional server force)
(nnslashdot-possibly-change-server group server)
(let ((item (assoc group nnslashdot-groups)))
(when item
(if (fourth item)
(when (and (>= (length articles) (cadr item)) ;; All are expirable.
(nnmail-expired-article-p
group
(fourth item)
force))
(setq nnslashdot-groups (delq item nnslashdot-groups))
(nnslashdot-write-groups)
(setq articles nil)) ;; all expired.
(setcdr (cddr item) (list (current-time)))
(nnslashdot-write-groups))))
articles)
(nnoo-define-skeleton nnslashdot)
;;; Internal functions
@ -508,18 +446,32 @@
(unless nnslashdot-groups
(nnslashdot-read-groups)))
(defun nnslashdot-make-tuple (tuple n)
(prog1
tuple
(while (> n 1)
(unless (cdr tuple)
(setcdr tuple (list nil)))
(setq tuple (cdr tuple)
n (1- n)))))
(defun nnslashdot-read-groups ()
(let ((file (expand-file-name "groups" nnslashdot-directory)))
(when (file-exists-p file)
(mm-with-unibyte-buffer
(insert-file-contents file)
(goto-char (point-min))
(setq nnslashdot-groups (read (current-buffer)))))))
(setq nnslashdot-groups (read (current-buffer))))
(if (and nnslashdot-groups (< (length (car nnslashdot-groups)) 5))
(let ((groups nnslashdot-groups))
(while groups
(nnslashdot-make-tuple (car groups) 5)
(setq groups (cdr groups))))))))
(defun nnslashdot-write-groups ()
(with-temp-file (expand-file-name "groups" nnslashdot-directory)
(prin1 nnslashdot-groups (current-buffer))))
(gnus-prin1 nnslashdot-groups)))
(defun nnslashdot-init (server)
"Initialize buffers and such."
(unless (file-exists-p nnslashdot-directory)
@ -528,7 +480,8 @@
(setq nnslashdot-buffer
(save-excursion
(nnheader-set-temp-buffer
(format " *nnslashdot %s*" server))))))
(format " *nnslashdot %s*" server))))
(push nnslashdot-buffer gnus-buffers)))
(defun nnslashdot-date-to-date (sdate)
(condition-case err
@ -552,11 +505,6 @@
(defun nnslashdot-lose (why)
(error "Slashdot HTML has changed; please get a new version of nnslashdot"))
;(defun nnslashdot-sid-strip (sid)
; (if (string-match "^00/" sid)
; (substring sid (match-end 0))
; sid))
(defalias 'nnslashdot-sid-strip 'identity)
(provide 'nnslashdot)

View file

@ -56,6 +56,8 @@
(defvoo nnultimate-groups nil)
(defvoo nnultimate-headers nil)
(defvoo nnultimate-articles nil)
(defvar nnultimate-table-regexp
"postings.*editpost\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio")
;;; Interface functions
@ -74,13 +76,17 @@
(old-total (or (nth 6 entry) 1))
(furl "forumdisplay.cgi?action=topics&number=%d&DaysPrune=1000")
(furls (list (concat nnultimate-address (format furl sid))))
(nnultimate-table-regexp
"postings.*editpost\\|forumdisplay\\|getbio")
headers article subject score from date lines parent point
contents tinfo fetchers map elem a href garticles topic old-max
inc datel table string current-page total-contents pages
inc datel table current-page total-contents pages
farticles forum-contents parse furl-fetched mmap farticle)
(setq map mapping)
(while (and (setq article (car articles))
map)
;; Skip past the articles in the map until we reach the
;; article we're looking for.
(while (and map
(or (> article (caar map))
(< (cadar map) (caar map))))
@ -101,7 +107,7 @@
fetchers))
(pop articles)
(setq article (car articles)))))
;; Now we have the mapping from/to Gnus/nnultimate article numbers,
;; Now we have the mapping from/to Gnus/nnultimate article numbers,
;; so we start fetching the topics that we need to satisfy the
;; request.
(if (not fetchers)
@ -128,22 +134,27 @@
(setq contents
(ignore-errors (w3-parse-buffer (current-buffer))))
(setq table (nnultimate-find-forum-table contents))
(setq string (mapconcat 'identity (nnweb-text table) ""))
(when (string-match "topic is \\([0-9]\\) pages" string)
(setq pages (string-to-number (match-string 1 string)))
(setcdr table nil)
(setq table (nnultimate-find-forum-table contents)))
(goto-char (point-min))
(when (re-search-forward "topic is \\([0-9]+\\) pages" nil t)
(setq pages (string-to-number (match-string 1))))
(setq contents (cdr (nth 2 (car (nth 2 table)))))
(setq total-contents (nconc total-contents contents))
(incf current-page))
;;(setq total-contents (nreverse total-contents))
(dolist (art (cdr elem))
(if (not (nth (1- (cdr art)) total-contents))
() ;(debug)
(push (list (car art)
(nth (1- (cdr art)) total-contents)
subject)
nnultimate-articles)))))
(when t
(let ((i 0))
(dolist (co total-contents)
(push (list (or (nnultimate-topic-article-to-article
group (car elem) (incf i))
1)
co subject)
nnultimate-articles))))
(when nil
(dolist (art (cdr elem))
(when (nth (1- (cdr art)) total-contents)
(push (list (car art)
(nth (1- (cdr art)) total-contents)
subject)
nnultimate-articles))))))
(setq nnultimate-articles
(sort nnultimate-articles 'car-less-than-car))
;; Now we have all the articles, conveniently in an alist
@ -161,17 +172,26 @@
(setq date (substring (car datel) (match-end 0))
datel nil))
(pop datel))
(setq date (delete "" (split-string date "[- \n\t\r    ]")))
(if (or (member "AM" date)
(member "PM" date))
(when date
(setq date (delete "" (split-string
date "[-, \n\t\r    ]")))
(if (or (member "AM" date)
(member "PM" date))
(setq date (format
"%s %s %s %s"
(nth 1 date)
(if (and (>= (length (nth 0 date)) 3)
(assoc (downcase
(substring (nth 0 date) 0 3))
parse-time-months))
(substring (nth 0 date) 0 3)
(car (rassq (string-to-number (nth 0 date))
parse-time-months)))
(nth 2 date) (nth 3 date)))
(setq date (format "%s %s %s %s"
(car (rassq (string-to-number (nth 0 date))
(car (rassq (string-to-number (nth 1 date))
parse-time-months))
(nth 1 date) (nth 2 date) (nth 3 date)))
(setq date (format "%s %s %s %s"
(car (rassq (string-to-number (nth 1 date))
parse-time-months))
(nth 0 date) (nth 2 date) (nth 3 date))))
(nth 0 date) (nth 2 date) (nth 3 date)))))
(push
(cons
article
@ -180,7 +200,7 @@
from (or date "")
(concat "<" (number-to-string sid) "%"
(number-to-string article)
"@ultimate>")
"@ultimate." server ">")
"" 0
(/ (length (mapconcat
'identity
@ -199,6 +219,16 @@
(nnheader-insert-nov (cdr header))))))
'nov)))
(defun nnultimate-topic-article-to-article (group topic article)
(catch 'found
(dolist (elem (nth 5 (assoc group nnultimate-groups)))
(when (and (= topic (nth 2 elem))
(>= article (nth 3 elem))
(< article (+ (- (nth 1 elem) (nth 0 elem)) 1
(nth 3 elem))))
(throw 'found
(+ (nth 0 elem) (- article (nth 3 elem))))))))
(deffoo nnultimate-request-group (group &optional server dont-check)
(nnultimate-possibly-change-server nil server)
(when (not nnultimate-groups)
@ -330,7 +360,7 @@
;; the group is entered, there's 2 new articles in topic one
;; and 1 in topic three. Then Gnus article number 8-9 be 5-6
;; in topic one and 10 will be the 2 in topic three.
(dolist (row (reverse forum-contents))
(dolist (row (nreverse forum-contents))
(setq row (nth 2 row))
(when (setq a (nnweb-parse-find 'a row))
(setq subject (car (last (nnweb-text a)))
@ -403,7 +433,7 @@
nnultimate-groups-alist)
(with-temp-file (expand-file-name "groups" nnultimate-directory)
(prin1 nnultimate-groups-alist (current-buffer))))
(defun nnultimate-init (server)
"Initialize buffers and such."
(unless (file-exists-p nnultimate-directory)
@ -438,9 +468,7 @@
(nth 2 parse))))
(let ((href (cdr (assq 'href (nth 1 (nnweb-parse-find 'a parse 20)))))
case-fold-search)
(when (and href (string-match
"postings\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio"
href))
(when (and href (string-match nnultimate-table-regexp href))
t))))
(provide 'nnultimate)

View file

@ -1,5 +1,5 @@
;;; nnweb.el --- retrieving articles via web search engines
;; Copyright (C) 1996, 1997, 1998, 1999, 2000
;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
;; Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@ -55,25 +55,48 @@
(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
"Where nnweb will save its files.")
(defvoo nnweb-type 'dejanews
(defvoo nnweb-type 'google
"What search engine type is being used.
Valid types include `dejanews', `dejanewsold', `reference',
Valid types include `google', `dejanews', `dejanewsold', `reference',
and `altavista'.")
(defvar nnweb-type-definition
'((dejanews
'(
(google
;;(article . nnweb-google-wash-article)
;;(id . "http://groups.google.com/groups?as_umsgid=%s")
(article . ignore)
(id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
(map . nnweb-dejanews-create-mapping)
(search . nnweb-dejanews-search)
(address . "http://www.deja.com/=dnc/qs.xp")
(identifier . nnweb-dejanews-identity))
(dejanewsold
(id . "http://groups.google.com/groups?selm=%s&output=gplain")
;;(reference . nnweb-google-reference)
(reference . identity)
(map . nnweb-google-create-mapping)
(search . nnweb-google-search)
(address . "http://groups.google.com/groups")
(identifier . nnweb-google-identity))
(dejanews ;; alias of google
;;(article . nnweb-google-wash-article)
;;(id . "http://groups.google.com/groups?as_umsgid=%s")
(article . ignore)
(map . nnweb-dejanews-create-mapping)
(search . nnweb-dejanewsold-search)
(address . "http://www.deja.com/dnquery.xp")
(identifier . nnweb-dejanews-identity))
(id . "http://groups.google.com/groups?selm=%s&output=gplain")
;;(reference . nnweb-google-reference)
(reference . identity)
(map . nnweb-google-create-mapping)
(search . nnweb-google-search)
(address . "http://groups.google.com/groups")
(identifier . nnweb-google-identity))
;;; (dejanews
;;; (article . ignore)
;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
;;; (map . nnweb-dejanews-create-mapping)
;;; (search . nnweb-dejanews-search)
;;; (address . "http://www.deja.com/=dnc/qs.xp")
;;; (identifier . nnweb-dejanews-identity))
;;; (dejanewsold
;;; (article . ignore)
;;; (map . nnweb-dejanews-create-mapping)
;;; (search . nnweb-dejanewsold-search)
;;; (address . "http://www.deja.com/dnquery.xp")
;;; (identifier . nnweb-dejanews-identity))
(reference
(article . nnweb-reference-wash-article)
(map . nnweb-reference-create-mapping)
@ -124,6 +147,8 @@ and `altavista'.")
(deffoo nnweb-request-scan (&optional group server)
(nnweb-possibly-change-server group server)
(if nnweb-ephemeral-p
(setq nnweb-hashtb (gnus-make-hashtable 4095)))
(funcall (nnweb-definition 'map))
(unless nnweb-ephemeral-p
(nnweb-write-active)
@ -134,9 +159,10 @@ and `altavista'.")
(when (and group
(not (equal group nnweb-group))
(not nnweb-ephemeral-p))
(setq nnweb-group group
nnweb-articles nil)
(let ((info (assoc group nnweb-group-alist)))
(when info
(setq nnweb-group group)
(setq nnweb-type (nth 2 info))
(setq nnweb-search (nth 3 info))
(unless dont-check
@ -175,17 +201,19 @@ and `altavista'.")
(and (stringp article)
(nnweb-definition 'id t)
(let ((fetch (nnweb-definition 'id))
art)
art active)
(when (string-match "^<\\(.*\\)>$" article)
(setq art (match-string 1 article)))
(and fetch
art
(mm-with-unibyte-current-buffer
(nnweb-fetch-url
(format fetch article)))))))
(when (and fetch art)
(setq url (format fetch art))
(mm-with-unibyte-current-buffer
(nnweb-fetch-url url))
(if (nnweb-definition 'reference t)
(setq article
(funcall (nnweb-definition
'reference) article)))))))
(unless nnheader-callback-function
(funcall (nnweb-definition 'article))
(nnweb-decode-entities))
(funcall (nnweb-definition 'article)))
(nnheader-report 'nnweb "Fetched article %s" article)
(cons group (and (numberp article) article))))))
@ -290,10 +318,11 @@ and `altavista'.")
(nnweb-open-server server)))
(unless nnweb-group-alist
(nnweb-read-active))
(unless nnweb-hashtb
(setq nnweb-hashtb (gnus-make-hashtable 4095)))
(when group
(when (and (not nnweb-ephemeral-p)
(not (equal group nnweb-group)))
(setq nnweb-hashtb (gnus-make-hashtable 4095))
(equal group nnweb-group))
(nnweb-request-group group nil t))))
(defun nnweb-init (server)
@ -393,7 +422,7 @@ and `altavista'.")
(car (rassq (string-to-number
(match-string 2 date))
parse-time-months))
(match-string 3 date)
(match-string 3 date)
(match-string 1 date)))
(setq date "Jan 1 00:00:00 0000"))
(incf i)
@ -559,6 +588,7 @@ and `altavista'.")
(while (search-forward "," nil t)
(replace-match " " t t)))
(widen)
(nnweb-decode-entities)
(set-marker body nil))))
(defun nnweb-reference-search (search)
@ -663,7 +693,8 @@ and `altavista'.")
(while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t)
(replace-match "&lt;\\1&gt; " t)))
(widen)
(nnweb-remove-markup)))
(nnweb-remove-markup)
(nnweb-decode-entities)))
(defun nnweb-altavista-search (search &optional part)
(url-insert-file-contents
@ -682,6 +713,140 @@ and `altavista'.")
(setq buffer-file-name nil)
t)
;;;
;;; Deja bought by google.com
;;;
(defun nnweb-google-wash-article ()
(let ((case-fold-search t) url)
(goto-char (point-min))
(re-search-forward "^<pre>" nil t)
(narrow-to-region (point-min) (point))
(search-backward "<table " nil t 2)
(delete-region (point-min) (point))
(if (re-search-forward "Search Result [0-9]+" nil t)
(replace-match ""))
(if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
(replace-match ""))
(goto-char (point-min))
(while (search-forward "<br>" nil t)
(replace-match "\n"))
(nnweb-remove-markup)
(goto-char (point-min))
(while (re-search-forward "^[ \t]*\n" nil t)
(replace-match ""))
(goto-char (point-max))
(insert "\n")
(widen)
(narrow-to-region (point) (point-max))
(search-forward "</pre>" nil t)
(delete-region (point) (point-max))
(nnweb-remove-markup)
(widen)))
(defun nnweb-google-parse-1 (&optional Message-ID)
(let ((i 0)
(case-fold-search t)
(active (cadr (assoc nnweb-group nnweb-group-alist)))
Subject Score Date Newsgroups From
map url mid)
(unless active
(push (list nnweb-group (setq active (cons 1 0))
nnweb-type nnweb-search)
nnweb-group-alist))
;; Go through all the article hits on this page.
(goto-char (point-min))
(while (re-search-forward
"a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
(setq mid (match-string 2)
url (format
"http://groups.google.com/groups?selm=%s&output=gplain" mid))
(narrow-to-region (search-forward ">" nil t)
(search-forward "</a>" nil t))
(nnweb-remove-markup)
(nnweb-decode-entities)
(setq Subject (buffer-string))
(goto-char (point-max))
(widen)
(forward-line 1)
(when (looking-at "<br><font[^>]+>")
(goto-char (match-end 0)))
(if (not (looking-at "<a[^>]+>"))
(skip-chars-forward " \t")
(narrow-to-region (point)
(search-forward "</a>" nil t))
(nnweb-remove-markup)
(nnweb-decode-entities)
(setq Newsgroups (buffer-string))
(goto-char (point-max))
(widen)
(skip-chars-forward "- \t"))
(when (looking-at
"\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
(setq From (match-string 2)
Date (match-string 1)))
(forward-line 1)
(incf i)
(unless (nnweb-get-hashtb url)
(push
(list
(incf (cdr active))
(make-full-mail-header
(cdr active) (if Newsgroups
(concat "(" Newsgroups ") " Subject)
Subject)
From Date (or Message-ID mid)
nil 0 0 url))
map)
(nnweb-set-hashtb (cadar map) (car map))))
map))
(defun nnweb-google-reference (id)
(let ((map (nnweb-google-parse-1 id)) header)
(setq nnweb-articles
(nconc nnweb-articles map))
(when (setq header (cadar map))
(mm-with-unibyte-current-buffer
(nnweb-fetch-url (mail-header-xref header)))
(caar map))))
(defun nnweb-google-create-mapping ()
"Perform the search and create an number-to-url alist."
(save-excursion
(set-buffer nnweb-buffer)
(erase-buffer)
(when (funcall (nnweb-definition 'search) nnweb-search)
(let ((more t))
(while more
(setq nnweb-articles
(nconc nnweb-articles (nnweb-google-parse-1)))
;; FIXME: There is more.
(setq more nil))
;; Return the articles in the right order.
(setq nnweb-articles
(sort nnweb-articles 'car-less-than-car))))))
(defun nnweb-google-search (search)
(nnweb-insert
(concat
(nnweb-definition 'address)
"?"
(nnweb-encode-www-form-urlencoded
`(("q" . ,search)
("num". "100")
("hq" . "")
("hl" . "")
("lr" . "")
("safe" . "off")
("sites" . "groups")))))
t)
(defun nnweb-google-identity (url)
"Return an unique identifier based on URL."
(if (string-match "selm=\\([^ &>]+\\)" url)
(match-string 1 url)
url))
;;;
;;; General web/w3 interface utility functions
;;;
@ -689,7 +854,7 @@ and `altavista'.")
(defun nnweb-insert-html (parse)
"Insert HTML based on a w3 parse tree."
(if (stringp parse)
(insert parse)
(insert (nnheader-string-as-multibyte parse))
(insert "<" (symbol-name (car parse)) " ")
(insert (mapconcat
(lambda (param)
@ -729,7 +894,7 @@ and `altavista'.")
(while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
(let ((elem (if (eq (aref (match-string 1) 0) ?\#)
(let ((c
(string-to-number (substring
(string-to-number (substring
(match-string 1) 1))))
(if (mm-char-or-char-int-p c) c 32))
(or (cdr (assq (intern (match-string 1))
@ -739,9 +904,9 @@ and `altavista'.")
(setq elem (char-to-string elem)))
(replace-match elem t t))))
(defun nnweb-decode-entities-string (str)
(defun nnweb-decode-entities-string (string)
(with-temp-buffer
(insert str)
(insert string)
(nnweb-decode-entities)
(buffer-substring (point-min) (point-max))))
@ -760,12 +925,12 @@ and `altavista'.")
"Insert the contents from an URL in the current buffer.
If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
(let ((name buffer-file-name))
(if follow-refresh
(if follow-refresh
(save-restriction
(narrow-to-region (point) (point))
(url-insert-file-contents url)
(goto-char (point-min))
(when (re-search-forward
(when (re-search-forward
"<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
(let ((url (match-string 1)))
(delete-region (point-min) (point-max))
@ -822,6 +987,11 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
(listp (cdr element)))
(nnweb-text-1 element)))))
(defun nnweb-replace-in-string (string match newtext)
(while (string-match match string)
(setq string (replace-match newtext t t string)))
string)
(provide 'nnweb)
;;; nnweb.el ends here