mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-15 10:30:25 -08:00
Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Revision: emacs@sv.gnu.org/emacs--devo--0--patch-911
This commit is contained in:
parent
ccae01a639
commit
01c52d3165
166 changed files with 27871 additions and 9376 deletions
|
|
@ -122,6 +122,7 @@
|
|||
|
||||
|
||||
;;; Code:
|
||||
(require 'mail-parse)
|
||||
|
||||
(defvar gnus-original-article-buffer)
|
||||
|
||||
|
|
@ -163,17 +164,53 @@ This variable says how many characters this will be."
|
|||
:group 'spam-stat)
|
||||
|
||||
(defcustom spam-stat-split-fancy-spam-group "mail.spam"
|
||||
"Name of the group where spam should be stored, if
|
||||
`spam-stat-split-fancy' is used in fancy splitting rules. Has no
|
||||
effect when spam-stat is invoked through spam.el."
|
||||
"Name of the group where spam should be stored.
|
||||
If `spam-stat-split-fancy' is used in fancy splitting rules. Has
|
||||
no effect when spam-stat is invoked through spam.el."
|
||||
:type 'string
|
||||
:group 'spam-stat)
|
||||
|
||||
(defcustom spam-stat-split-fancy-spam-threshhold 0.9
|
||||
"Spam score threshhold in spam-stat-split-fancy."
|
||||
(defcustom spam-stat-split-fancy-spam-threshold 0.9
|
||||
"Spam score threshold in spam-stat-split-fancy."
|
||||
:type 'number
|
||||
:group 'spam-stat)
|
||||
|
||||
(defcustom spam-stat-washing-hook nil
|
||||
"Hook applied to each message before analysis."
|
||||
:type 'hook
|
||||
:group 'spam-stat)
|
||||
|
||||
(defcustom spam-stat-score-buffer-user-functions nil
|
||||
"List of additional scoring functions.
|
||||
Called one by one on the buffer.
|
||||
|
||||
If all of these functions return non-nil answers, these numerical
|
||||
answers are added to the computed spam stat score on the buffer. If
|
||||
you defun such functions, make sure they don't return the buffer in a
|
||||
narrowed state or such: use, for example, `save-excursion'. Each of
|
||||
your functions is also passed the initial spam-stat score which might
|
||||
aid in your scoring.
|
||||
|
||||
Also be careful when defining such functions. If they take a long
|
||||
time, they will slow down your mail splitting. Thus, if the buffer is
|
||||
large, don't forget to use smaller regions, by wrapping your work in,
|
||||
say, `with-spam-stat-max-buffer-size'."
|
||||
:type '(repeat sexp)
|
||||
:group 'spam-stat)
|
||||
|
||||
(defcustom spam-stat-process-directory-age 90
|
||||
"Max. age of files to be processed in directory, in days.
|
||||
When using `spam-stat-process-spam-directory' or
|
||||
`spam-stat-process-non-spam-directory', only files that have
|
||||
been touched in this many days will be considered. Without
|
||||
this filter, re-training spam-stat with several thousand messages
|
||||
will start to take a very long time."
|
||||
:type 'number
|
||||
:group 'spam-stat)
|
||||
|
||||
(defvar spam-stat-last-saved-at nil
|
||||
"Time stamp of last change of spam-stat-file on this run")
|
||||
|
||||
(defvar spam-stat-syntax-table
|
||||
(let ((table (copy-syntax-table text-mode-syntax-table)))
|
||||
(modify-syntax-entry ?- "w" table)
|
||||
|
|
@ -196,52 +233,24 @@ This is set by hooking into Gnus.")
|
|||
(defvar spam-stat-buffer-name " *spam stat buffer*"
|
||||
"Name of the `spam-stat-buffer'.")
|
||||
|
||||
;; Functions missing in Emacs 20
|
||||
|
||||
(when (memq nil (mapcar 'fboundp
|
||||
'(gethash hash-table-count make-hash-table
|
||||
mapc puthash)))
|
||||
(require 'cl)
|
||||
(unless (fboundp 'puthash)
|
||||
;; alias puthash is missing from Emacs 20 cl-extra.el
|
||||
(defalias 'puthash 'cl-puthash)))
|
||||
|
||||
(eval-when-compile
|
||||
(unless (fboundp 'with-syntax-table)
|
||||
;; Imported from Emacs 21.2
|
||||
(defmacro with-syntax-table (table &rest body) "\
|
||||
Evaluate BODY with syntax table of current buffer set to a copy of TABLE.
|
||||
The syntax table of the current buffer is saved, BODY is evaluated, and the
|
||||
saved table is restored, even in case of an abnormal exit.
|
||||
Value is what BODY returns."
|
||||
(let ((old-table (make-symbol "table"))
|
||||
(old-buffer (make-symbol "buffer")))
|
||||
`(let ((,old-table (syntax-table))
|
||||
(,old-buffer (current-buffer)))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(set-syntax-table (copy-syntax-table ,table))
|
||||
,@body)
|
||||
(save-current-buffer
|
||||
(set-buffer ,old-buffer)
|
||||
(set-syntax-table ,old-table))))))))
|
||||
(defvar spam-stat-coding-system
|
||||
(if (mm-coding-system-p 'emacs-mule) 'emacs-mule 'raw-text)
|
||||
"Coding system used for `spam-stat-file'.")
|
||||
|
||||
;; Hooking into Gnus
|
||||
|
||||
(defun spam-stat-store-current-buffer ()
|
||||
"Store a copy of the current buffer in `spam-stat-buffer'."
|
||||
(save-excursion
|
||||
(let ((str (buffer-string)))
|
||||
(set-buffer (get-buffer-create spam-stat-buffer-name))
|
||||
(let ((buf (current-buffer)))
|
||||
(with-current-buffer (get-buffer-create spam-stat-buffer-name)
|
||||
(erase-buffer)
|
||||
(insert str)
|
||||
(insert-buffer-substring buf)
|
||||
(setq spam-stat-buffer (current-buffer)))))
|
||||
|
||||
(defun spam-stat-store-gnus-article-buffer ()
|
||||
"Store a copy of the current article in `spam-stat-buffer'.
|
||||
This uses `gnus-article-buffer'."
|
||||
(save-excursion
|
||||
(set-buffer gnus-original-article-buffer)
|
||||
(with-current-buffer gnus-original-article-buffer
|
||||
(spam-stat-store-current-buffer)))
|
||||
|
||||
;; Data -- not using defstruct in order to save space and time
|
||||
|
|
@ -259,6 +268,9 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
(defvar spam-stat-nbad 0
|
||||
"The number of bad mails in the dictionary.")
|
||||
|
||||
(defvar spam-stat-error-holder nil
|
||||
"A holder for condition-case errors while scoring buffers.")
|
||||
|
||||
(defsubst spam-stat-good (entry)
|
||||
"Return the number of times this word belongs to good mails."
|
||||
(aref entry 0))
|
||||
|
|
@ -313,7 +325,7 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
;; Parsing
|
||||
|
||||
(defmacro with-spam-stat-max-buffer-size (&rest body)
|
||||
"Narrows the buffer down to the first 4k characters, then evaluates BODY."
|
||||
"Narrow the buffer down to the first 4k characters, then evaluate BODY."
|
||||
`(save-restriction
|
||||
(when (> (- (point-max)
|
||||
(point-min))
|
||||
|
|
@ -324,6 +336,7 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
|
||||
(defun spam-stat-buffer-words ()
|
||||
"Return a hash table of words and number of occurrences in the buffer."
|
||||
(run-hooks 'spam-stat-washing-hook)
|
||||
(with-spam-stat-max-buffer-size
|
||||
(with-syntax-table spam-stat-syntax-table
|
||||
(goto-char (point-min))
|
||||
|
|
@ -372,7 +385,7 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
(lambda (word count)
|
||||
(let ((entry (gethash word spam-stat)))
|
||||
(if (not entry)
|
||||
(error "This buffer has unknown words in it")
|
||||
(gnus-message 8 "This buffer has unknown words in it")
|
||||
(spam-stat-set-good entry (- (spam-stat-good entry) count))
|
||||
(spam-stat-set-bad entry (+ (spam-stat-bad entry) count))
|
||||
(spam-stat-set-score entry (spam-stat-compute-score entry))
|
||||
|
|
@ -388,7 +401,7 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
(lambda (word count)
|
||||
(let ((entry (gethash word spam-stat)))
|
||||
(if (not entry)
|
||||
(error "This buffer has unknown words in it")
|
||||
(gnus-message 8 "This buffer has unknown words in it")
|
||||
(spam-stat-set-good entry (+ (spam-stat-good entry) count))
|
||||
(spam-stat-set-bad entry (- (spam-stat-bad entry) count))
|
||||
(spam-stat-set-score entry (spam-stat-compute-score entry))
|
||||
|
|
@ -403,28 +416,38 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
|
|||
With a prefix argument save unconditionally."
|
||||
(interactive "P")
|
||||
(when (or force spam-stat-dirty)
|
||||
(with-temp-buffer
|
||||
(let ((standard-output (current-buffer))
|
||||
(font-lock-maximum-size 0))
|
||||
(insert "(setq spam-stat-ngood "
|
||||
(number-to-string spam-stat-ngood)
|
||||
" spam-stat-nbad "
|
||||
(number-to-string spam-stat-nbad)
|
||||
" spam-stat (spam-stat-to-hash-table '(")
|
||||
(maphash (lambda (word entry)
|
||||
(prin1 (list word
|
||||
(spam-stat-good entry)
|
||||
(spam-stat-bad entry))))
|
||||
spam-stat)
|
||||
(insert ")))")
|
||||
(write-file spam-stat-file)))
|
||||
(setq spam-stat-dirty nil)))
|
||||
(let ((coding-system-for-write spam-stat-coding-system))
|
||||
(with-temp-file spam-stat-file
|
||||
(let ((standard-output (current-buffer))
|
||||
(font-lock-maximum-size 0))
|
||||
(insert (format ";-*- coding: %s; -*-\n" spam-stat-coding-system))
|
||||
(insert (format "(setq spam-stat-ngood %d spam-stat-nbad %d
|
||||
spam-stat (spam-stat-to-hash-table '(" spam-stat-ngood spam-stat-nbad))
|
||||
(maphash (lambda (word entry)
|
||||
(prin1 (list word
|
||||
(spam-stat-good entry)
|
||||
(spam-stat-bad entry))))
|
||||
spam-stat)
|
||||
(insert ")))"))))
|
||||
(message "Saved %s." spam-stat-file)
|
||||
(setq spam-stat-dirty nil
|
||||
spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file)))))
|
||||
|
||||
(defun spam-stat-load ()
|
||||
"Read the `spam-stat' hash table from disk."
|
||||
;; TODO: maybe we should warn the user if spam-stat-dirty is t?
|
||||
(load-file spam-stat-file)
|
||||
(setq spam-stat-dirty nil))
|
||||
(let ((coding-system-for-read spam-stat-coding-system))
|
||||
(cond (spam-stat-dirty (message "Spam stat not loaded: spam-stat-dirty t"))
|
||||
((or (not (boundp 'spam-stat-last-saved-at))
|
||||
(null spam-stat-last-saved-at)
|
||||
(not (equal spam-stat-last-saved-at
|
||||
(nth 5 (file-attributes spam-stat-file)))))
|
||||
(progn
|
||||
(load-file spam-stat-file)
|
||||
(setq spam-stat-dirty nil
|
||||
spam-stat-last-saved-at
|
||||
(nth 5 (file-attributes spam-stat-file)))))
|
||||
(t (message "Spam stat file not loaded: no change in disk..")))))
|
||||
|
||||
(defun spam-stat-to-hash-table (entries)
|
||||
"Turn list ENTRIES into a hash table and store as `spam-stat'.
|
||||
|
|
@ -433,7 +456,8 @@ the word string, NGOOD is the number of good mails it has appeared in,
|
|||
NBAD is the number of bad mails it has appeared in, GOOD is the number
|
||||
of times it appeared in good mails, and BAD is the number of times it
|
||||
has appeared in bad mails."
|
||||
(let ((table (make-hash-table :test 'equal)))
|
||||
(let ((table (make-hash-table :size (length entries)
|
||||
:test 'equal)))
|
||||
(mapc (lambda (l)
|
||||
(puthash (car l)
|
||||
(spam-stat-make-entry (nth 1 l) (nth 2 l))
|
||||
|
|
@ -466,46 +490,73 @@ The default score for unknown words is stored in
|
|||
These are the words whose spam-stat differs the most from 0.5.
|
||||
The list returned contains elements of the form \(WORD SCORE DIFF),
|
||||
where DIFF is the difference between SCORE and 0.5."
|
||||
(with-spam-stat-max-buffer-size
|
||||
(with-syntax-table spam-stat-syntax-table
|
||||
(let (result word score)
|
||||
(maphash (lambda (word ignore)
|
||||
(setq score (spam-stat-score-word word)
|
||||
result (cons (list word score (abs (- score 0.5)))
|
||||
result)))
|
||||
(spam-stat-buffer-words))
|
||||
(setq result (sort result (lambda (a b) (< (nth 2 b) (nth 2 a)))))
|
||||
(setcdr (nthcdr 14 result) nil)
|
||||
result))))
|
||||
(let (result word score)
|
||||
(maphash (lambda (word ignore)
|
||||
(setq score (spam-stat-score-word word)
|
||||
result (cons (list word score (abs (- score 0.5)))
|
||||
result)))
|
||||
(spam-stat-buffer-words))
|
||||
(setq result (sort result (lambda (a b) (< (nth 2 b) (nth 2 a)))))
|
||||
(setcdr (nthcdr 14 result) nil)
|
||||
result))
|
||||
|
||||
(defun spam-stat-score-buffer ()
|
||||
"Return a score describing the spam-probability for this buffer."
|
||||
"Return a score describing the spam-probability for this buffer.
|
||||
Add user supplied modifications if supplied."
|
||||
(interactive) ; helps in debugging.
|
||||
(setq spam-stat-score-data (spam-stat-buffer-words-with-scores))
|
||||
(let* ((probs (mapcar (lambda (e) (cadr e)) spam-stat-score-data))
|
||||
(prod (apply #'* probs)))
|
||||
(/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
|
||||
probs))))))
|
||||
(let* ((probs (mapcar 'cadr spam-stat-score-data))
|
||||
(prod (apply #'* probs))
|
||||
(score0
|
||||
(/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
|
||||
probs)))))
|
||||
(score1s
|
||||
(condition-case
|
||||
spam-stat-error-holder
|
||||
(spam-stat-score-buffer-user score0)
|
||||
(error nil)))
|
||||
(ans
|
||||
(if score1s (+ score0 score1s) score0)))
|
||||
(when (interactive-p)
|
||||
(message "%S" ans))
|
||||
ans))
|
||||
|
||||
(defun spam-stat-score-buffer-user (&rest args)
|
||||
(let* ((scores
|
||||
(mapcar
|
||||
(lambda (fn)
|
||||
(apply fn args))
|
||||
spam-stat-score-buffer-user-functions)))
|
||||
(if (memq nil scores) nil
|
||||
(apply #'+ scores))))
|
||||
|
||||
(defun spam-stat-split-fancy ()
|
||||
"Return the name of the spam group if the current mail is spam.
|
||||
Use this function on `nnmail-split-fancy'. If you are interested in
|
||||
the raw data used for the last run of `spam-stat-score-buffer',
|
||||
check the variable `spam-stat-score-data'."
|
||||
(condition-case var
|
||||
(condition-case spam-stat-error-holder
|
||||
(progn
|
||||
(set-buffer spam-stat-buffer)
|
||||
(goto-char (point-min))
|
||||
(when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshhold)
|
||||
(when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold)
|
||||
(when (boundp 'nnmail-split-trace)
|
||||
(mapc (lambda (entry)
|
||||
(push entry nnmail-split-trace))
|
||||
spam-stat-score-data))
|
||||
spam-stat-split-fancy-spam-group))
|
||||
(error (message "Error in spam-stat-split-fancy: %S" var)
|
||||
(error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder)
|
||||
nil)))
|
||||
|
||||
;; Testing
|
||||
|
||||
(defun spam-stat-strip-xref ()
|
||||
"Strip the the Xref header."
|
||||
(save-restriction
|
||||
(mail-narrow-to-head)
|
||||
(when (re-search-forward "^Xref:.*\n" nil t)
|
||||
(delete-region (match-beginning 0) (match-end 0)))))
|
||||
|
||||
(defun spam-stat-process-directory (dir func)
|
||||
"Process all the regular files in directory DIR using function FUNC."
|
||||
(let* ((files (directory-files dir t "^[^.]"))
|
||||
|
|
@ -515,10 +566,13 @@ check the variable `spam-stat-score-data'."
|
|||
(dolist (f files)
|
||||
(when (and (file-readable-p f)
|
||||
(file-regular-p f)
|
||||
(> (nth 7 (file-attributes f)) 0))
|
||||
(> (nth 7 (file-attributes f)) 0)
|
||||
(< (time-to-number-of-days (time-since (nth 5 (file-attributes f))))
|
||||
spam-stat-process-directory-age))
|
||||
(setq count (1+ count))
|
||||
(message "Reading %s: %.2f%%" dir (/ count max))
|
||||
(insert-file-contents f)
|
||||
(insert-file-contents-literally f)
|
||||
(spam-stat-strip-xref)
|
||||
(funcall func)
|
||||
(erase-buffer))))))
|
||||
|
||||
|
|
@ -537,13 +591,19 @@ check the variable `spam-stat-score-data'."
|
|||
(interactive)
|
||||
(hash-table-count spam-stat))
|
||||
|
||||
(defun spam-stat-test-directory (dir)
|
||||
(defun spam-stat-test-directory (dir &optional verbose)
|
||||
"Test all the regular files in directory DIR for spam.
|
||||
If the result is 1.0, then all files are considered spam.
|
||||
If the result is 0.0, non of the files is considered spam.
|
||||
You can use this to determine error rates."
|
||||
(interactive "D")
|
||||
You can use this to determine error rates.
|
||||
|
||||
If VERBOSE is non-nil display names of files detected as spam or
|
||||
non-spam in a temporary buffer. If it is the symbol `ham',
|
||||
display non-spam files; otherwise display spam files."
|
||||
(interactive "DDirectory: ")
|
||||
(let* ((files (directory-files dir t "^[^.]"))
|
||||
display-files
|
||||
buffer-score
|
||||
(total (length files))
|
||||
(score 0.0); float
|
||||
(max (/ total 100.0)); float
|
||||
|
|
@ -554,12 +614,22 @@ You can use this to determine error rates."
|
|||
(file-regular-p f)
|
||||
(> (nth 7 (file-attributes f)) 0))
|
||||
(setq count (1+ count))
|
||||
(message "Reading %.2f%%, score %.2f%%"
|
||||
(/ count max) (/ score count))
|
||||
(insert-file-contents f)
|
||||
(when (> (spam-stat-score-buffer) 0.9)
|
||||
(message "Reading %.2f%%, score %.2f"
|
||||
(/ count max) (/ score count))
|
||||
(insert-file-contents-literally f)
|
||||
(setq buffer-score (spam-stat-score-buffer))
|
||||
(when (> buffer-score 0.9)
|
||||
(setq score (1+ score)))
|
||||
(when verbose
|
||||
(if (> buffer-score 0.9)
|
||||
(unless (eq verbose 'ham) (push f display-files))
|
||||
(when (eq verbose 'ham) (push f display-files))))
|
||||
(erase-buffer))))
|
||||
(when display-files
|
||||
(with-output-to-temp-buffer "*spam-stat results*"
|
||||
(dolist (file display-files)
|
||||
(princ file)
|
||||
(terpri))))
|
||||
(message "Final score: %d / %d = %f" score total (/ score total))))
|
||||
|
||||
;; Shrinking the dictionary
|
||||
|
|
@ -579,7 +649,7 @@ COUNT defaults to 5"
|
|||
(setq spam-stat-dirty t))
|
||||
|
||||
(defun spam-stat-install-hooks-function ()
|
||||
"Install the spam-stat function hooks"
|
||||
"Install the spam-stat function hooks."
|
||||
(interactive)
|
||||
(add-hook 'nnmail-prepare-incoming-message-hook
|
||||
'spam-stat-store-current-buffer)
|
||||
|
|
@ -590,7 +660,7 @@ COUNT defaults to 5"
|
|||
(spam-stat-install-hooks-function))
|
||||
|
||||
(defun spam-stat-unload-hook ()
|
||||
"Uninstall the spam-stat function hooks"
|
||||
"Uninstall the spam-stat function hooks."
|
||||
(interactive)
|
||||
(remove-hook 'nnmail-prepare-incoming-message-hook
|
||||
'spam-stat-store-current-buffer)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue