mirror of
git://git.sv.gnu.org/emacs.git
synced 2025-12-05 22:20:24 -08:00
Better detection of potentially malicious bidi text
* src/bidi.c (bidi_find_first_overridden): Extend to detect more subtle effects of directional formatting controls, to include embeddings and isolates. * src/xdisp.c (Fbidi_find_overridden_directionality): Accept an additional argument BASE-DIR to specify the base direction of the paragraphs. * lisp/international/mule-cmds.el (confusingly-reordered): New face. (reorder-starters, reorder-enders): New variables. (highlight-confusing-reorderings): New command to detect and highlight suspiciously reordered text. * test/src/xdisp-tests.el (xdisp-tests--find-directional-overrides): New test. * etc/NEWS: Announce the new and improved features. * etc/tutorials/TUTORIAL.he: Fix embeddings with incorrect directions.
This commit is contained in:
parent
f875ed53f6
commit
b96855310e
6 changed files with 102 additions and 11 deletions
8
etc/NEWS
8
etc/NEWS
|
|
@ -93,6 +93,14 @@ Image specifiers can now use ':type webp'.
|
|||
For example, an alist entry as '(window-width . (body-columns . 40))'
|
||||
will make the body of the chosen window 40 columns wide.
|
||||
|
||||
** Better detection of text suspiciously reordered on display.
|
||||
The function 'bidi-find-overridden-directionality' has been extended
|
||||
to detect reordering effects produced by embeddings and isolates
|
||||
(started by directional formatting control characters such as RLO and
|
||||
LRI). The new command 'highlight-confusing-reorderings' finds and
|
||||
highlights segments of buffer text whose reordering for display is
|
||||
suspicious and could be malicious.
|
||||
|
||||
|
||||
* Editing Changes in Emacs 29.1
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
שיעור ראשון בשימוש ב־Emacs. זכויות שימוש ראה בסוף המסמך.
|
||||
שיעור ראשון בשימוש ב־Emacs. זכויות שימוש ראה בסוף המסמך.
|
||||
|
||||
פקודות רבות של Emacs משתמשות במקש CONTROL (בדרך־כלל מסומן ב־CTRL)
|
||||
או במקש META (בדרך־כלל מסומן ALT). במקום לציין את כל השמות האפשריים
|
||||
|
|
@ -24,7 +24,7 @@
|
|||
שימו לב לחפיפה של שתי שורות כאשר אתם עוברים ממסך למסך, מה שמבטיח רציפות
|
||||
מסוימת בעת קריאת הטקסט.
|
||||
|
||||
הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־Emacs שהותאם קלות עבורכם.
|
||||
הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־Emacs שהותאם קלות עבורכם.
|
||||
בהמשך תקבלו הוראות לנסות פקודות שונות כדי לבצע שינויים בטקסט הזה. אם
|
||||
במקרה תשנו את הטקסט לפני שנבקש, אל דאגה: זוהי "עריכה" שהיא יעודו של
|
||||
Emacs.
|
||||
|
|
|
|||
|
|
@ -3259,4 +3259,51 @@ as names, not numbers."
|
|||
(define-obsolete-function-alias 'ucs-insert 'insert-char "24.3")
|
||||
(define-key ctl-x-map "8\r" 'insert-char)
|
||||
|
||||
(defface confusingly-reordered
  '((t :inherit underline :underline (:style wave :color "Red1")))
  "Face for highlighting text that was bidi-reordered in confusing ways."
  :version "29.1")

;; Matches one or more of: U+202A (LRE), U+202B (RLE), U+202D (LRO),
;; U+202E (RLO), U+2066 (LRI), U+2067 (RLI), U+2068 (FSI).
(defvar reorder-starters "[\u202A\u202B\u202D\u202E\u2066-\u2068]+"
  "Regular expression for characters that start forced-reordered text.")

;; Matches one or more of U+202C (PDF) and U+2069 (PDI), or a single
;; newline.
(defvar reorder-enders "[\u202C\u2069]+\\|\n"
  "Regular expression for characters that end forced-reordered text.")

(defun highlight-confusing-reorderings (beg end)
  "Highlight text in region that might be bidi-reordered in suspicious ways.
This command finds and highlights segments of buffer text that could have
been reordered on display by using directional control characters, such
as RLO and LRI, in a way that their display is deliberately meant to
confuse the reader.  These techniques can be used for obfuscating
malicious source code.  The suspicious stretches of buffer text are
highlighted using the `confusingly-reordered' face.

If the region is active, check the text inside the region.  Otherwise
check the entire buffer.  When called from Lisp, pass BEG and END to
specify the portion of the buffer to check."
  (interactive
   (if (use-region-p)
       (list (region-beginning) (region-end))
     (list (point-min) (point-max))))
  (save-excursion
    (let (next)
      (goto-char beg)
      ;; Stop as soon as point reaches END: the ender of a highlighted
      ;; stretch may lie beyond END, and searching again from there
      ;; could find the same stretch over and over.
      (while (and (< (point) end)
                  (setq next
                        (bidi-find-overridden-directionality
                         (point) end nil
                         (current-bidi-paragraph-direction))))
        (goto-char next)
        (let* ((start
                ;; Nearest preceding run of starter controls; if there
                ;; is none, begin at the affected character itself.
                (or (save-excursion
                      (re-search-backward reorder-starters nil t))
                    next))
               (after-ender
                ;; Position just after the next ender, or nil if the
                ;; text runs to the end of the buffer without one.
                (save-excursion
                  (re-search-forward reorder-enders nil t)))
               (finish (if after-ender (1- after-ender) (point-max))))
          (with-silent-modifications
            ;; The property list is quoted data, so the face names
            ;; inside it must NOT be quoted again.
            (add-text-properties start finish
                                 '(font-lock-face
                                   confusingly-reordered
                                   face confusingly-reordered)))
          (goto-char (or after-ender (point-max))))))))
|
||||
|
||||
;;; mule-cmds.el ends here
|
||||
|
|
|
|||
20
src/bidi.c
20
src/bidi.c
|
|
@ -3564,11 +3564,17 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
|
|||
}
|
||||
|
||||
/* Utility function for looking for strong directional characters
|
||||
whose bidi type was overridden by a directional override. */
|
||||
whose bidi type was overridden by directional override or embedding
|
||||
or isolate control characters. */
|
||||
ptrdiff_t
|
||||
bidi_find_first_overridden (struct bidi_it *bidi_it)
|
||||
{
|
||||
ptrdiff_t found_pos = ZV;
|
||||
/* Maximum bidi levels we allow for L2R and R2L characters. Note
|
||||
that these are levels after resolving explicit embeddings,
|
||||
overrides, and isolates, i.e. before resolving implicit levels. */
|
||||
int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2;
|
||||
int max_r2l = 1;
|
||||
|
||||
do
|
||||
{
|
||||
|
|
@ -3577,10 +3583,20 @@ bidi_find_first_overridden (struct bidi_it *bidi_it)
|
|||
former. */
|
||||
bidi_type_t type = bidi_resolve_weak (bidi_it);
|
||||
|
||||
/* Detect strong L or R types that have been overridden by
|
||||
explicit overrides. */
|
||||
if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
|
||||
|| (type == STRONG_L
|
||||
&& (bidi_it->orig_type == STRONG_R
|
||||
|| bidi_it->orig_type == STRONG_AL)))
|
||||
|| bidi_it->orig_type == STRONG_AL))
|
||||
/* Detect strong L or R types that were pushed into higher
|
||||
embedding levels (and will thus reorder) by explicit
|
||||
embeddings and isolates. */
|
||||
|| (bidi_it->orig_type == STRONG_L
|
||||
&& bidi_it->level_stack[bidi_it->stack_idx].level > max_l2r)
|
||||
|| ((bidi_it->orig_type == STRONG_R
|
||||
|| bidi_it->orig_type == STRONG_AL)
|
||||
&& bidi_it->level_stack[bidi_it->stack_idx].level > max_r2l))
|
||||
found_pos = bidi_it->charpos;
|
||||
} while (found_pos == ZV
|
||||
&& bidi_it->charpos < ZV
|
||||
|
|
|
|||
19
src/xdisp.c
19
src/xdisp.c
|
|
@ -24511,7 +24511,7 @@ See also `bidi-paragraph-direction'. */)
|
|||
|
||||
DEFUN ("bidi-find-overridden-directionality",
|
||||
Fbidi_find_overridden_directionality,
|
||||
Sbidi_find_overridden_directionality, 2, 3, 0,
|
||||
Sbidi_find_overridden_directionality, 3, 4, 0,
|
||||
doc: /* Return position between FROM and TO where directionality was overridden.
|
||||
|
||||
This function returns the first character position in the specified
|
||||
|
|
@ -24530,12 +24530,18 @@ a buffer is preferable when the buffer is displayed in some window,
|
|||
because this function will then be able to correctly account for
|
||||
window-specific overlays, which can affect the results.
|
||||
|
||||
Optional argument BASE-DIR specifies the base paragraph direction
|
||||
of the text. It should be a symbol, either `left-to-right'
|
||||
or `right-to-left', and defaults to `left-to-right'.
|
||||
|
||||
Strong directional characters `L', `R', and `AL' can have their
|
||||
intrinsic directionality overridden by directional override
|
||||
control characters RLO (u+202e) and LRO (u+202d). See the
|
||||
function `get-char-code-property' for a way to inquire about
|
||||
control characters RLO (u+202E) and LRO (u+202D). They can also
|
||||
have their directionality affected by other formatting control
|
||||
characters: LRE (u+202A), RLE (u+202B), LRI (u+2066), and RLI (u+2067).
|
||||
See the function `get-char-code-property' for a way to inquire about
|
||||
the `bidi-class' property of a character. */)
|
||||
(Lisp_Object from, Lisp_Object to, Lisp_Object object)
|
||||
(Lisp_Object from, Lisp_Object to, Lisp_Object object, Lisp_Object base_dir)
|
||||
{
|
||||
struct buffer *buf = current_buffer;
|
||||
struct buffer *old = buf;
|
||||
|
|
@ -24632,10 +24638,9 @@ the `bidi-class' property of a character. */)
|
|||
}
|
||||
|
||||
ptrdiff_t found;
|
||||
bidi_dir_t bdir = EQ (base_dir, Qright_to_left) ? R2L : L2R;
|
||||
do {
|
||||
/* For the purposes of this function, the actual base direction of
|
||||
the paragraph doesn't matter, so just set it to L2R. */
|
||||
bidi_paragraph_init (L2R, &itb, false);
|
||||
bidi_paragraph_init (bdir, &itb, false);
|
||||
while ((found = bidi_find_first_overridden (&itb)) < from_pos)
|
||||
;
|
||||
} while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
|
||||
|
|
|
|||
|
|
@ -99,4 +99,19 @@
|
|||
(width-in-chars (/ (car size) char-width)))
|
||||
(should (equal width-in-chars 3)))))
|
||||
|
||||
(ert-deftest xdisp-tests--find-directional-overrides ()
|
||||
(with-temp-buffer
|
||||
(insert "\
|
||||
int main() {
|
||||
bool isAdmin = false;
|
||||
/* }if (isAdmin) begin admins only */
|
||||
printf(\"You are an admin.\\n\");
|
||||
/* end admins only { */
|
||||
return 0;
|
||||
}")
|
||||
(goto-char (point-min))
|
||||
(should (eq (bidi-find-overridden-directionality (point-min) (point-max)
|
||||
nil)
|
||||
46))))
|
||||
|
||||
;;; xdisp-tests.el ends here
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue