NX實現機制淺析
0x00前言
- GNU Binutils Version:2.34
- Kernel Version:4.15.0
- Debugging Env:Ubuntu 20.04.02 x64
是否開啟NX取決于參數-z設置,而gcc僅僅是將-z keyword傳遞給linker——ld,并不會真正解析該參數:

可以使用-###參數(該參數并不會執行任何命令,僅僅是打印命令及參數。若要執行命令需使用-v參數)查看這一過程:
-v
Print (on standard error output) the commands executed to run the stages of compilation. Also print the version number of the compiler driver program and of the preprocessor and the compiler proper.
-###
Like -v except the commands are not executed and arguments are quoted unless they contain only alphanumeric characters or ./-_. This is useful for shell scripts to capture the driver-generated command lines.
上圖中collect2會執行ld,并為其傳遞參數:


0x01 ld
查看下-z參數含義:

下面從源碼角度來分析。ld相關源碼位于ld目錄下,main函數(位于ldmain.c文件)如下:
ld_config_type config;
......
struct bfd_link_info link_info;
......
int
main (
int argc,
char **argv)
{
......
config.build_constructors =
TRUE;
config.rpath_separator =
':';
config.split_by_reloc = (
unsigned)
-1;
config.split_by_file = (bfd_size_type)
-1;
config.make_executable =
TRUE;
config.magic_demand_paged =
TRUE;
config.text_read_only =
TRUE;
config.print_map_discarded =
TRUE;
link_info.disable_target_specific_optimizations =
-1;
command_line.warn_mismatch =
TRUE;
command_line.warn_search_mismatch =
TRUE;
command_line.check_section_addresses =
-1;
/* We initialize DEMANGLING based on the environment variable
COLLECT_NO_DEMANGLE. The gcc collect2 program will demangle the
output of the linker, unless COLLECT_NO_DEMANGLE is set in the
environment. Acting the same way here lets us provide the same
interface by default. */
demangling = getenv (
"COLLECT_NO_DEMANGLE") ==
NULL;
link_info.allow_undefined_version =
TRUE;
......
parse_args (argc, argv);
parse_args函數負責解析參數,位于lexsup.c文件:
void
parse_args (unsigned argc,
char **argv)
{
......
char *shortopts;
struct option *longopts;
struct option *really_longopts;
......
shortopts = (
char *) xmalloc (OPTION_COUNT *
3 +
2);
longopts = (
struct option *)
xmalloc (
sizeof (*longopts) * (OPTION_COUNT +
1));
really_longopts = (
struct option *)
malloc (
sizeof (*really_longopts) * (OPTION_COUNT +
1));
/* Starting the short option string with '-' is for programs that
expect options and other ARGV-elements in any order and that care about
the ordering of the two. We describe each non-option ARGV-element
as if it were the argument of an option with character code 1. */
shortopts[
0] =
'-';
is =
1;
il =
0;
irl =
0;
for (i =
0; i < OPTION_COUNT; i++)
{
if (ld_options[i].shortopt !=
'\0')
{
shortopts[
is] = ld_options[i].shortopt;
++
is;
if (ld_options[i].opt.has_arg == required_argument
|| ld_options[i].opt.has_arg == optional_argument)
{
shortopts[
is] =
':';
++
is;
if (ld_options[i].opt.has_arg == optional_argument)
{
shortopts[
is] =
':';
++
is;
}
}
}
if (ld_options[i].opt.name != NULL)
{
if (ld_options[i].control == EXACTLY_TWO_DASHES)
{
really_longopts[irl] = ld_options[i].opt;
++irl;
}
else
{
longopts[il] = ld_options[i].opt;
++il;
}
}
}
shortopts[
is] =
'\0';
longopts[il].name = NULL;
really_longopts[irl].name = NULL;
......
parse_args函數首先會由ld_options數組初始化shortopts,longopts以及really_longopts變量內容,這三個變量會傳遞給getopt_long_only與getopt_long函數以解析命令行參數:
......
last_optind =
-1;
while (
1)
{
int longind;
int optc;
static
unsigned
int defsym_count;
/* Using last_optind lets us avoid calling ldemul_parse_args
multiple times on a single option, which would lead to
confusion in the internal static variables maintained by
getopt. This could otherwise happen for an argument like
-nx, in which the -n is parsed as a single option, and we
loop around to pick up the -x. */
if (optind != last_optind)
if (ldemul_parse_args (argc, argv))
continue;
/* getopt_long_only is like getopt_long, but '-' as well as '--'
can indicate a long option. */
opterr =
0;
last_optind = optind;
optc = getopt_long_only (argc, argv, shortopts, longopts, &longind);
if (optc ==
'?')
{
optind = last_optind;
optc = getopt_long (argc, argv,
"-", really_longopts, &longind);
}
if (ldemul_handle_option (optc))
continue;
if (optc ==
-1)
break;
......
ldemul_handle_option函數(位于ldemul.c文件)定義如下:
/* Offer option character OPTC to the active emulation.
   Returns whatever the emulation's handle_option hook returns, or FALSE
   when the emulation does not install such a hook.  */
bfd_boolean
ldemul_handle_option (int optc)
{
  if (!ld_emulation->handle_option)
    return FALSE;

  return ld_emulation->handle_option (optc);
}
ld_emulation定義為static ld_emulation_xfer_type *ld_emulation;,而ld_emulation_xfer_type結構針對不同架構及目標文件類型,其成員會有不同定義。以ELF文件為例,該函數定義位于elf.em文件中(由下圖可以看到同目錄下針對其他架構及目標文件類型的.em文件):

static bfd_boolean
gld${EMULATION_NAME}_handle_option (
int optc)
{
switch (optc)
{
default:
return
FALSE;
......
case
'z':
......
else
if (strcmp (optarg,
"execstack") ==
0)
{
link_info.execstack =
TRUE;
link_info.noexecstack =
FALSE;
}
else
if (strcmp (optarg,
"noexecstack") ==
0)
{
link_info.noexecstack =
TRUE;
link_info.execstack =
FALSE;
}
上述函數調用關系為:

之后main函數調用lang_process(),位于ldlang.c文件中。lang_process()函數會調用ldemul_before_allocation(),該函數同樣位于ldemul.c文件,其定義如下:
/* Invoke the active emulation's before_allocation hook.  The hook pointer
   is called unconditionally; no NULL check is made here.  */
void
ldemul_before_allocation (void)
{
  (*ld_emulation->before_allocation) ();
}
查看elf.em:
static void
gld
${EMULATION_NAME}_before_allocation (void)
{
ldelf_before_allocation (audit, depaudit,
${ELF_INTERPRETER_NAME});
}
ldelf_before_allocation函數定義位于 ldelf.c文件中,其調用 bfd_elf_size_dynamic_sections函數:
......
if (! (bfd_elf_size_dynamic_sections
(link_info.output_bfd, command_line.soname, rpath,
command_line.filter_shlib, audit, depaudit,
(
const
char *
const *) command_line.auxiliary_filters,
&link_info, &sinterp)))
einfo (_(
"%F%P: failed to set dynamic section sizes: %E\n"));
......
而該函數會執行如下操作:
......
if (info->execstack)
elf_stack_flags (output_bfd) = PF_R | PF_W | PF_X;
else
if (info->noexecstack)
elf_stack_flags (output_bfd) = PF_R | PF_W;
......
上述函數調用關系為:

main函數最終執行ldwrite()函數將stack_flags寫入Segment的p_flags字段中:
/* Build the link orders for every statement, optionally split sections,
   and perform the final link of link_info.output_bfd.  */
void
ldwrite (void)
{
  /* Clear the BFD error indicator first; it may still hold something
     like "invalid format" left over from opening the .o files.  */
  bfd_set_error (bfd_error_no_error);
  lang_clear_os_map ();
  lang_for_each_statement (build_link_order);

  /* Sections are split only when one of the two limits differs
     from -1.  */
  if (config.split_by_reloc != (unsigned) -1
      || config.split_by_file != (bfd_size_type) -1)
    split_sections (link_info.output_bfd, &link_info);

  if (bfd_final_link (link_info.output_bfd, &link_info))
    return;

  /* The final link failed.  If no BFD error was recorded, assume a
     suitable message (e.g. unknown symbol) has already been printed and
     just exit; otherwise report the recorded error.  */
  if (bfd_get_error () == bfd_error_no_error)
    xexit (1);
  else
    einfo (_("%F%P: final link failed: %E\n"));
}
調用的bfd_final_link函數針對不同目標文件會有不同實現,以ELF文件為例,該函數對應實現為bfd_elf_final_link,位于elflink.c文件。其調用_bfd_elf_compute_section_file_positions函數,該函數再調用assign_file_positions_except_relocs函數:
/* Work out the file positions of all the sections. This is called by
_bfd_elf_compute_section_file_positions. All the section sizes and
VMAs must be known before this is called.
Reloc sections come in two flavours: Those processed specially as
"side-channel" data attached to a section to which they apply, and those that
bfd doesn't process as relocations. The latter sort are stored in a normal
bfd section by bfd_section_from_shdr. We don't consider the former sort
here, unless they form part of the loadable image. Reloc sections not
assigned here (and compressed debugging sections and CTF sections which
nothing else in the file can rely upon) will be handled later by
assign_file_positions_for_relocs.
We also don't set the positions of the .symtab and .strtab here. */
static bfd_boolean
assign_file_positions_except_relocs (bfd *abfd,
struct bfd_link_info *link_info)
{
struct elf_obj_tdata *tdata = elf_tdata (abfd);
Elf_Internal_Ehdr *i_ehdrp = elf_elfheader (abfd);
const struct elf_backend_data *bed = get_elf_backend_data (abfd);
unsigned int alloc;
if ((abfd->flags & (EXEC_P | DYNAMIC)) ==
0
&& bfd_get_format (abfd) != bfd_core)
{
......
}
else
{
/* Assign file positions for the loaded sections based on the
assignment of sections to segments. */
if (!assign_file_positions_for_load_sections (abfd, link_info))
return
FALSE;
/* And for non-load sections. */
if (!assign_file_positions_for_non_load_sections (abfd, link_info))
return
FALSE;
}
if (!(*bed->elf_backend_modify_headers) (abfd, link_info))
return
FALSE;
/* Write out the program headers. */
alloc = i_ehdrp->e_phnum;
if (alloc !=
0)
{
if (bfd_seek (abfd, i_ehdrp->e_phoff, SEEK_SET) !=
0
|| bed->s->write_out_phdrs (abfd, tdata->phdr, alloc) !=
0)
return
FALSE;
}
return
TRUE;
}
assign_file_positions_for_load_sections函數調用_bfd_elf_map_sections_to_segments將Sections映射到Segments:
static bfd_boolean
assign_file_positions_for_load_sections (bfd *abfd,
struct bfd_link_info *link_info)
{
const
struct
elf_backend_data *
bed =
get_elf_backend_data (
abfd);
struct
elf_segment_map *
m;
struct
elf_segment_map *
phdr_load_seg;
Elf_Internal_Phdr *phdrs;
Elf_Internal_Phdr *p;
file_ptr off;
bfd_size_type maxpagesize;
unsigned
int alloc, actual;
unsigned
int i, j;
struct
elf_segment_map **
sorted_seg_map;
if (link_info ==
NULL
&& !_bfd_elf_map_sections_to_segments (abfd, link_info))
return FALSE;
......
_bfd_elf_map_sections_to_segments負責將stack_flags寫入Segment的p_flags字段:
......
if (elf_stack_flags (abfd))
{
amt = sizeof (struct elf_segment_map);
m = (struct elf_segment_map *) bfd_zalloc (abfd, amt);
if (m ==
NULL)
goto error_return;
m->next =
NULL;
m->p_type = PT_GNU_STACK;
m->p_flags = elf_stack_flags (abfd);
m->p_align = bed->stack_align;
m->p_flags_valid =
1;
m->p_align_valid = m->p_align !=
0;
if (info->stacksize >
0)
{
m->p_size = info->stacksize;
m->p_size_valid =
1;
}
*pm = m;
pm = &m->next;
}
......
變量m為elf_segment_map結構類型,其定義位于internal.h文件:
/* Describes how sections are to be assigned to one program segment.
   Entries are chained through the next field.  */
struct elf_segment_map
{
  /* The following program segment.  */
  struct elf_segment_map *next;
  /* Type of the program segment.  */
  unsigned long p_type;
  /* Flags of the program segment.  */
  unsigned long p_flags;
  /* Physical address of the program segment.  */
  bfd_vma p_paddr;
  /* Offset of the segment's virtual address from the section vma.  */
  bfd_vma p_vaddr_offset;
  /* Alignment of the program segment.  */
  bfd_vma p_align;
  /* Size of the segment in file and memory.  */
  bfd_vma p_size;
  /* Set when p_flags is valid; when clear, the flags are derived from
     the section flags.  */
  unsigned int p_flags_valid : 1;
  /* Set when p_paddr is valid; when clear, the physical address is
     derived from the section lma values.  */
  unsigned int p_paddr_valid : 1;
  /* Set when p_align is valid; when clear, PT_LOAD segment alignment is
     derived from the default maximum page size.  */
  unsigned int p_align_valid : 1;
  /* Set when p_size is valid; when clear, the size is derived from the
     section sizes.  */
  unsigned int p_size_valid : 1;
  /* Set when this segment includes the file header.  */
  unsigned int includes_filehdr : 1;
  /* Set when this segment includes the program headers.  */
  unsigned int includes_phdrs : 1;
  /* Treat this PT_LOAD header as having an lma of zero when headers are
     sorted before file offsets are assigned.  PT_LOAD headers with this
     flag are placed after one with includes_filehdr set and before
     PT_LOAD headers without the flag.  */
  unsigned int no_sort_lma : 1;
  /* Position in the original order, recorded before segments are
     sorted.  */
  unsigned int idx;
  /* Number of sections; may be 0.  */
  unsigned int count;
  /* The sections.  The real number of elements is given by count.  */
  asection *sections[1];
};
上述函數調用關系為:

最終體現在目標文件中:

0x02 kernel
處理程序執行的系統調用為execve:
/*
 * execve system call entry: converts the user-space path with getname()
 * and passes it, together with the untouched argv/envp user pointers,
 * to do_execve().
 */
SYSCALL_DEFINE3(execve,
		const char __user *, filename,
		const char __user *const __user *, argv,
		const char __user *const __user *, envp)
{
	return do_execve(getname(filename), argv, envp);
}
查看do_execve函數定義
/*
 * Wrap the native user-space argv/envp pointers in user_arg_ptr
 * structures and call do_execveat_common() with AT_FDCWD as the
 * directory fd and 0 as the flags.
 */
int do_execve(struct filename *filename,
	      const char __user *const __user *__argv,
	      const char __user *const __user *__envp)
{
	struct user_arg_ptr native_argv = { .ptr.native = __argv };
	struct user_arg_ptr native_envp = { .ptr.native = __envp };

	return do_execveat_common(AT_FDCWD, filename,
				  native_argv, native_envp, 0);
}
do_execveat_common函數定義如下:
/*
* sys_execve() executes a new program.
*/
static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags)
{
char *pathbuf =
NULL;
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
int retval;
if (IS_ERR(filename))
return PTR_ERR(filename);
/*
* We move the actual failure in case of RLIMIT_NPROC excess from
* set*uid() to execve() because too many poorly written programs
* don't check setuid() return code. Here we additionally recheck
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
retval = -EAGAIN;
goto out_ret;
}
/* We're below the limit (still or again), so we don't want to make
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm)
goto out_files;
retval = prepare_bprm_creds(bprm);
if (retval)
goto out_free;
check_unsafe_exec(bprm);
current->in_execve =
1;
file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
sched_exec();
bprm->file = file;
if (fd == AT_FDCWD || filename->name[
0] ==
'/') {
bprm->filename = filename->name;
}
else {
if (filename->name[
0] ==
'\0')
pathbuf = kasprintf(GFP_KERNEL,
"/dev/fd/%d", fd);
else
pathbuf = kasprintf(GFP_KERNEL,
"/dev/fd/%d/%s",
fd, filename->name);
if (!pathbuf) {
retval = -ENOMEM;
goto out_unmark;
}
/*
* Record that a name derived from an O_CLOEXEC fd will be
* inaccessible after exec. Relies on having exclusive access to
* current->files (due to unshare_files above).
*/
if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
bprm->filename = pathbuf;
}
bprm->interp = bprm->filename;
retval = bprm_mm_init(bprm);
if (retval)
goto out_unmark;
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) <
0)
goto out;
bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) <
0)
goto out;
retval = prepare_binprm(bprm);
if (retval <
0)
goto out;
retval = copy_strings_kernel(
1, &bprm->filename, bprm);
if (retval <
0)
goto out;
bprm->exec = bprm->p;
retval = copy_strings(bprm->envc, envp, bprm);
if (retval <
0)
goto out;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval <
0)
goto out;
would_dump(bprm, bprm->file);
retval = exec_binprm(bprm);
if (retval <
0)
goto out;
/* execve succeeded */
current->fs->in_exec =
0;
current->in_execve =
0;
membarrier_execve(current);
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
kfree(pathbuf);
putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
out:
bprm變量指向linux_binprm結構,該結構存儲與可執行文件相關的信息,其定義如下:
/*
 * Holds the arguments that are used while a binary is being loaded.
 */
struct linux_binprm {
	/*
	 * Article note: BINPRM_BUF_SIZE is defined as 128, so buf stores
	 * the first 128 bytes of the executable file.
	 */
	char buf[BINPRM_BUF_SIZE];
#ifdef CONFIG_MMU
	struct vm_area_struct *vma;
	unsigned long vma_pages;
#else
# define MAX_ARG_PAGES	32
	struct page *page[MAX_ARG_PAGES];
#endif
	struct mm_struct *mm;
	unsigned long p;		/* current top of mem */
	unsigned int
		/*
		 * Set once the bprm_set_creds hook has been called
		 * (prepare_binprm() may call it several times for
		 * binfmt_script/misc).
		 */
		called_set_creds:1,
		/*
		 * Set when the latest commoncaps bprm_set_creds call
		 * (there can be several, via prepare_binprm() from the
		 * binfmt_script/misc handlers) ended with elevated
		 * privileges.
		 */
		cap_elevated:1,
		/*
		 * Set by the bprm_set_creds hook when a privilege-gaining
		 * exec has happened.  Used to sanitize the execution
		 * environment and to set AT_SECURE auxv for glibc.
		 */
		secureexec:1;
#ifdef __alpha__
	unsigned int taso:1;
#endif
	unsigned int recursion_depth;	/* only for search_binary_handler() */
	struct file *file;
	struct cred *cred;		/* new credentials */
	int unsafe;			/* how unsafe this exec is (mask of LSM_UNSAFE_*) */
	unsigned int per_clear;		/* bits to clear in current->personality */
	int argc, envc;
	const char *filename;		/* Name of binary as seen by procps */
	const char *interp;		/* Name of the binary really executed.
					   Usually the same as filename, but
					   may differ for binfmt_{misc,script} */
	unsigned interp_flags;
	unsigned interp_data;
	unsigned long loader, exec;
} __randomize_layout;
do_execveat_common函數會填充bprm變量中內容,之后做一些權限檢查,復制及初始化工作,最后調用exec_binprm函數執行可執行文件:
/*
 * Run search_binary_handler() on bprm and, when it returns a
 * non-negative value, emit the audit/trace/ptrace/proc-connector
 * notifications.  Returns the value of search_binary_handler().
 */
static int exec_binprm(struct linux_binprm *bprm)
{
	pid_t pid_before, vpid_before;
	int err;

	/* The pids must be sampled now: load_binary changes them. */
	pid_before = current->pid;
	rcu_read_lock();
	vpid_before = task_pid_nr_ns(current,
				     task_active_pid_ns(current->parent));
	rcu_read_unlock();

	err = search_binary_handler(bprm);
	if (err < 0)
		return err;

	audit_bprm(bprm);
	trace_sched_process_exec(current, pid_before, bprm);
	ptrace_event(PTRACE_EVENT_EXEC, vpid_before);
	proc_exec_connector(current);

	return err;
}
其調用search_binary_handler函數對formats鏈表進行掃描,并執行其load_binary函數,直到其中一個成功解析了可執行文件格式,否則會返回負值:
/*
 * Walk the list of binary format handlers until one of them recognizes
 * the image.  Returns the value of the deciding load_binary() call,
 * -ELOOP when the recursion depth is exceeded, the security check's
 * error, or -ENOENT when no handler was tried.
 */
int search_binary_handler(struct linux_binprm *bprm)
{
	bool need_retry = IS_ENABLED(CONFIG_MODULES);
	struct linux_binfmt *fmt;
	int retval;

	/* This allows 4 levels of binfmt rewrites before failing hard. */
	if (bprm->recursion_depth > 5)
		return -ELOOP;

	retval = security_bprm_check(bprm);
	if (retval)
		return retval;

	retval = -ENOENT;
 retry:
	read_lock(&binfmt_lock);
	list_for_each_entry(fmt, &formats, lh) {
		if (!try_module_get(fmt->module))
			continue;

		/* binfmt_lock is dropped while the handler runs. */
		read_unlock(&binfmt_lock);
		bprm->recursion_depth++;
		retval = fmt->load_binary(bprm);
		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		bprm->recursion_depth--;

		/*
		 * Only a plain -ENOEXEC with mm and file still in place
		 * moves on to the next handler; anything else ends the
		 * search.
		 */
		if (retval == -ENOEXEC && bprm->mm && bprm->file)
			continue;

		read_unlock(&binfmt_lock);
		/* we got to flush_old_exec() and failed after it */
		if (retval < 0 && !bprm->mm)
			force_sigsegv(SIGSEGV, current);
		return retval;
	}
	read_unlock(&binfmt_lock);

	if (!need_retry)
		return retval;

	/* Four printable leading bytes: no binfmt module is requested. */
	if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
	    printable(bprm->buf[2]) && printable(bprm->buf[3]))
		return retval;

	if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
		return retval;

	/* The module request is attempted only once. */
	need_retry = false;
	goto retry;
}
EXPORT_SYMBOL(search_binary_handler);
對于ELF文件,load_binary對應于load_elf_binary,其定義位于binfmt_elf.c文件:
/*
 * linux_binfmt descriptor for ELF: load_binary is load_elf_binary,
 * load_shlib is load_elf_library and core_dump is elf_core_dump.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};
該函數會執行如下操作:
......
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata =
NULL;
......
int executable_stack = EXSTACK_DEFAULT;
elf_ppnt = elf_phdata;
for (i =
0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
switch (elf_ppnt->p_type) {
case PT_GNU_STACK:
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
case PT_LOPROC ... PT_HIPROC:
retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
bprm->file,
false,
&arch_state);
if (retval)
goto out_free_dentry;
break;
}
......
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval <
0)
goto out_free_dentry;
setup_arg_pages函數定義位于exec.c文件中:
/*
* Finalizes the stack vm_area_struct. The flags and permissions are updated,
* the stack is optionally relocated, and some extra space is added.
*/
int
setup_arg_pages(struct linux_binprm *bprm,
unsigned
long stack_top,
int executable_stack)
{
unsigned
long ret;
unsigned
long stack_shift;
struct
mm_struct *
mm =
current->
mm;
struct
vm_area_struct *
vma =
bprm->
vma;
struct
vm_area_struct *
prev =
NULL;
unsigned
long vm_flags;
unsigned
long stack_base;
unsigned
long stack_size;
unsigned
long stack_expand;
unsigned
long rlim_stack;
......
vm_flags = VM_STACK_FLAGS;
/*
* Adjust stack execute permissions; explicitly enable for
* EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
* (arch default) otherwise.
*/
if (unlikely(executable_stack == EXSTACK_ENABLE_X))
vm_flags |= VM_EXEC;
else
if (executable_stack == EXSTACK_DISABLE_X)
vm_flags &= ~VM_EXEC;
vm_flags |= mm->def_flags;
vm_flags |= VM_STACK_INCOMPLETE_SETUP;
ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,vm_flags);
if (ret)
goto out_unlock;
......
mprotect_fixup函數進行檢查過后,會執行如下語句將vm_flags賦值給vma->vm_flags:
int mprotect_fixup(
struct vm_area_struct *vma,
struct vm_area_struct **pprev,
unsigned
long start,
unsigned
long end,
unsigned
long newflags)
{
......
success:
/*
* vm_flags and vm_page_prot are protected by the mmap_sem
* held in write mode.
*/
vma->vm_flags = newflags;
如此一來,bprm->vma中的vm_flags值為newflags,確定了該虛擬地址空間的訪問權限,權限定義位于mm.h文件中:
/*
 * vm_flags in vm_area_struct, see mm_types.h.
 * When changing, update also include/trace/events/mmflags.h
 *
 * Each macro sits on its own line: a preprocessor directive runs to the
 * end of the line, so several #define's cannot share one line.
 */
#define VM_NONE		0x00000000

#define VM_READ		0x00000001	/* currently active flags */
#define VM_WRITE	0x00000002
#define VM_EXEC		0x00000004
#define VM_SHARED	0x00000008
上述函數調用關系為:

關于NX在CPU層面的實現,以Intel為例。Intel SDM中描述如下:

只有CPUID.80000001H:EDX.NX [bit 20] = 1,IA32_EFER.NXE才可以置位為1或是0,其支持PAE,4-level,5-level分頁,不支持32位經典分頁:

IA32_EFER.NXE置位為1,XD位才能被設置,否則保留:

內核可以通過啟動參數noexec=on|off來配置是否啟用NX:
/* Non-zero when the "noexec" parameter asked for NX to be disabled. */
static int disable_nx;

/*
 * noexec = on|off
 *
 * Control non-executable mappings for processes.
 *
 * on      Enable
 * off     Disable
 *
 * Returns -EINVAL when str is NULL, 0 otherwise.  The comparisons are
 * prefix matches (strncmp with the keyword length); a value that starts
 * with neither keyword leaves disable_nx unchanged.  x86_configure_nx()
 * is called in every non-NULL case.
 */
static int __init noexec_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (strncmp(str, "on", 2) == 0)
		disable_nx = 0;
	else if (strncmp(str, "off", 3) == 0)
		disable_nx = 1;

	x86_configure_nx();
	return 0;
}
early_param("noexec", noexec_setup);
x86_configure_nx()函數:
/*
 * Set _PAGE_NX in __supported_pte_mask when the boot CPU has
 * X86_FEATURE_NX and disable_nx is zero; clear it otherwise.
 */
void x86_configure_nx(void)
{
	if (!boot_cpu_has(X86_FEATURE_NX) || disable_nx)
		__supported_pte_mask &= ~_PAGE_NX;
	else
		__supported_pte_mask |= _PAGE_NX;
}
其中X86_FEATURE_NX定義如下:
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
/* Each value is written as word*32 + bit; one #define per line. */
#define X86_FEATURE_SYSCALL		( 1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP			( 1*32+19) /* MP Capable */
#define X86_FEATURE_NX			( 1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT		( 1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_FXSR_OPT		( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES		( 1*32+26) /* "pdpe1gb" GB pages */
#define X86_FEATURE_RDTSCP		( 1*32+27) /* RDTSCP */
#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow extensions */
#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow */
_PAGE_NX定義:
/*
 * With CONFIG_X86_64 or CONFIG_X86_PAE, _PAGE_NX is bit _PAGE_BIT_NX of
 * a pteval_t; in every other configuration it is 0.  Each directive is
 * on its own line so the conditional is well-formed.
 */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
#define _PAGE_DEVMAP	(_AT(u64, 1) << _PAGE_BIT_DEVMAP)
#define __HAVE_ARCH_PTE_DEVMAP
#else
#define _PAGE_NX	(_AT(pteval_t, 0))
#define _PAGE_DEVMAP	(_AT(pteval_t, 0))
#endif
_PAGE_BIT_NX定義語句為#define _PAGE_BIT_NX 63,與Intel SDM中描述一致。__supported_pte_mask會在massage_pgprot函數中使用:
/*
 * Return the raw value of pgprot.  When _PAGE_PRESENT is set in it, the
 * value is first masked with __supported_pte_mask; otherwise it is
 * returned unchanged.
 */
static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
{
	pgprotval_t raw = pgprot_val(pgprot);

	if (!(raw & _PAGE_PRESENT))
		return raw;

	return raw & __supported_pte_mask;
}
/*
 * Build a pte_t from page number page_nr shifted left by PAGE_SHIFT,
 * OR-ed with the protection bits filtered by massage_pgprot().
 */
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
	phys_addr_t phys = (phys_addr_t)page_nr << PAGE_SHIFT;

	return __pte(phys | massage_pgprot(pgprot));
}
/*
 * Build a pmd_t from page number page_nr shifted left by PAGE_SHIFT,
 * OR-ed with the protection bits filtered by massage_pgprot().
 */
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
	phys_addr_t phys = (phys_addr_t)page_nr << PAGE_SHIFT;

	return __pmd(phys | massage_pgprot(pgprot));
}
/*
 * Build a pud_t from page number page_nr shifted left by PAGE_SHIFT,
 * OR-ed with the protection bits filtered by massage_pgprot().
 */
static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
	phys_addr_t phys = (phys_addr_t)page_nr << PAGE_SHIFT;

	return __pud(phys | massage_pgprot(pgprot));
}
0x03 參閱鏈接
- NX(No-eXecute)的實現分析
- Overall Options
- Link-Options
- Collect2
- getopt_long_only(3) – Linux man page