Nodejs源码解析之module

来源：互联网发布：淘宝网汉服编辑：程序博客网时间：2024/05/20 04:51

module管理是Nodejs中比较有特色的部分，官方有详细的文档 https://nodejs.org/api/modules.html ，那里介绍了一些基本的使用规则，主要的内容如下：

文件和模块之间是一一对应关系：使用方法就是require，后续源码解析中会详细介绍什么是require，以及它是如何实现的。文件的名字就是一个id，也就是标识符。
如何访问和被确认为主模块：简单的说就是被nodejs启动的模块为主模块，其他都不是，这个可以在源码中说明。
模块的导入：作为单个文件，目录，以及node_module如何导入。这里在源码中也有体现
缓存：被require导入的模块只导入一次，不会有多次，这个是靠cache来实现的，查找的关键字是文件（模块）的文件名（含全路径）。同时需要注意循环依赖问题。
module 对象的具体的含义与使用，这个也在源码中有实现

这里主要解析module.js 的全部源码和部分node.js的代码，以说明上述的问题。在review module.js 的源码前，推荐下面两篇博文：
http://stackoverflow.com/questions/9475792/how-does-require-in-node-js-work
http://www.tuicool.com/articles/yE7zAv

module.js的导入

module.js是由node.js在Nodejs程序启动的时候导入的。module.js中使用的require函数是在node.js文件中定义的。具体的代码在node.js中，代码分为两段：

      // 程序启动时候，会利用NativeModule去require我们的module.js函数      // 下面会详细介绍NativeModule      var Module = NativeModule.require('module');      .............(省去)      } else {        // Main entry point into most programs:        // 这个就是文档中所说的主模块问题，也就是利用node app.js启动程序时候，        // app.js就是主模块，也就是module.js里面定义的runMain函数。        Module.runMain();      }

下面详细说明NativeModule的使用

    // 对象构造函数    function NativeModule(id) {    // 文件名，自动加上了js后缀，说明其仅仅解析js的module文件    this.filename = id + '.js';    // 用于记录本模块的标志符    this.id = id;    // 模块的导出对象    this.exports = {};    // 是否导出标志符    this.loaded = false;  }  // 内部的模块， 具体可以查看 process.binding的用法  NativeModule._source = process.binding('natives');  // 用于做NativeModule的内部缓存  NativeModule._cache = {};  // 这个是在module.js中使用的require函数  NativeModule.require = function(id) {    // 特殊处理，如果是native_module，直接返回    if (id == 'native_module') {      return NativeModule;    }     // 查看是否有缓存，    var cached = NativeModule.getCached(id);    if (cached) {     // 如果有，直接返回导出的对象      return cached.exports;    }    // 是否在NativeModule中存在    if (!NativeModule.exists(id)) {      throw new Error('No such native module ' + id);    }    // 这个应该是和C/C++的系统模块交互    process.moduleLoadList.push('NativeModule ' + id);    // 生成一个新的NativeModule对象    var nativeModule = new NativeModule(id);     // 做缓存    nativeModule.cache();    // 编译模块    nativeModule.compile();    // 导出模块的对象    return nativeModule.exports;  };  // 查找是否有缓存  NativeModule.getCached = function(id) {    return NativeModule._cache[id];  }  // 查找id是否存在，从代码上可以看出NativeModule要求在c/c++代码中有体现  NativeModule.exists = function(id) {    return NativeModule._source.hasOwnProperty(id);  }  // 获取source  NativeModule.getSource = function(id) {    return NativeModule._source[id];  }  // 对于script的封装，这个是后续理解module，exports等的关键。任何的require  //一个文件或者模块其实就是将文件里面的内容，封装成一个函数  NativeModule.wrap = function(script) {    return NativeModule.wrapper[0] + script + NativeModule.wrapper[1];  };  // 具体的函数头和尾  NativeModule.wrapper = [    '(function (exports, require, module, __filename, __dirname) { ',    '\n});'  ];   //编译  NativeModule.prototype.compile = function() {    var source = NativeModule.getSource(this.id);    source = NativeModule.wrap(source);    // 这个是javascript虚拟机的功能 后续会有详细介绍    var fn = runInThisContext(source, { filename: this.filename });    // 查看上述的wrap 函数定义，也就是说在module.js中使用的module    // 
其实是NativeModule对象    // 使用的require函数，其实是NativeModule.require    fn(this.exports, NativeModule.require, this, this.filename);     // 模块已经导入    this.loaded = true;  };  // 增加cache  NativeModule.prototype.cache = function() {    NativeModule._cache[this.id] = this;  };

由于module模块的内容比较多，分如下重点函数进行源码分析，然后再分析帮助函数

Require函数详解

下面是require函数的源码，也就是我们通常调用require('./test')时所执行的函数。这里需要强调的是，require不是关键字，而是一个函数。

// Loads a module at the given file path. Returns that module's
// `exports` property.
Module.prototype.require = function(path) {
  // Argument check: path must be truthy.
  assert(path, 'missing path');
  // Argument check: path must be a string.
  assert(util.isString(path), 'path must be a string');
  // Delegate to Module._load; note the extra argument `this`, i.e. the
  // module doing the requiring becomes the parent.
  return Module._load(path, this);
};


// Check the cache for the requested file.
// 1. If a module already exists in the cache: return its exports object.
// 2. If the module is native: call `NativeModule.require()` with the
//    filename and return the result.
// 3. Otherwise, create a new module for the file and save it to the cache.
//    Then have it load  the file contents before returning its exports
//    object.
//
// @request: the file or directory to import.
// @parent:  the module that requested the import (`this` in
//           Module.prototype.require above).
// @isMain:  marks the main module; per the article only the call made from
//           node.js sets it (that call site is not shown in this excerpt).
Module._load = function(request, parent, isMain) {
  // Log which module requested the current one.
  if (parent) {
    debug('Module._load REQUEST  ' + (request) + ' parent: ' + parent.id);
  }

  // Resolve the request to the file name used as cache key.
  var filename = Module._resolveFilename(request, parent);

  // Step 1: already cached -> return the cached exports.
  var cachedModule = Module._cache[filename];
  if (cachedModule) {
    return cachedModule.exports;
  }

  // Step 2: native module -> let NativeModule handle it.
  if (NativeModule.exists(filename)) {
    // REPL is a special case, because it needs the real require.
    if (filename == 'repl') {
      var replModule = new Module('repl');
      replModule._compile(NativeModule.getSource('repl'), 'repl.js');
      NativeModule._cache.repl = replModule;
      return replModule.exports;
    }

    debug('load native module ' + request);
    return NativeModule.require(filename);
  }

  // Step 3: create a new Module object.
  var module = new Module(filename, parent);

  if (isMain) {
    // The main module is published as process.mainModule ...
    process.mainModule = module;
    // ... and gets the special id '.'.
    module.id = '.';
  }

  // Cache the module BEFORE loading it.
  Module._cache[filename] = module;

  // Tracks whether load() threw.
  var hadException = true;

  try {
    // Perform the actual load.
    module.load(filename);
    hadException = false;
  } finally {
    // On failure, remove the cache entry added above.
    if (hadException) {
      delete Module._cache[filename];
    }
  }

  // Return the exports of the newly created module.
  return module.exports;
};

// Given a file name, pass it to the proper extension handler.
// The handlers defined below are for .js, .json and .node.
Module.prototype.load = function(filename) {
  // Log which file is being loaded and for which module id.
  debug('load ' + JSON.stringify(filename) +
        ' for module ' + JSON.stringify(this.id));

  // A module must not be loaded twice.
  assert(!this.loaded);
  // Record the file name of this module.
  this.filename = filename;
  // Lookup paths for this module, derived from the file's directory.
  this.paths = Module._nodeModulePaths(path.dirname(filename));

  // Extension of the file, defaulting to '.js'.
  var extension = path.extname(filename) || '.js';
  // Unknown extensions are treated as '.js' as well.
  if (!Module._extensions[extension]) extension = '.js';
  // Dispatch to the handler registered for the extension.
  Module._extensions[extension](this, filename);
  this.loaded = true;
};

// The three extension handlers defined in this excerpt follow.

// Native extension for .js
Module._extensions['.js'] = function(module, filename) {
  // Read the file content synchronously.
  var content = fs.readFileSync(filename, 'utf8');
  // Then compile it with _compile (analysed below).
  module._compile(stripBOM(content), filename);
};


// Native extension for .json
Module._extensions['.json'] = function(module, filename) {
  // Read the file content synchronously.
  var content = fs.readFileSync(filename, 'utf8');
  try {
    // The parsed JSON becomes the module's exports.
    module.exports = JSON.parse(stripBOM(content));
  } catch (err) {
    // Prefix the error message with the file name and rethrow.
    err.message = filename + ': ' + err.message;
    throw err;
  }
};


// Handler for .node files (per the article, usually compiled C/C++ addons).
Module._extensions['.node'] = process.dlopen;

// The last function analysed in this section is _compile.

// Run the file contents in the correct scope or sandbox. Expose
// the correct helper variables (require, module, exports) to
// the file.
// Returns exception, if any.
// @content:  the text of the JS file.
// @filename: the file name of the JS file.
Module.prototype._compile = function(content, filename) {
  // Alias for `this`, used by the closures below.
  var self = this;
  // remove shebang
  content = content.replace(/^\#\!.*/, '');

  // This is the `require` that code inside the module sees; it is only a
  // thin wrapper around this module's own require method.
  function require(path) {
    return self.require(path);
  }

  // require.resolve: resolve a request to a file name relative to this
  // module without loading it.
  require.resolve = function(request) {
    return Module._resolveFilename(request, self);
  };

  // Reading require.paths is forbidden: the getter always throws.
  Object.defineProperty(require, 'paths', { get: function() {
    throw new Error('require.paths is removed. Use ' +
                    'node_modules folders, or the NODE_PATH ' +
                    'environment variable instead.');
  }});

  // require.main is the main module.
  require.main = process.mainModule;

  // Enable support to add extra extension types
  require.extensions = Module._extensions;
  require.registerExtension = function() {
    throw new Error('require.registerExtension() removed. Use ' +
                    'require.extensions instead.');
  };

  // Expose the module cache on require as well (functions are objects, so
  // they can carry properties).
  require.cache = Module._cache;

  // Directory of the current file.
  var dirname = path.dirname(filename);

  // The branch below runs only when Module._contextLoad is truthy. According
  // to the article it is initialised as shown in the next comment line
  // (that assignment is not part of this excerpt), and the branch is normally
  // not taken.
  // Module._contextLoad = (+process.env['NODE_MODULE_CONTEXTS'] > 0);
  if (Module._contextLoad) {
    if (self.id !== '.') {
      debug('load submodule');
      // not root module
      var sandbox = {};
      for (var k in global) {
        sandbox[k] = global[k];
      }
      sandbox.require = require;
      sandbox.exports = self.exports;
      sandbox.__filename = filename;
      sandbox.__dirname = dirname;
      sandbox.module = self;
      sandbox.global = sandbox;
      sandbox.root = root;

      return runInNewContext(content, sandbox, { filename: filename });
    }

    debug('load root module');
    // root module
    global.require = require;
    global.exports = self.exports;
    global.__filename = filename;
    global.__dirname = dirname;
    global.module = self;

    return runInThisContext(content, { filename: filename });
  }

  // create wrapper function
  // Module.wrap is defined outside this excerpt; per the article it is the
  // wrap function from node.js, which turns the file content into a function.
  var wrapper = Module.wrap(content);

  // Evaluate the wrapper source; the result is the wrapper function.
  var compiledWrapper = runInThisContext(wrapper, { filename: filename });
  // Debugger support.
  if (global.v8debug) {
    if (!resolvedArgv) {
      // we enter the repl if we're not given a filename argument.
      if (process.argv[1]) {
        resolvedArgv = Module._resolveFilename(process.argv[1], null);
      } else {
        resolvedArgv = 'repl';
      }
    }

    // Set breakpoint on module start
    if (filename === resolvedArgv) {
      global.v8debug.Debug.setBreakPoint(compiledWrapper, 0, 0);
    }
  }
  // Call the wrapper with this module's exports, the local require, the
  // module itself, and the file and directory names. This is why module
  // code can use exports, require, module, __filename and __dirname
  // directly. `this` inside the module body is self.exports.
  var args = [self.exports, require, self, filename, dirname];
  return compiledWrapper.apply(self.exports, args);
};

所以，从上面的源码分析中，我们知道了缓存是如何实现的， module中的变量如exports，id，filename是如何能得到访问的。

module路径解析

这里先可以看一下官方文章中的内容：

require(X) from module at path Y
1. If X is a core module, // 如果是核心模块，
   a. return the core module // 直接返回核心模块，这里的核心模块是指nodejs下lib的内容
   b. STOP // 返回
2. If X begins with './' or '/' or '../' // 如果文件以 "./", "/",或者 "../"形式，
   a. LOAD_AS_FILE(Y + X) // 导入一个文件 返回，如何处理导入文件
   b. LOAD_AS_DIRECTORY(Y + X) // 导入一个目录，返回 如何导入目录，看后面
3. LOAD_NODE_MODULES(X, dirname(Y)) // 导入一个NODE_MODULE，返回。
4. THROW "not found" // 上述都没找到，直接抛出没找到的异常。

LOAD_AS_FILE(X) // 按文件导入
1. If X is a file, load X as JavaScript text.  STOP  // 如果X是一个文件，作为js文件导入，直接返回。直接停止
2. If X.js is a file, load X.js as JavaScript text.  STOP // 加上js后缀为一个文件，直接作为js文件导入 直接停止
3. If X.json is a file, parse X.json to a JavaScript Object.  STOP // 加上json后缀为一个文件，直接作为json文件导入 直接停止
4. If X.node is a file, load X.node as binary addon.  STOP // 加上node后缀为一个文件，直接作为c/c++ addon文件导入 直接停止

LOAD_AS_DIRECTORY(X) // 如何导入一个目录的处理方式
1. If X/package.json is a file, // 查找X/package.json是否存在
   a. Parse X/package.json, and look for "main" field. // 查找json file中是否有main
   b. let M = X + (json main field) // 生成新的文件
   c. LOAD_AS_FILE(M) // 作为文件导入。
2. If X/index.js is a file, load X/index.js as JavaScript text.  STOP // 查看是否存在 X/index.js，如果存在，作为js文件导入
3. If X/index.json is a file, parse X/index.json to a JavaScript object. STOP // 查看是否存在 X/index.json，如果存在，导入json文件作为js对象
4. If X/index.node is a file, load X/index.node as binary addon.  STOP // 查看是否存在 X/index.node，如果存在，作为c/c++的 addon导入

LOAD_NODE_MODULES(X, START) // 导入node_module的步骤
1. let DIRS=NODE_MODULES_PATHS(START) // 返回一个文件目录内容
2. for each DIR in DIRS:       // 对于目录里的内容
   a. LOAD_AS_FILE(DIR/X)      // 作为文件导入。 作为文件导入，查看LOAD_AS_FILE
   b. LOAD_AS_DIRECTORY(DIR/X) // 或者作为一个目录导入， 查看LOAD_AS_DIRECTORY
// 请注意，这里仅仅是说还是作为一个具体的文件或者目录，如果有一个找到了，就需要停止，而不是真的要导入文件里面的所有内容

NODE_MODULES_PATHS(START) // 具体NODE_MODULES文件目录算法
1. let PARTS = path split(START)
2. let I = count of PARTS - 1
3. let DIRS = []
4. while I >= 0,
   a. if PARTS[I] = "node_modules" CONTINUE
   b. DIR = path join(PARTS[0 .. I] + "node_modules")
   c. DIRS = DIRS + DIR
   d. let I = I - 1
5. return DIRS

基本的思想都在文档中做了详细的说明，那么下面分析源代码是如何实现的。废话少说，直接源代码分析：

// Resolve a request to the concrete file name of the module. Called from
// Module._load and from require.resolve. Whatever form the request takes,
// the result is always a single file; "requiring a directory" only means
// that a concrete file is searched for inside that directory.
// Throws an Error with code 'MODULE_NOT_FOUND' when nothing is found.
Module._resolveFilename = function(request, parent) {
  // Native (core) modules resolve to their own name: case 1 of the
  // documented algorithm.
  if (NativeModule.exists(request)) {
    return request;
  }

  // Compute the module id and the candidate directories.
  var resolvedModule = Module._resolveLookupPaths(request, parent);
  // The id returned above (only used for the debug output here).
  var id = resolvedModule[0];
  // The candidate directories returned above.
  var paths = resolvedModule[1];

  // look up the filename first, since that's the cache key.
  debug('looking for ' + JSON.stringify(id) +
        ' in ' + JSON.stringify(paths));

  // Pick the concrete file from the candidate directories.
  var filename = Module._findPath(request, paths);
  // Nothing found: report MODULE_NOT_FOUND.
  if (!filename) {
    var err = new Error("Cannot find module '" + request + "'");
    err.code = 'MODULE_NOT_FOUND';
    throw err;
  }
  return filename;
};

// After the core-module check, resolution relies on two helpers:
//   _resolveLookupPaths: computes the "possible directories" for a request.
//   _findPath: picks one concrete file from those directories by priority.

// Returns a two-element array [id, paths] for the given request.
Module._resolveLookupPaths = function(request, parent) {
  // Core modules first, again.
  if (NativeModule.exists(request)) {
    return [request, []];
  }

  // Requests that do not start with './' or '..'.
  var start = request.substring(0, 2);
  if (start !== './' && start !== '..') {
    // Start from modulePaths. Per the article this is set up in
    // Module._initPaths (not shown in this excerpt).
    var paths = modulePaths;
    if (parent) {
      // Put the requiring module's own paths in front.
      if (!parent.paths) parent.paths = [];
      paths = parent.paths.concat(paths);
    }
    return [request, paths];
  }

  // with --eval, parent.id is not set and parent.filename is null
  if (!parent || !parent.id || !parent.filename) {
    // make require('./path/to/foo') work - normally the path is taken
    // from realpath(__filename) but with eval there is no filename
    // Candidates: Module._nodeModulePaths('.'), then '.', then modulePaths.
    var mainPaths = ['.'].concat(modulePaths);
    mainPaths = Module._nodeModulePaths('.').concat(mainPaths);
    return [request, mainPaths];
  }

  // Is the parent an index module?
  // We can assume the parent has a valid extension,
  // as it already has been accepted as a module.
  var isIndex = /^index\.\w+?$/.test(path.basename(parent.filename));
  var parentIdPath = isIndex ? parent.id : path.dirname(parent.id);
  // The id of the requested module, resolved against the parent's id path.
  var id = path.resolve(parentIdPath, request);

  // make sure require('./path') and require('path') get distinct ids, even
  // when called from the toplevel js file
  if (parentIdPath === '.' && id.indexOf('/') === -1) {
    id = './' + id;
  }

  debug('RELATIVE: requested:' + request +
        ' set ID to: ' + id + ' from ' + parent.id);

  // For './'- or '..'-style requests the only candidate directory is the
  // directory of the requiring module's file.
  return [id, [path.dirname(parent.filename)]];
};

// Next: _findPath, which picks the file from the candidate directories.
// Returns the file name found, or false when no candidate matches.
Module._findPath = function(request, paths) {
  // Registered extensions (.js, .json and .node in this excerpt).
  var exts = Object.keys(Module._extensions);

  // A request starting with '/' replaces the candidate list with [''].
  // NOTE(review): the article remarks that this check is POSIX-specific and
  // advises against requiring paths that start with '/' — confirm.
  if (request.charAt(0) === '/') {
    paths = [''];
  }

  var trailingSlash = (request.slice(-1) === '/');

  // Cache of earlier lookups, to avoid searching again.
  var cacheKey = JSON.stringify({request: request, paths: paths});
  if (Module._pathCache[cacheKey]) {
    // Cache hit: return the stored file name.
    return Module._pathCache[cacheKey];
  }

  // For each path
  for (var i = 0, PL = paths.length; i < PL; i++) {
    var basePath = path.resolve(paths[i], request);
    var filename;

    if (!trailingSlash) {
      // try to join the request to the path
      filename = tryFile(basePath);

      // The `!trailingSlash` test below is redundant inside this branch.
      if (!filename && !trailingSlash) {
        // try it with each of the extensions
        filename = tryExtensions(basePath, exts);
      }
    }

    // Next, try the directory's package.json (via tryPackage).
    if (!filename) {
      filename = tryPackage(basePath, exts);
    }

    if (!filename) {
      // try it with each of the extensions at "index"
      filename = tryExtensions(path.resolve(basePath, 'index'), exts);
    }

    if (filename) {
      // Remember the result for later lookups.
      Module._pathCache[cacheKey] = filename;
      return filename;
    }
  }
  return false;
};

// So, within one candidate directory the priority is:
// 1. the exact file,
// 2. the file with one of the extensions appended,
// 3. package.json,
// 4. index plus one of the extensions.
// The candidate directories are tried in order and the first match is
// returned immediately.

还有一些帮助函数，如readPackage， tryPackage，tryFile，_initPaths没有做详细的说明。

上述就是module的全部重要的源码说明。

0 0