之前看chenx6大佬的博客學習了一下編寫基礎的LLVM Pass,但是那個有很明顯的問題是,作者為了處理Function內部重復引用的多次解密的問題,特判了引用次數,如果存在多處對global string的引用是無法進行混淆的。

但是實際的編程中很難不引用多處字符串,所以那個只能混淆簡單代碼。我后面根據評論區的說法對此優化了一下,改成在Function的EntryBasicBlock處解密,但是過不了llvm-dis,感覺是在alloc棧變量的時候出了問題,暫時不知道怎么解決,后面如果有能力的話再重新寫一遍吧。

之后學習了一下pluto-obfuscator(https://github.com/bluesadi/Pluto-Obfuscator)項目,里面有一份GlobalEncryption.cpp,借此機會學習一下,順便寫一份New PassManager版本的。

pluto-obfuscator的全局變量加密對全局整數和數組變量進行了處理,采用的方式是編譯期加密,運行期解密,就是在IR階段對全局變量加密,然后對每個全局變量調用解密函數并添加到.ctors中,讓程序在運行期進行全局構造的時候解密。

看雪有一個帖子(https://bbs.kanxue.com/thread-263107.htm)對幾種加密方式總結得比較好,搬運一下。

runOnModule

首先獲取Module的LLVMContext,獲取所有的全局變量,添加到GVs中。

    
INIT_CONTEXT(M);
    vector<GlobalVariable *> GVs;
    for (GlobalVariable &GV : M.getGlobalList()) {
        GVs.push_back(&GV);
    }

然后篩選出需要加密的全局變量,ObfuTimes是混淆次數,默認為1。

    
for (int i = 0; i < ObfuTimes; i++) {
        for (GlobalVariable *GV : GVs) {
        // 只對Integer和Array類型進行加密
        if (!GV->getValueType()->isIntegerTy() &&
            !GV->getValueType()->isArrayTy()) {
            continue;
        }
        // 篩出".str"全局變量,LLVM IR的metadata同樣也要保留
        if (GV->hasInitializer() && GV->getInitializer() &&
            (GV->getName().contains(".str") || !OnlyStr)
            // Do not encrypt globals having a section named "llvm.metadata"
            && !GV->getSection().equals("llvm.metadata")) {
            Constant *initializer = GV->getInitializer();
            ConstantInt *intData = dyn_cast<ConstantInt>(initializer);
            ConstantDataArray *arrData = dyn_cast<ConstantDataArray>(initializer);

然后分別處理數組和整數類型的全局變量,在這里直接進行加密,然后調用insertArrayDecryption/insertIntDecryption將解密函數添加到全局構造函數表中。

            
if (arrData) {
 // 獲取數組的長度和數組元素的大小
 uint32_t eleSize = arrData->getElementByteSize();
 uint32_t eleNum = arrData->getNumElements();
 uint32_t arrLen = eleNum * eleSize;
 char *data = const_cast<char *>(arrData->getRawDataValues().data());
 char *dataCopy = new char[arrLen];
 memcpy(dataCopy, data, arrLen);
 // 生成密鑰
 uint64_t key = cryptoutils->get_uint64_t();
 // A simple xor encryption
 for (uint32_t i = 0; i < arrLen; i++) {
  dataCopy[i] ^= ((char *)&key)[i % eleSize];
  }
 GV->setInitializer(ConstantDataArray::getRaw(
   StringRef(dataCopy, arrLen), eleNum, arrData->getElementType()));
  GV->setConstant(false);
 insertArrayDecryption(M, {GV, key, eleNum});
  } else if (intData) {
  uint64_t key = cryptoutils->get_uint64_t();
  ConstantInt *enc =
  CONST(intData->getType(), key ^ intData->getZExtValue());
  GV->setInitializer(enc);
  GV->setConstant(false);
  insertIntDecryption(M, {GV, key, 1LL});
  }

insertArrayDecryption

因為在解密的方式上其實很像,這里只記錄比較復雜的數組解密,這里用IRBuilder構造了一個for循環來實現解密,整數全局變量只要和密鑰異或就行了,原理是一樣的。

首先構造一個函數,返回值是void。

    vector<Type *> args;
    FunctionType *funcType =
        FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
    string funcName = genHashedName(encGV.GV);
    FunctionCallee callee = M.getOrInsertFunction(funcName, funcType);
    Function *func = cast<Function>(callee.getCallee());

然后構造四個BasicBlock,用于實現for循環的幾個階段。

    BasicBlock *entry = BasicBlock::Create(*CONTEXT, "entry", func);
    // for(int i = 0; i < length; i++)
    // 條件判斷
    BasicBlock *forCond = BasicBlock::Create(*CONTEXT, "for.cond", func);
    // 循環體
    BasicBlock *forBody = BasicBlock::Create(*CONTEXT, "for.body", func);
    // i++
    BasicBlock *forInc = BasicBlock::Create(*CONTEXT, "for.inc", func);
    // 結束
    BasicBlock *forEnd = BasicBlock::Create(*CONTEXT, "for.inc", func);

首先獲取func的EntryBasicBlock,然后初始化循環變量。

    
IRBuilder<> builder(*CONTEXT);
    builder.SetInsertPoint(entry);
    AllocaInst *indexPtr = builder.CreateAlloca(TYPE_I32, CONST_I32(1), "i");
    builder.CreateStore(CONST_I32(0), indexPtr);
    builder.CreateBr(forCond);

forCond實現i < length的部分,如果i < length成立則跳轉到循環體,如果不成立則跳出循環。

    builder.SetInsertPoint(forCond);
    LoadInst *index = builder.CreateLoad(TYPE_I32, indexPtr);
    ICmpInst *cond =
        cast<ICmpInst>(builder.CreateICmpSLT(index, CONST_I32(encGV.len)));
    builder.CreateCondBr(cond, forBody, forEnd);

循環體內就是解密的過程了,其實就是和密鑰的每一位進行異或,最后跳轉到i++。

    builder.SetInsertPoint(forBody);
    Value *indexList[2] = {CONST_I32(0), index};
    Value *ele = builder.CreateGEP(encGV.GV, ArrayRef<Value *>(indexList, 2));
    ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
    Type *eleTy = arrTy->getElementType();
    Value *encEle =
        builder.CreateXor(builder.CreateLoad(ele), CONST(eleTy, encGV.key));
    builder.CreateStore(encEle, ele);
    builder.CreateBr(forInc);

forInc實現的就是i++了,這里比較簡單,最后跳轉到forCond進行條件判斷,這樣就實現了循環。

    builder.SetInsertPoint(forInc);
    builder.CreateStore(builder.CreateAdd(index, CONST_I32(1)), indexPtr);
    builder.CreateBr(forCond);

forEnd就是返回ret,最后再將函數寫入.ctors中,實現運行期全局構造。

    builder.SetInsertPoint(forEnd);
    builder.CreateRetVoid();
    appendToGlobalCtors(M, func, 0);

以下是我改寫成New PassManager的GlobalsEncryption.cpp,加密部分沒做修改,所有代碼在libObfuscator/tree/pluto-enc(https://github.com/AimiP02/libObfuscator/tree/pluto-enc)。

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>
#include "CryptoUtils.h"
using namespace llvm;
struct EncryptedGV {
  GlobalVariable *GV;
  uint64_t key;
  uint32_t len;
};
namespace {
// Number of encryption rounds applied to every eligible global (default 1).
// Compared against an int loop counter in GVObfuscator::run.
static cl::opt<int>
    ObfuTimes("gvobfus-times", cl::init(1),
              cl::desc("Run GlobalsEncryption pass <gvobfus-times> time(s)"));
// When set, only globals whose name contains ".str" are encrypted.
static cl::opt<bool> OnlyStr("onlystr", cl::init(false),
                             cl::desc("Encrypt string variable only"));
class GVObfuscator : public PassInfoMixin {
public:
  GVObfuscator() {}
  ~GVObfuscator() {}
  LLVMContext *ctx;
  virtual void InsertIntDecryption(Module &M, EncryptedGV encGV);
  virtual void InsertArrayDecryption(Module &M, EncryptedGV encGV);
  virtual PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
/// Builds the name used for the decryption stub of \p GV.
///
/// The seed string is "<module name>_<id>", where <id> is the value returned
/// by Module::getMDKindID for the global's name, printed as hex without a
/// prefix. The result is the SHA-1 digest of that seed rendered as lowercase
/// hex, two digits per byte.
std::string GenHashedName(GlobalVariable *GV) {
  Module &parent = *GV->getParent();
  const unsigned kindId = parent.getMDKindID(GV->getName());
  std::string seed = formatv("{0}_{1:x-}", parent.getName(), kindId);

  SHA1 hasher;
  hasher.update(seed);
  StringRef hash = hasher.final();

  std::stringstream hexOut;
  // std::hex and the fill character are sticky; only the width has to be
  // re-applied before every byte.
  hexOut << std::hex << std::setfill('0');
  for (char byte : hash) {
    hexOut << std::setw(2)
           << static_cast<unsigned>(static_cast<unsigned char>(byte));
  }
  return hexOut.str();
}
/// Emits a `void()` constructor that decrypts an integer global at start-up.
///
/// The generated function loads the global, XORs it with `encGV.key`
/// (converted to the global's value type by ConstantInt::get), stores the
/// result back and returns. It is appended to `llvm.global_ctors` with
/// priority 0.
///
/// \param M     module that receives the new function.
/// \param encGV global to decrypt and the key it was encrypted with; the
///              global's value type must be an integer type because it is
///              handed to ConstantInt::get.
void GVObfuscator::InsertIntDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  FunctionType *funcType =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
  // Always create a fresh function. GenHashedName yields the same name for
  // the same global, so with gvobfus-times > 1 getOrInsertFunction would hand
  // back the stub from the previous round and a second, unreachable body
  // would be appended to it. Function::Create renames on collision instead.
  // Internal linkage keeps the stub from clashing with other modules.
  Function *func = Function::Create(funcType, GlobalValue::InternalLinkage,
                                    GenHashedName(encGV.GV), &M);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  IRBuilder<> builder(entry);
  // Typed load: the pointee-deducing CreateLoad overload is deprecated.
  Type *valTy = encGV.GV->getValueType();
  LoadInst *val = builder.CreateLoad(valTy, encGV.GV);
  Value *xorVal = builder.CreateXor(val, ConstantInt::get(valTy, encGV.key));
  builder.CreateStore(xorVal, encGV.GV);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}
/// Emits a `void()` constructor that decrypts an array global at start-up.
///
/// The generated function is the IR form of
/// \code
///   for (int i = 0; i < encGV.len; i++) GV[i] ^= (ElementTy)key;
/// \endcode
/// built from five blocks (entry, for.cond, for.body, for.inc, for.end) with
/// the counter kept in a stack slot. It is appended to `llvm.global_ctors`
/// with priority 0.
///
/// \param M     module that receives the new function.
/// \param encGV global to decrypt, the key, and the number of elements. The
///              global's value type must be an array whose element type is an
///              integer type, because the element type is handed to
///              ConstantInt::get.
void GVObfuscator::InsertArrayDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  FunctionType *funcType =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
  // Always create a fresh function. GenHashedName yields the same name for
  // the same global, so with gvobfus-times > 1 getOrInsertFunction would hand
  // back the stub from the previous round and a second, unreachable loop
  // would be appended to it. Function::Create renames on collision instead.
  // Internal linkage keeps the stub from clashing with other modules.
  Function *func = Function::Create(funcType, GlobalValue::InternalLinkage,
                                    GenHashedName(encGV.GV), &M);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  BasicBlock *forCond = BasicBlock::Create(C, "for.cond", func);
  BasicBlock *forBody = BasicBlock::Create(C, "for.body", func);
  BasicBlock *forInc = BasicBlock::Create(C, "for.inc", func);
  // Was mislabelled "for.inc", which made the emitted IR misleading.
  BasicBlock *forEnd = BasicBlock::Create(C, "for.end", func);

  IRBuilder<> builder(entry);
  Type *Int32Ty = builder.getInt32Ty();
  ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
  Type *eleTy = arrTy->getElementType();

  // entry: i = 0
  AllocaInst *indexPtr = builder.CreateAlloca(Int32Ty, nullptr, "i");
  builder.CreateStore(ConstantInt::get(Int32Ty, 0), indexPtr);
  builder.CreateBr(forCond);

  // for.cond: i < len ? for.body : for.end
  builder.SetInsertPoint(forCond);
  LoadInst *index = builder.CreateLoad(Int32Ty, indexPtr);
  Value *cond =
      builder.CreateICmpSLT(index, ConstantInt::get(Int32Ty, encGV.len));
  builder.CreateCondBr(cond, forBody, forEnd);

  // for.body: GV[i] ^= key. Typed GEP/load: the pointee-deducing overloads
  // are deprecated.
  builder.SetInsertPoint(forBody);
  Value *indexList[2] = {ConstantInt::get(Int32Ty, 0), index};
  Value *ele =
      builder.CreateGEP(arrTy, encGV.GV, ArrayRef<Value *>(indexList, 2));
  Value *decEle = builder.CreateXor(builder.CreateLoad(eleTy, ele),
                                    ConstantInt::get(eleTy, encGV.key));
  builder.CreateStore(decEle, ele);
  builder.CreateBr(forInc);

  // for.inc: i++
  builder.SetInsertPoint(forInc);
  builder.CreateStore(builder.CreateAdd(index, ConstantInt::get(Int32Ty, 1)),
                      indexPtr);
  builder.CreateBr(forCond);

  // for.end: return
  builder.SetInsertPoint(forEnd);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}
/// Encrypts eligible global initializers and registers start-up decryptors.
///
/// A global is eligible when it has an initializer, is not placed in the
/// "llvm.metadata" section, passes the `-onlystr` name filter, and its
/// initializer is either a ConstantInt of at most 64 bits or a
/// ConstantDataArray with an integer element type. Each eligible global is
/// XOR-encrypted `gvobfus-times` times; every round uses a fresh key and adds
/// one decryption constructor.
///
/// \param M   module to transform.
/// \param MAM unused.
/// \returns PreservedAnalyses::none() if any global was rewritten, otherwise
///          PreservedAnalyses::all().
PreservedAnalyses GVObfuscator::run(Module &M, ModuleAnalysisManager &MAM) {
  // Diagnostics go to stderr so they cannot mix with IR written to stdout.
  errs() << "Pass start...\n";
  ctx = &M.getContext();

  // Collect only the globals that will actually be encrypted. Snapshotting
  // every global is unsafe: appendToGlobalCtors erases and recreates
  // llvm.global_ctors, so a pointer to a pre-existing llvm.global_ctors would
  // dangle after the first stub is registered.
  std::vector<GlobalVariable *> GVs;
  for (GlobalVariable &GV : M.globals()) {
    if (!GV.hasInitializer())
      continue;
    // With -onlystr, keep only ".str" globals.
    if (OnlyStr && !GV.getName().contains(".str"))
      continue;
    // Globals in the "llvm.metadata" section (e.g. annotation strings) are
    // consumed by the compiler and must stay intact. This is a property of
    // the section, not of the name.
    if (GV.getSection() == "llvm.metadata")
      continue;
    Constant *init = GV.getInitializer();
    if (auto *intInit = dyn_cast<ConstantInt>(init)) {
      // getZExtValue asserts on integers wider than 64 bits.
      if (intInit->getBitWidth() <= 64)
        GVs.push_back(&GV);
    } else if (auto *arrInit = dyn_cast<ConstantDataArray>(init)) {
      // ConstantDataArray may also hold float/double elements, which the
      // decryption stub cannot XOR with a ConstantInt.
      if (arrInit->getElementType()->isIntegerTy())
        GVs.push_back(&GV);
    }
  }

  bool changed = false;
  for (int round = 0; round < ObfuTimes; round++) {
    errs() << "Current ObfuTimes: " << round << "\n";
    for (GlobalVariable *GV : GVs) {
      Constant *initializer = GV->getInitializer();
      if (auto *arrayData = dyn_cast<ConstantDataArray>(initializer)) {
        errs() << "Get global arraydata\n";
        uint32_t eleSize = arrayData->getElementByteSize();
        uint32_t eleNum = arrayData->getNumElements();
        uint32_t arrLen = eleNum * eleSize;
        errs() << "Global Variable: " << *GV << "\n"
               << "Array Length: " << eleSize << " * " << eleNum << " = "
               << arrLen << "\n";

        // Owning copy of the raw bytes; the previous new[] buffer was never
        // freed. getRaw receives the bytes as a StringRef, so the local copy
        // is only needed for the XOR.
        std::string encrypted = arrayData->getRawDataValues().str();
        uint64_t key = cryptoutils->get_uint64_t();
        // NOTE(review): indexing the key's bytes in memory order matches the
        // truncated key used by the stub only on a little-endian host.
        const char *keyBytes = reinterpret_cast<const char *>(&key);
        for (size_t pos = 0; pos < encrypted.size(); pos++) {
          encrypted[pos] ^= keyBytes[pos % eleSize];
        }
        GV->setInitializer(ConstantDataArray::getRaw(
            encrypted, eleNum, arrayData->getElementType()));
        // The stub stores to the global, so it can no longer be constant.
        GV->setConstant(false);
        InsertArrayDecryption(M, {GV, key, eleNum});
        changed = true;
      } else if (auto *intData = dyn_cast<ConstantInt>(initializer)) {
        uint64_t key = cryptoutils->get_uint64_t();
        Constant *enc = ConstantInt::get(intData->getType(),
                                         key ^ intData->getZExtValue());
        GV->setInitializer(enc);
        // Was missing: a `const` integer would stay in read-only storage and
        // the stub's store would fault at start-up.
        GV->setConstant(false);
        InsertIntDecryption(M, {GV, key, 1});
        changed = true;
      }
    }
  }

  errs() << "Pass end...\n";
  // Initializers were rewritten and functions added, so analyses computed on
  // the old module are stale.
  return changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
} // namespace
// 注冊Pass
extern "C" PassPluginLibraryInfo llvmGetPassPluginInfo() {
  return {.APIVersion = LLVM_PLUGIN_API_VERSION,
          .PluginName = "GVObfuscator",
          .PluginVersion = LLVM_VERSION_STRING,
          .RegisterPassBuilderCallbacks = [](PassBuilder &PB) {
            PB.registerPipelineParsingCallback(
                [](StringRef Name, ModulePassManager &MPM,
                   ArrayRef) -> bool {
                  if (Name == "gvobfus") {
                    MPM.addPass(GVObfuscator());
                    return true;
                  }
                  return false;
                });
          }};
}

寫個代碼測試一下。

#include <stdio.h>
int a = 10;
/* Prints a fixed banner line, then the caller-supplied string s, each with
 * puts. The banner literal is the same text main passes in, so the string is
 * referenced from two places. */
void func(const char *s) {
  puts("!!!The testing string!!!");
  puts(s);
}
/* Test driver: prints a greeting, reads one character from stdin and prints
 * a different message depending on whether it is '6', then calls func. */
int main() {
  puts("This is a testing string!");
  /* getchar returns int so that EOF stays distinguishable from any char. */
  int ch = getchar();
  if (ch == '6') {
    /* Trailing "\n" restored: the IR dump shows @.str.2 as "6666%c\0A\00". */
    printf("6666%c\n", ch);
  } else {
    /* Trailing "\n" restored: the IR dump shows @.str.3 as "WTF?!\0A\00". */
    printf("WTF?!\n");
  }
  func("!!!The testing string!!!");
  return 0;
}

混淆前后的IR對比。

; 混淆前
@a = dso_local global i32 10, align 4
@.str = private unnamed_addr constant [25 x i8] c"!!!The testing string!!!\00", align 1
@.str.1 = private unnamed_addr constant [26 x i8] c"This is a testing string!\00", align 1
@.str.2 = private unnamed_addr constant [8 x i8] c"6666%c\0A\00", align 1
@.str.3 = private unnamed_addr constant [7 x i8] c"WTF?!\0A\00", align 1
; 混淆后
@a = dso_local global i32 -660274230, align 4
@.str = private unnamed_addr global [25 x i8] c"\C5\C5\C5\B0\8C\81\C4\90\81\97\90\8D\8A\83\C4\97\90\96\8D\8A\83\C5\C5\C5\E4", align 1
@.str.1 = private unnamed_addr global [26 x i8] c"\1B'&<o&<o.o;*<;&!(o<;=&!(nO", align 1
@.str.2 = private unnamed_addr global [8 x i8] c"\F6\F6\F6\F6\E5\A3\CA\C0", align 1
@.str.3 = private unnamed_addr global [7 x i8] c"ji{\02\1C7=", align 1
@llvm.global_ctors = appending global [5 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @cc2b6b071cb0cb47a4171a4b1d76a06963d6f5e6, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @e176df9cb36840d9378338da84362465dd29b20a, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"2ec1d2c5cdff4d08047220c5c1ee639ae45deb5a", i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @d7db60557e37f256d7c62e73e03a42051365a247, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"50c74527ef4457f6934c3f7d6291948f2f509e58", i8* null }]

當然這對動調來說是沒啥用的。

對靜態分析來說,這個強度的加密還是有點弱,加密函數很容易就會被看出來,還可以加點另外的平坦化之類的混淆,對分析加密函數也提高難度。