之前看chenx6大佬的博客學習了一下編寫基礎的LLVM Pass,但是那個有很明顯的問題是,作者為了處理Function內部重復引用的多次解密的問題,特判了引用次數,如果存在多處對global string的引用是無法進行混淆的。
但是實際的編程中很難不會引用多處字符串,所以那個只能混淆簡單代碼。我后面根據評論區的說法對此優化了一下,改成Function的EntryBasicBlock處解密,但是過不了llvm-dis,感覺是在alloc棧變量的時候出了問題,暫時不知道怎么解決,后面如果有能力的話再重新寫一遍吧。
之后學習了一下pluto-obfuscator(https://github.com/bluesadi/Pluto-Obfuscator)項目,里面有一份GlobalEncryption.cpp,借此機會學習一下,順便寫一份New PassManager版本的。
pluto-obfuscator的全局變量加密對全局整數和數組變量進行了處理,采用的方式是編譯期加密,運行期解密,就是在IR階段對全局變量加密,然后對每個全局變量調用解密函數并添加到.ctors中,讓程序在運行期進行全局構造的時候解密。
看雪有一個帖子(https://bbs.kanxue.com/thread-263107.htm)總結幾種加密方式總結得比較好,搬運一下。

runOnModule
首先獲取Module的LLVMContext,獲取所有的全局變量,添加到GVs中。
// Snapshot every global variable of the module before any of them is modified.
INIT_CONTEXT(M);
vector<GlobalVariable *> GVs;
for (GlobalVariable &GV : M.getGlobalList()) {
  GVs.push_back(&GV);
}
然后篩選出需要加密的全局變量,ObfuTimes是混淆次數,默認為1。
for (int i = 0; i < ObfuTimes; i++) {
  for (GlobalVariable *GV : GVs) {
    // Only integer and array globals are encrypted.
    if (!GV->getValueType()->isIntegerTy() &&
        !GV->getValueType()->isArrayTy()) {
      continue;
    }
    // Select ".str" globals (or everything when OnlyStr is off); LLVM IR
    // metadata must be left untouched.
    if (GV->hasInitializer() && GV->getInitializer() &&
        (GV->getName().contains(".str") || !OnlyStr)
        // Do not encrypt globals having a section named "llvm.metadata"
        && !GV->getSection().equals("llvm.metadata")) {
      Constant *initializer = GV->getInitializer();
      // At most one of these two casts succeeds; it selects the branch below.
      ConstantInt *intData = dyn_cast<ConstantInt>(initializer);
      ConstantDataArray *arrData = dyn_cast<ConstantDataArray>(initializer);
然后分別處理數組和整數類型的全局變量,在這里直接進行加密,然后調用insertArrayDecryption和insertIntDecryption將解密函數添加到全局構造函數表中。
if (arrData) {
  // Element size in bytes, element count, and total byte length of the array.
  uint32_t eleSize = arrData->getElementByteSize();
  uint32_t eleNum = arrData->getNumElements();
  uint32_t arrLen = eleNum * eleSize;
  char *data = const_cast<char *>(arrData->getRawDataValues().data());
  // NOTE(review): dataCopy is never deleted in this snippet; presumably
  // getRaw copies the bytes, in which case this buffer leaks -- confirm.
  char *dataCopy = new char[arrLen];
  memcpy(dataCopy, data, arrLen);
  // Generate the key.
  uint64_t key = cryptoutils->get_uint64_t();
  // A simple xor encryption: byte i of the array is XORed with key byte
  // (i % eleSize), i.e. every element is XORed with the same key bytes.
  for (uint32_t i = 0; i < arrLen; i++) {
    dataCopy[i] ^= ((char *)&key)[i % eleSize];
  }
  GV->setInitializer(ConstantDataArray::getRaw(
      StringRef(dataCopy, arrLen), eleNum, arrData->getElementType()));
  // The decryption constructor stores to the global, so it cannot stay constant.
  GV->setConstant(false);
  insertArrayDecryption(M, {GV, key, eleNum});
} else if (intData) {
  uint64_t key = cryptoutils->get_uint64_t();
  ConstantInt *enc =
      CONST(intData->getType(), key ^ intData->getZExtValue());
  GV->setInitializer(enc);
  GV->setConstant(false);
  insertIntDecryption(M, {GV, key, 1LL});
}
insertArrayDecryption
因為在解密的方式上其實很像,這里只記錄比較復雜的數組解密,這里用IRBuilder構造了一個for循環來實現解密,整數全局變量只要和密鑰異或就行了,原理是一樣的。
首先構造一個函數,返回值是void。
// Build a `void f()` function whose name is derived from the encrypted global.
vector<Type *> args;
FunctionType *funcType =
    FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
string funcName = genHashedName(encGV.GV);
FunctionCallee callee = M.getOrInsertFunction(funcName, funcType);
Function *func = cast<Function>(callee.getCallee());
然后構造四個BasicBlock,用于實現for循環的幾個階段。
BasicBlock *entry = BasicBlock::Create(*CONTEXT, "entry", func);
// for(int i = 0; i < length; i++)
// Loop condition
BasicBlock *forCond = BasicBlock::Create(*CONTEXT, "for.cond", func);
// Loop body
BasicBlock *forBody = BasicBlock::Create(*CONTEXT, "for.body", func);
// i++
BasicBlock *forInc = BasicBlock::Create(*CONTEXT, "for.inc", func);
// Loop exit (labelled "for.end"; the original reused the "for.inc" label)
BasicBlock *forEnd = BasicBlock::Create(*CONTEXT, "for.end", func);
首先獲取func的EntryBasicBlock,然后初始化循環變量。
// Entry block: allocate the loop counter "i", store 0 into it, and branch to
// the loop condition.
// NOTE(review): TYPE_I32 / CONST_I32 look like Pluto helper macros for the
// i32 type and i32 constants -- they are not defined in this excerpt.
IRBuilder<> builder(*CONTEXT);
builder.SetInsertPoint(entry);
AllocaInst *indexPtr = builder.CreateAlloca(TYPE_I32, CONST_I32(1), "i");
builder.CreateStore(CONST_I32(0), indexPtr);
builder.CreateBr(forCond);
forCond實現i < length的部分,如果i < length成立則跳轉到循環體,如果不成立則跳出循環。
// Condition block: reload i and continue into the body while i < len
// (signed comparison), otherwise leave the loop.
builder.SetInsertPoint(forCond);
LoadInst *index = builder.CreateLoad(TYPE_I32, indexPtr);
ICmpInst *cond =
    cast<ICmpInst>(builder.CreateICmpSLT(index, CONST_I32(encGV.len)));
builder.CreateCondBr(cond, forBody, forEnd);
循環體內就是解密的過程了,其實就是和密鑰的每一位進行異或,最后跳轉到i++。
// Body block: address element i of the global array, XOR it with the key
// (as a constant of the element type) and store the result back in place.
builder.SetInsertPoint(forBody);
Value *indexList[2] = {CONST_I32(0), index};
Value *ele = builder.CreateGEP(encGV.GV, ArrayRef<Value *>(indexList, 2));
ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
Type *eleTy = arrTy->getElementType();
Value *encEle =
    builder.CreateXor(builder.CreateLoad(ele), CONST(eleTy, encGV.key));
builder.CreateStore(encEle, ele);
builder.CreateBr(forInc);
forInc實現的就是i++了,這里比較簡單,最后跳轉到forCond進行條件判斷,這樣就實現了循環。
// Increment block: store (index + 1) back into the counter slot, then jump
// back to the condition block. `index` is the value loaded in for.cond.
builder.SetInsertPoint(forInc);
builder.CreateStore(builder.CreateAdd(index, CONST_I32(1)), indexPtr);
builder.CreateBr(forCond);
forEnd就是返回ret,最后再將函數寫入.ctors中,實現運行期全局構造。
// Exit block: return, then register the finished function as a global
// constructor with priority 0.
builder.SetInsertPoint(forEnd);
builder.CreateRetVoid();
appendToGlobalCtors(M, func, 0);
以下是我改寫成New PassManager的GlobalsEncryption.cpp,加密部分沒做修改,所有代碼在libObfuscator/tree/pluto-enc(https://github.com/AimiP02/libObfuscator/tree/pluto-enc)。
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>
#include "CryptoUtils.h"
using namespace llvm;
// Describes one encrypted global handed to the decryption-stub generators.
// Callers brace-initialise this as {GV, key, len}, so field order matters.
struct EncryptedGV {
// The global variable whose initializer was encrypted.
GlobalVariable *GV;
// XOR key used for the encryption; the stub XORs with it again to decrypt.
uint64_t key;
// Number of elements to decrypt (callers pass 1 for a scalar integer).
uint32_t len;
};
namespace {
// -gvobfus-times=N: how many encryption rounds to apply to each eligible
// global (default 1). Compared against an `int` loop counter by the pass.
static cl::opt<int>
    ObfuTimes("gvobfus-times", cl::init(1),
              cl::desc("Run GlobalsEncryption pass time(s)"));
// -onlystr: when set, only globals whose name contains ".str" are encrypted
// (default: off, i.e. all eligible globals are encrypted).
static cl::opt<bool> OnlyStr("onlystr", cl::init(false),
                             cl::desc("Encrypt string variable only"));
class GVObfuscator : public PassInfoMixin {
public:
GVObfuscator() {}
~GVObfuscator() {}
LLVMContext *ctx;
virtual void InsertIntDecryption(Module &M, EncryptedGV encGV);
virtual void InsertArrayDecryption(Module &M, EncryptedGV encGV);
virtual PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
/// Derives a function name for the decryption stub of \p GV.
///
/// The seed string is "<module name>_<hex id>", where the id is whatever
/// Module::getMDKindID returns for the global's name. The seed is hashed with
/// SHA-1 and the digest is returned as lowercase hex, two digits per byte.
std::string GenHashedName(GlobalVariable *GV) {
  Module &parent = *GV->getParent();
  const std::string seed = formatv("{0}_{1:x-}", parent.getName(),
                                   parent.getMDKindID(GV->getName()));

  SHA1 hasher;
  hasher.update(seed);
  StringRef digest = hasher.final();

  // The fill character is sticky; the field width must be re-set per byte.
  std::stringstream hexOut;
  hexOut << std::hex << std::setfill('0');
  for (const char byte : digest) {
    hexOut << std::setw(2) << (unsigned)(byte & 0xFF);
  }
  return hexOut.str();
}
/// Emits `void <hash>()` that XORs the integer global `encGV.GV` with
/// `encGV.key` in place, and registers it as a global constructor
/// (priority 0) so the value is restored before the program's own code runs.
///
/// \param M      Module that receives the new function.
/// \param encGV  The encrypted global and its key (`len` is unused here).
void GVObfuscator::InsertIntDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  FunctionType *funcType =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);

  // Always create a fresh function. getOrInsertFunction would hand back the
  // stub from a previous round (the hashed name is identical for the same
  // global), and a second "entry" block appended to it would be unreachable.
  // Function::Create renames on collision, and the stub needs no external
  // visibility.
  Function *func = Function::Create(funcType, GlobalValue::InternalLinkage,
                                    GenHashedName(encGV.GV), M);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  IRBuilder<> builder(entry);

  // Use the typed load overload: the pointee type cannot be recovered from
  // the pointer operand once pointers are opaque.
  Type *valueTy = encGV.GV->getValueType();
  Value *encrypted = builder.CreateLoad(valueTy, encGV.GV);
  Value *decrypted =
      builder.CreateXor(encrypted, ConstantInt::get(valueTy, encGV.key));
  builder.CreateStore(decrypted, encGV.GV);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}
/// Emits `void <hash>()` containing the IR equivalent of
///
///     for (i32 i = 0; i < encGV.len; i++) GV[i] ^= (elemTy)encGV.key;
///
/// and registers it as a global constructor (priority 0).
///
/// \param M      Module that receives the new function.
/// \param encGV  The encrypted array global, its key and its element count.
void GVObfuscator::InsertArrayDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  FunctionType *funcType =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);

  // Always create a fresh function. getOrInsertFunction would hand back the
  // stub from a previous round (the hashed name is identical for the same
  // global), and the blocks appended to it would be unreachable.
  Function *func = Function::Create(funcType, GlobalValue::InternalLinkage,
                                    GenHashedName(encGV.GV), M);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  BasicBlock *forCond = BasicBlock::Create(C, "for.cond", func);
  BasicBlock *forBody = BasicBlock::Create(C, "for.body", func);
  BasicBlock *forInc = BasicBlock::Create(C, "for.inc", func);
  BasicBlock *forEnd = BasicBlock::Create(C, "for.end", func);

  IRBuilder<> builder(C);
  Type *Int32Ty = builder.getInt32Ty();
  auto *arrTy = cast<ArrayType>(encGV.GV->getValueType());
  Type *eleTy = arrTy->getElementType();

  // entry: i = 0
  builder.SetInsertPoint(entry);
  AllocaInst *indexPtr =
      builder.CreateAlloca(Int32Ty, ConstantInt::get(Int32Ty, 1, false), "i");
  builder.CreateStore(ConstantInt::get(Int32Ty, 0), indexPtr);
  builder.CreateBr(forCond);

  // for.cond: i < len. The length is a uint32_t, so compare unsigned; a
  // signed compare would see lengths >= 2^31 as negative and skip the loop.
  builder.SetInsertPoint(forCond);
  LoadInst *index = builder.CreateLoad(Int32Ty, indexPtr);
  Value *cond =
      builder.CreateICmpULT(index, ConstantInt::get(Int32Ty, encGV.len));
  builder.CreateCondBr(cond, forBody, forEnd);

  // for.body: GV[i] ^= key. Typed GEP/load overloads are used because the
  // pointee type cannot be recovered from the pointer operand once pointers
  // are opaque.
  builder.SetInsertPoint(forBody);
  Value *indexList[2] = {ConstantInt::get(Int32Ty, 0), index};
  Value *ele =
      builder.CreateGEP(arrTy, encGV.GV, ArrayRef<Value *>(indexList, 2));
  Value *decrypted = builder.CreateXor(builder.CreateLoad(eleTy, ele),
                                       ConstantInt::get(eleTy, encGV.key));
  builder.CreateStore(decrypted, ele);
  builder.CreateBr(forInc);

  // for.inc: i++
  builder.SetInsertPoint(forInc);
  builder.CreateStore(builder.CreateAdd(index, ConstantInt::get(Int32Ty, 1)),
                      indexPtr);
  builder.CreateBr(forCond);

  // for.end: return
  builder.SetInsertPoint(forEnd);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}
/// Encrypts the initializers of eligible globals and emits one decryption
/// constructor per encrypted global, repeated `ObfuTimes` times.
///
/// Eligible globals have an integer type (at most 64 bits) or an
/// integer-element ConstantDataArray initializer, are not placed in the
/// "llvm.metadata" section, are not LLVM intrinsic globals, and -- when
/// `OnlyStr` is set -- have ".str" in their name.
///
/// \param M    Module to transform.
/// \param MAM  Unused.
/// \return `PreservedAnalyses::none()` if anything was encrypted, otherwise
///         `PreservedAnalyses::all()`.
PreservedAnalyses GVObfuscator::run(Module &M, ModuleAnalysisManager &MAM) {
  // Diagnostics go to stderr: stdout may be carrying the emitted IR/bitcode.
  errs() << "Pass start...\n";
  ctx = &M.getContext();

  // Snapshot the globals up front because the loop below mutates the module.
  // "llvm."-prefixed globals are excluded: they are compiler-reserved, and
  // appendToGlobalCtors erases and recreates llvm.global_ctors, which would
  // leave a dangling pointer in this list.
  std::vector<GlobalVariable *> GVs;
  for (GlobalVariable &GV : M.globals()) {
    if (GV.getName().substr(0, 5) == "llvm.")
      continue;
    GVs.push_back(&GV);
  }

  bool changed = false;
  for (int round = 0; round < ObfuTimes; ++round) {
    errs() << "Current ObfuTimes: " << round << "\n";
    for (GlobalVariable *GV : GVs) {
      // Only integer and array globals are candidates.
      Type *valueTy = GV->getValueType();
      if (!valueTy->isIntegerTy() && !valueTy->isArrayTy())
        continue;
      if (!GV->hasInitializer() || !GV->getInitializer())
        continue;
      // With -onlystr, restrict to ".str" globals.
      if (OnlyStr && !GV->getName().contains(".str"))
        continue;
      // Metadata strings live in the "llvm.metadata" *section*; the name of
      // such a global is an ordinary one (e.g. ".str"), so test the section.
      if (GV->getSection() == "llvm.metadata")
        continue;

      Constant *initializer = GV->getInitializer();
      if (auto *arrayData = dyn_cast<ConstantDataArray>(initializer)) {
        // Arrays
        Type *eleTy = arrayData->getElementType();
        // ConstantDataArray may also hold float/double elements, which can
        // neither be XORed nor given an integer key constant.
        if (!eleTy->isIntegerTy())
          continue;

        errs() << "Get global arraydata\n";
        const uint32_t eleSize = arrayData->getElementByteSize();
        const uint32_t eleNum = arrayData->getNumElements();
        const uint32_t arrLen = eleNum * eleSize;
        errs() << "Global Variable: " << *GV << "\n"
               << "Array Length: " << eleSize << " * " << eleNum << " = "
               << arrLen << "\n";

        // Work on an owned copy of the raw bytes; the vector releases it
        // automatically (the previous new[] buffer was never freed).
        const StringRef raw = arrayData->getRawDataValues();
        std::vector<char> encrypted(raw.begin(), raw.end());

        // Generate the key and XOR byte i with key byte (i % eleSize), so
        // every element is XORed with the same eleSize key bytes.
        // NOTE(review): key bytes are taken in host memory order; this
        // presumably matches the truncated key constant used by the
        // decryption stub only when host and target are little-endian.
        const uint64_t key = cryptoutils->get_uint64_t();
        const char *keyBytes = reinterpret_cast<const char *>(&key);
        for (size_t i = 0; i < encrypted.size(); ++i) {
          encrypted[i] ^= keyBytes[i % eleSize];
        }

        GV->setInitializer(ConstantDataArray::getRaw(
            StringRef(encrypted.data(), encrypted.size()), eleNum, eleTy));
        // The decryption constructor stores to the global at start-up.
        GV->setConstant(false);
        InsertArrayDecryption(M, {GV, key, eleNum});
        changed = true;
      } else if (auto *intData = dyn_cast<ConstantInt>(initializer)) {
        // Integers. getZExtValue() asserts on values wider than 64 bits, so
        // such globals are left alone.
        if (intData->getBitWidth() > 64)
          continue;

        const uint64_t key = cryptoutils->get_uint64_t();
        Constant *enc = ConstantInt::get(intData->getType(),
                                         key ^ intData->getZExtValue());
        GV->setInitializer(enc);
        // Same as for arrays: the constructor writes the decrypted value
        // back, so the global must not remain `constant`.
        GV->setConstant(false);
        InsertIntDecryption(M, {GV, key, 1U});
        changed = true;
      }
    }
  }

  errs() << "Pass end...\n";
  // The module was rewritten whenever at least one global was encrypted.
  return changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
} // namespace
// Register the pass: entry point looked up by the plugin loader. It makes
// the pass available under the pipeline name "gvobfus".
extern "C" PassPluginLibraryInfo llvmGetPassPluginInfo() {
  // Positional initialisation (APIVersion, PluginName, PluginVersion,
  // RegisterPassBuilderCallbacks): designated initialisers are C++20-only.
  return {LLVM_PLUGIN_API_VERSION, "GVObfuscator", LLVM_VERSION_STRING,
          [](PassBuilder &PB) {
            PB.registerPipelineParsingCallback(
                [](StringRef Name, ModulePassManager &MPM,
                   ArrayRef<PassBuilder::PipelineElement>) -> bool {
                  if (Name == "gvobfus") {
                    MPM.addPass(GVObfuscator());
                    return true;
                  }
                  // Not our pass name: let other callbacks try.
                  return false;
                });
          }};
}
寫個代碼測試一下。
/* Small test program for the pass: one integer global and four string
 * literals ("!!!The testing string!!!" is used twice but emitted once). */
#include <stdio.h>

int a = 10;

/* Prints a fixed banner followed by the caller's string. */
void func(const char *s) {
  puts("!!!The testing string!!!");
  puts(s);
}

int main() {
  puts("This is a testing string!");
  char ch;
  /* The trailing "\n" in both format strings matches the IR listing below
   * ("6666%c\0A\00" and "WTF?!\0A\00"). */
  if ((ch = getchar()) == '6') {
    printf("6666%c\n", ch);
  } else {
    printf("WTF?!\n");
  }
  func("!!!The testing string!!!");
  return 0;
}
混淆前后的IR對比。
; 混淆前
@a = dso_local global i32 10, align 4
@.str = private unnamed_addr constant [25 x i8] c"!!!The testing string!!!\00", align 1
@.str.1 = private unnamed_addr constant [26 x i8] c"This is a testing string!\00", align 1
@.str.2 = private unnamed_addr constant [8 x i8] c"6666%c\0A\00", align 1
@.str.3 = private unnamed_addr constant [7 x i8] c"WTF?!\0A\00", align 1
; 混淆后
@a = dso_local global i32 -660274230, align 4
@.str = private unnamed_addr global [25 x i8] c"\C5\C5\C5\B0\8C\81\C4\90\81\97\90\8D\8A\83\C4\97\90\96\8D\8A\83\C5\C5\C5\E4", align 1
@.str.1 = private unnamed_addr global [26 x i8] c"\1B'&<o&<o.o;*<;&!(o<;=&!(nO", align 1
@.str.2 = private unnamed_addr global [8 x i8] c"\F6\F6\F6\F6\E5\A3\CA\C0", align 1
@.str.3 = private unnamed_addr global [7 x i8] c"ji{\02\1C7=", align 1
@llvm.global_ctors = appending global [5 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @cc2b6b071cb0cb47a4171a4b1d76a06963d6f5e6, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @e176df9cb36840d9378338da84362465dd29b20a, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"2ec1d2c5cdff4d08047220c5c1ee639ae45deb5a", i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @d7db60557e37f256d7c62e73e03a42051365a247, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"50c74527ef4457f6934c3f7d6291948f2f509e58", i8* null }]

當然這對動調來說是沒啥用的。

對靜態分析來說,這個強度的加密還是有點弱,加密函數很容易就會被看出來,還可以加點另外的平坦化之類的混淆,對分析加密函數也提高難度。
一顆小胡椒
一顆小胡椒
骨哥說事
雷石安全實驗室
一顆小胡椒
FuzzWiki
看雪學苑
合天網安實驗室
瀟湘信安
合天網安實驗室
Andrew
虹科網絡安全