#undef _SCRT_STARTUP_MAIN #include #include "opencv2/opencv.hpp" #include "glm/glm.hpp" #include "glm/ext.hpp" #include #include #include #include #include #include "lua.hpp" #include #include #include #include #include "cuda_runtime.h" #include "device_launch_parameters.h" #include #include #include "cudamathOld.cuh" #include #define RESOLUTION 512 #define ALLOC_MEM_TRIS_NUM 200 __host__ __device__ struct Triangle { public: float points[13] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; float3 col = { 0, 1, 0 }; float3 normal = { 0, 0, 0 }; }; void calculateBounding(Triangle& triangle) { float* points = triangle.points; triangle.points[9] = min(points[0], min(points[3], points[6])); triangle.points[10] = max(points[0], max(points[3], points[6])); triangle.points[11] = min(points[1], min(points[4], points[7])); triangle.points[12] = max(points[1], max(points[4], points[7])); } void calculateNormal(Triangle& triangle) { float3 normal = normalize(cross(sub({ triangle.points[6], triangle.points[7], triangle.points[8] }, { triangle.points[0], triangle.points[1], triangle.points[2] }), sub({ triangle.points[3], triangle.points[4], triangle.points[5] }, { triangle.points[0], triangle.points[1], triangle.points[2] }))); triangle.normal = normal; } __host__ __device__ struct Mesh { public: glm::vec3 position; std::vector triangles; }; __host__ __device__ bool rayTriangleIntersect(float3 v0, float3 v1, float3 v2, float& t, float3 rayPos, float3 rayVec) { float3 dir = rayVec; float3 orig = rayPos; float3 v0v1 = sub(v1, v0); float3 v0v2 = sub(v2, v0); float3 N = cross(v0v1, v0v2); float area2 = length(N); float NdotRayDirection = dot(N, dir); if (fabs(NdotRayDirection) < 0.000001f) return false; float d = -dot(N, v0); t = -(dot(N, orig) + d) / NdotRayDirection; if (t < 0) return false; float3 P = add(orig, mult({ t, t, t }, dir)); float3 C; float3 edge0 = sub(v1, v0); float3 vp0 = sub(P, v0); C = cross(edge0, vp0); if (dot(N, C) < 0) return false; float3 edge1 = sub(v2, v1); float3 vp1 = sub(P, v1); C = cross(edge1, vp1); if (dot(N, C) < 0) return false; float3 edge2 = sub(v0, v2); float3 vp2 = sub(P, v2); C = cross(edge2, vp2); if (dot(N, C) < 0) return false; return true; } __host__ __device__ bool raySphereIntersect(float x, float y, float z, float radius, float3 rayPos, float3 rayVec, float &t) { float3 oc = sub(rayPos, { x, y, z }); float a = dot(rayVec, rayVec); float B = 2.0 * dot(oc, rayVec); float c = dot(oc, oc) - radius * radius; float discriminant = B * B - 4 * a * c; if (discriminant >= 0.0) { float numerator = -B - sqrtf(discriminant); if (numerator > 0.0) { float dist = numerator / (2.0 * a); t = dist; return true; } } return false; } __device__ void calculate_ray(Triangle* tris, float& closestT, Triangle& closest, float3 rayVec, float3 rayPos, bool& sunHit, float playerX, float playerY, float playerZ, float sunX, float sunY, float sunZ) { float t; for (int i = 0;i < ALLOC_MEM_TRIS_NUM;i++) { float3 p1{ tris[i].points[0], tris[i].points[1], tris[i].points[2] }; float3 p2{ tris[i].points[3], tris[i].points[4], tris[i].points[5] }; float3 p3{ tris[i].points[6], tris[i].points[7], tris[i].points[8] }; if (rayTriangleIntersect(p1, p2, p3, t, rayPos, rayVec)) { if (t < closestT) { closestT = t; closest = tris[i]; } } } if (raySphereIntersect(playerX, playerY, playerZ, 0.2f, rayPos, rayVec, t)) { if (t < closestT) { closestT = t; Triangle sphereTri; sphereTri.normal = normalize(sub({ playerX, playerY, playerZ }, add(rayPos, mult(rayVec, { t, t, t })))); sphereTri.col = { 0.0f, 1.0f, 0.0f }; closest = sphereTri; } } if (raySphereIntersect(sunX, sunY, sunZ, 1.0f, rayPos, rayVec, t)) { if (t < closestT) { closestT = t; sunHit = true; } } } __device__ void calculate_ray_less_outputs(Triangle* tris, float& closestT, float3 rayVec, float3 rayPos, float playerX, float playerY, float playerZ) { for (int i = 0;i < ALLOC_MEM_TRIS_NUM;i++) { float3 p1{ tris[i].points[0], tris[i].points[1], tris[i].points[2] }; float3 p2{ tris[i].points[3], tris[i].points[4], tris[i].points[5] }; float3 p3{ tris[i].points[6], tris[i].points[7], tris[i].points[8] }; float t; if (rayTriangleIntersect(p1, p2, p3, t, rayPos, rayVec)) { if (t < closestT) { closestT = t; } } } float3 oc = sub(rayPos, { playerX, playerY, playerZ }); float a = dot(rayVec, rayVec); float B = 2.0 * dot(oc, rayVec); float c = dot(oc, oc) - 0.2f * 0.2f; float discriminant = B * B - 4 * a * c; if (discriminant >= 0.0) { float numerator = -B - sqrtf(discriminant); if (numerator > 0.0) { float dist = numerator / (2.0 * a); if (dist < closestT) { closestT = dist; } } } } __global__ void _draw_pix(int y, float camX, float camY, float camZ, float sunX, float sunY, float sunZ, float playerX, float playerY, float playerZ, float degreesXZ, float degreesYZ, curandState* rand_state, Triangle* tris, int sample_size, int* r, int* g, int* b) { float i = threadIdx.x; float j = blockIdx.x; float3 rayVec = normalize({ -(i / RESOLUTION - 0.5f), -((j + y * RESOLUTION * 0.25f) / RESOLUTION - 0.5f), 1.0f }); float3 rayPos{ camX, camY, camZ }; int3 fragColor{ 255, 100, 100 }; float SINF = sinf(degreesXZ); float COSF = cosf(degreesXZ); float output2[1][3] = { { 0, 0, 0 } }; float input1[1][3]; float input2[3][3]; input2[0][0] = 1; input2[0][1] = 0; input2[0][2] = 0; input2[1][0] = 0; input2[2][0] = 0; input2[1][1] = COSF; input2[1][2] = -SINF; input2[2][1] = SINF; input2[2][2] = COSF; input1[0][0] = rayVec.x; input1[0][1] = rayVec.y; input1[0][2] = rayVec.z; for (int _ = 0;_ < 1;_++) for (int Y = 0;Y < 3;Y++) for (int k = 0;k < 3;k++) { output2[_][Y] += input1[_][k] * input2[k][Y]; } rayVec = { (float)output2[0][0], (float)output2[0][1], (float)output2[0][2] }; SINF = sinf(degreesYZ); COSF = cosf(degreesYZ); float output22[1][3] = { { 0, 0, 0 } }; input2[0][0] = COSF; input2[0][1] = 0; input2[0][2] = SINF; input2[1][0] = 0; input2[2][0] = -SINF; input2[1][1] = 1; input2[1][2] = 0; input2[2][1] = 0; input2[2][2] = COSF; input1[0][0] = rayVec.x; input1[0][1] = rayVec.y; input1[0][2] = rayVec.z; for (int _ = 0;_ < 1;_++) for (int Y = 0;Y < 3;Y++) for (int k = 0;k < 3;k++) { output22[_][Y] += input1[_][k] * input2[k][Y]; } rayVec = { (float)output22[0][0], (float)output22[0][1], (float)output22[0][2] }; rayVec = normalize(rayVec); Triangle closest; float closeT = 1000.0f; bool sunHit = false; calculate_ray(tris, closeT, closest, rayVec, rayPos, sunHit, playerX, playerY, playerZ, sunX, sunY, sunZ); if (sunHit) { fragColor = { 255, 255, 255 }; } else if (closeT < 1000.0f) { float3 intersect = add(rayPos, mult({ closeT, closeT, closeT }, rayVec)); float totalR = 0.0f, totalG = 0.0f, totalB = 0.0f; for (int n = 0;n < sample_size;n++) { curand_init(9384 + n, threadIdx.x + blockIdx.x, 0, rand_state); float3 rand = { curand_uniform(rand_state) * 0.1f - 0.05f, curand_uniform(rand_state) * 0.1f - 0.05f, curand_uniform(rand_state) * 0.1f - 0.05f }; float3 seperateVecs[2] = { normalize(sub(normalize(reflect(rayVec, closest.normal)), rand)), normalize(sub(normalize(sub({ sunX, sunY, sunZ }, intersect)), rand)) }; float3 seperateCols[2] = { closest.col, { 0.0f, 0.0f, 0.0f } }; float seperateLums[2] = { 0.0f, 0.0f }; rayPos = add(intersect, mult({ 0.005f, 0.005f, 0.005f }, seperateVecs[1])); closeT = 1000.0f; calculate_ray_less_outputs(tris, closeT, seperateVecs[1], rayPos, playerX, playerY, playerZ); if (closeT == 1000.0f) { seperateLums[0] = max(dot(closest.normal, normalize(sub(intersect, { sunX, sunY, sunZ }))), 0.0f) * 255; } rayPos = add(intersect, mult({ 0.005f, 0.005f, 0.005f }, seperateVecs[0])); closeT = 1000.0f; Triangle closest2; calculate_ray(tris, closeT, closest2, seperateVecs[0], rayPos, sunHit, playerX, playerY, playerZ, sunX, sunY, sunZ); if (sunHit) { seperateCols[1] = { 255.0f, 255.0f, 255.0f }; seperateLums[1] = 1.0f; } else if (closeT < 1000.0f) { seperateCols[1] = closest2.col; float3 intersect2 = add(rayPos, mult({ closeT, closeT, closeT }, seperateVecs[0])); float3 rayVec2 = normalize(sub({ sunX, sunY, sunZ }, intersect2)); rayPos = add(intersect2, mult({ 0.005f, 0.005f, 0.005f }, rayVec2)); closeT = 1000.0f; calculate_ray_less_outputs(tris, closeT, rayVec2, rayPos, playerX, playerY, playerZ); if (closeT == 1000.0f) { seperateLums[1] = max(dot(closest2.normal, normalize(sub(rayPos, { sunX, sunY, sunZ }))), 0.0f) * 255; } } totalR += ((seperateLums[1] * seperateCols[1].x) * 0.5f + (seperateLums[0] * seperateCols[0].x) * 0.5f) / sample_size; totalG += ((seperateLums[1] * seperateCols[1].y) * 0.5f + (seperateLums[0] * seperateCols[0].y) * 0.5f) / sample_size; totalB += ((seperateLums[1] * seperateCols[1].z) * 0.5f + (seperateLums[0] * seperateCols[0].z) * 0.5f) / sample_size; } fragColor = { (int)totalR, (int)totalG, (int)totalB }; } r[(int)i + (int)j * RESOLUTION] = fragColor.x; g[(int)i + (int)j * RESOLUTION] = fragColor.y; b[(int)i + (int)j * RESOLUTION] = fragColor.z; } class Wrapper { public: int* r; int* g; int* b; }; bool loadFromObjectFile(std::string sFilename, std::vector& anyData) { std::ifstream f(sFilename); if (!f.is_open()) return false; // Local cache of verts std::vector verts; int i = -1; while (!f.eof()) { char line[128]; f.getline(line, 128); std::strstream s; s << line; char junk; if (line[0] == 'v') { glm::vec3 v; s >> junk >> v[0] >> v[1] >> v[2]; verts.push_back(v); } if (line[0] == 'f') { i++; int f[3]; s >> junk >> f[0] >> f[1] >> f[2]; if (i < ALLOC_MEM_TRIS_NUM) { anyData[i].points[0] = verts[f[0] - 1][0]; anyData[i].points[1] = verts[f[0] - 1][1]; anyData[i].points[2] = verts[f[0] - 1][2] + 10.0f; anyData[i].points[3] = verts[f[1] - 1][0]; anyData[i].points[4] = verts[f[1] - 1][1]; anyData[i].points[5] = verts[f[1] - 1][2] + 10.0f; anyData[i].points[6] = verts[f[2] - 1][0]; anyData[i].points[7] = verts[f[2] - 1][1]; anyData[i].points[8] = verts[f[2] - 1][2] + 10.0f; anyData[i].col = { (float)(rand() % 255) / 255, (float)(rand() % 255) / 255, (float)(rand() % 255) / 255 }; calculateBounding(anyData[i]); calculateNormal(anyData[i]); } } } return true; } Wrapper helper(int y, float camX, float camY, float camZ, float sunX, float sunY, float sunZ, float playerX, float playerY, float playerZ, float degreesXZ, float degreesYZ, Triangle* tris, curandState* state, int depth) { Triangle* dev_tris = nullptr; cudaMalloc(&dev_tris, sizeof(Triangle) * ALLOC_MEM_TRIS_NUM); cudaMemcpy(dev_tris, tris, sizeof(Triangle) * ALLOC_MEM_TRIS_NUM, cudaMemcpyHostToDevice); int* dev_outputR = nullptr; int outputR[(RESOLUTION * (RESOLUTION / 4))] = { 255 }; cudaMalloc(&dev_outputR, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int)); cudaMemcpy(dev_outputR, outputR, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int), cudaMemcpyHostToDevice); int* dev_outputG = nullptr; int outputG[(RESOLUTION * (RESOLUTION / 4))] = { 255 }; cudaMalloc(&dev_outputG, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int)); cudaMemcpy(dev_outputG, outputG, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int), cudaMemcpyHostToDevice); int* dev_outputB = nullptr; int outputB[(RESOLUTION * (RESOLUTION / 4))] = { 255 }; cudaMalloc(&dev_outputB, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int)); cudaMemcpy(dev_outputB, outputB, (RESOLUTION * (RESOLUTION / 4)) * sizeof(int), cudaMemcpyHostToDevice); curandState* dev_state = nullptr; cudaMalloc(&dev_state, sizeof(curandState)); cudaMemcpy(dev_state, state, sizeof(curandState), cudaMemcpyHostToDevice); _draw_pix<<>>(y, camX, camY, camZ, sunX, sunY, sunZ, playerX, playerY, playerZ, degreesXZ, degreesYZ, dev_state, dev_tris, depth, dev_outputR, dev_outputG, dev_outputB); cudaDeviceSynchronize(); cudaMemcpy(outputR, dev_outputR, RESOLUTION * (RESOLUTION / 4) * sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(outputG, dev_outputG, RESOLUTION * (RESOLUTION / 4) * sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(outputB, dev_outputB, RESOLUTION * (RESOLUTION / 4) * sizeof(int), cudaMemcpyDeviceToHost); cudaFree(dev_tris); cudaFree(dev_outputR); cudaFree(dev_outputG); cudaFree(dev_outputB); Wrapper wrapper; wrapper.r = outputR; wrapper.g = outputG; wrapper.b = outputB; return wrapper; } float camPos[3] = { 0, 0, 0 }; float camRot[3] = { -0.0, 0, 0 }; float sunPos[3] = { 0.0f, 10.0f, 10.0f }; float mouseDiff[2] = { 0, 0 }; void mouseCallback(int event, int x, int y, int flags, void* userData) { mouseDiff[0] = (float)x / RESOLUTION * glm::two_pi() * 2; mouseDiff[1] = (float)y / RESOLUTION * glm::two_pi() * 2; } void main() { srand(time(NULL)); Mesh mesh; mesh.triangles = std::vector(ALLOC_MEM_TRIS_NUM); loadFromObjectFile("C:/Users/arthu/ObjFiles/helmet.obj", mesh.triangles); cv::Mat canvas; glm::mat4 projection = glm::perspectiveFov(glm::half_pi() / 2.0f, 2.0f, 2.0f, 0.01f, 100.0f); float frameCount = 0; glm::mat4 identity = glm::identity(); float playerPos[3] = { 0, 2.0f, 0 }; float playerVec[3] = { 0, 0, 0 }; float playerRotY = 0.0f; curandState* state; float cameraDist = 4.0f; float depth_UNDERCOVER = 1.0f; int depth = 1.0f; while (true) { frameCount++; canvas = cv::Mat::zeros(cv::Size(RESOLUTION, RESOLUTION), CV_8UC3); std::vector oldTris = mesh.triangles; float s = clock(); glm::quat rotQuat = glm::angleAxis(camRot[0], glm::vec3{ 1.0f, 0.0f, 0.0f }); glm::mat4 rotMatX = glm::mat4_cast(rotQuat); rotQuat = glm::angleAxis(camRot[1], glm::vec3{ 0.0f, 1.0f, 0.0f }); glm::mat4 rotMatY = glm::mat4_cast(rotQuat); rotQuat = glm::angleAxis(camRot[2], glm::vec3{ 0.0f, 0.0f, 1.0f }); glm::mat4 rotMatZ = glm::mat4_cast(rotQuat); glm::vec4 lookVector = { 0, 0, 1, 0 }; lookVector = rotMatX * rotMatY * rotMatZ * lookVector; std::vector tris = mesh.triangles; float closeT = cameraDist; float3 rayVec = normalize({ -cosf(playerRotY + glm::half_pi()), 0.2f, -sinf(playerRotY + glm::half_pi()) }); for (int i = 0;i < ALLOC_MEM_TRIS_NUM;i++) { float3 p1{ tris[i].points[0], tris[i].points[1], tris[i].points[2] }; float3 p2{ tris[i].points[3], tris[i].points[4], tris[i].points[5] }; float3 p3{ tris[i].points[6], tris[i].points[7], tris[i].points[8] }; float t; if (rayTriangleIntersect(p1, p2, p3, t, { playerPos[0], playerPos[1], playerPos[2] }, rayVec)) { if (t < closeT) { closeT = t; } } } camPos[0] = playerPos[0] + max(closeT - 0.1f, 0.0f) * rayVec.x; camPos[1] = playerPos[1] + max(closeT - 0.1f, 0.0f) * rayVec.y; camPos[2] = playerPos[2] + max(closeT - 0.1f, 0.0f) * rayVec.z; // Player physics rayVec = { 0, -1, 0 }; closeT = 0.2f; for (int i = 0;i < ALLOC_MEM_TRIS_NUM;i++) { float3 p1{ tris[i].points[0], tris[i].points[1], tris[i].points[2] }; float3 p2{ tris[i].points[3], tris[i].points[4], tris[i].points[5] }; float3 p3{ tris[i].points[6], tris[i].points[7], tris[i].points[8] }; float t; if (rayTriangleIntersect(p1, p2, p3, t, { playerPos[0], playerPos[1], playerPos[2] }, rayVec)) { if (t < closeT) { closeT = t; playerVec[0] = -tris[i].normal.x * 0.01f; playerVec[1] = -tris[i].normal.y * 0.015f; playerVec[2] = -tris[i].normal.z * 0.01f; } } } playerVec[1] -= 0.01f; playerVec[0] *= 0.97f; playerVec[2] *= 0.97f; playerPos[0] += playerVec[0]; playerPos[1] += playerVec[1]; playerPos[2] += playerVec[2]; depth_UNDERCOVER += 1.0f; depth = (int)depth_UNDERCOVER; for (int y = 0;y < 4;y++) { Wrapper col = helper(y, camPos[0], camPos[1], camPos[2], sunPos[0], sunPos[1], sunPos[2], playerPos[0], playerPos[1], playerPos[2], camRot[0], camRot[1], &mesh.triangles[0], state, depth); for (int y2 = 0;y2 < RESOLUTION / 4;y2++) { for (int x = 0;x < RESOLUTION;x++) { cv::Vec3b& at = canvas.at(y2 + y * (RESOLUTION / 4), x); at.val[0] = col.r[(x + y2 * (RESOLUTION))]; at.val[1] = col.g[(x + y2 * (RESOLUTION))]; at.val[2] = col.b[(x + y2 * (RESOLUTION))]; } } } mesh.triangles = oldTris; std::cout << (clock() - s) / CLOCKS_PER_SEC << std::endl; cv::imshow("Output", canvas); cv::setMouseCallback("Output", mouseCallback); cv::waitKey(1); if (GetKeyState('W') & 0x8000) { playerVec[2] += lookVector.z * 0.1; playerVec[0] += -lookVector.x * 0.1; playerVec[1] += lookVector.y * 0.1; depth_UNDERCOVER = 1.0f; } if (GetKeyState('S') & 0x8000) { playerVec[2] -= lookVector.z * 0.1; playerVec[0] -= -lookVector.x * 0.1; playerVec[1] -= lookVector.y * 0.1; depth_UNDERCOVER = 1.0f; } playerRotY = mouseDiff[0]; camRot[1] = mouseDiff[0]; if (GetKeyState('R') & 0x8000) { playerVec[1] -= 0.01f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('T') & 0x8000) { playerVec[1] += 0.01f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('F') & 0x8000) { sunPos[0] += 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('G') & 0x8000) { sunPos[0] -= 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('H') & 0x8000) { sunPos[1] += 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('J') & 0x8000) { sunPos[1] -= 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('K') & 0x8000) { sunPos[2] += 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('L') & 0x8000) { sunPos[2] -= 0.1f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('E') & 0x8000) { cameraDist -= 0.05f; depth_UNDERCOVER = 1.0f; } if (GetKeyState('Q') & 0x8000) { cameraDist += 0.05f; depth_UNDERCOVER = 1.0f; } } return; }