#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

namespace KrovEngine {

    // Edge length, in texels, of every Texture2D.
    constexpr int kTextureSize = 512;

    /**
     * Approximates 1 / sqrt(number) using a bit-level initial guess refined by
     * one Newton-Raphson step.
     *
     * The float is reinterpreted through a 32-bit integer with memcpy, so the
     * result does not depend on the width of `long` on the target platform.
     */
    __host__ __device__ float fast_inverse_sqrt(float number)
    {
        const float half = number * 0.5F;

        std::uint32_t bits;
        memcpy(&bits, &number, sizeof(bits));
        bits = 0x5f3759dfu - (bits >> 1);

        float y;
        memcpy(&y, &bits, sizeof(y));
        return y * (1.5F - (half * y * y));
    }

    /**
     * Three-component vector with component-wise arithmetic.
     *
     * All binary operators return a new vector by value.
     */
    template <typename T>
    class Vector3 {
    public:
        T x;
        T y;
        T z;

        __host__ __device__ Vector3(T x, T y, T z) : x(x), y(y), z(z) {}

        // Broadcasts a single value to every component.
        __host__ __device__ Vector3(T x) : x(x), y(x), z(x) {}

        // Default: all components zero.
        __host__ __device__ Vector3() : x((T)0), y((T)0), z((T)0) {}

        // Sets component k to gen(k).
        __host__ __device__ void fill(T(*gen)(int idx))
        {
            this->x = gen(0);
            this->y = gen(1);
            this->z = gen(2);
        }

        __host__ __device__ T dot(const Vector3& other) const
        {
            return this->x * other.x + this->y * other.y + this->z * other.z;
        }

        __host__ __device__ Vector3 cross(const Vector3& other) const
        {
            return Vector3(
                this->y * other.z - this->z * other.y,
                this->z * other.x - this->x * other.z,
                this->x * other.y - this->y * other.x
            );
        }

        __host__ __device__ Vector3 operator-(const Vector3& other) const
        {
            return Vector3(this->x - other.x, this->y - other.y, this->z - other.z);
        }

        __host__ __device__ Vector3 operator*(const Vector3& other) const
        {
            return Vector3(this->x * other.x, this->y * other.y, this->z * other.z);
        }

        __host__ __device__ Vector3 operator/(const Vector3& other) const
        {
            return Vector3(this->x / other.x, this->y / other.y, this->z / other.z);
        }

        __host__ __device__ Vector3 operator+(const Vector3& other) const
        {
            return Vector3(this->x + other.x, this->y + other.y, this->z + other.z);
        }

        __host__ __device__ float magnitude() const
        {
            return sqrtf(this->x * this->x + this->y * this->y + this->z * this->z);
        }

        // Returns a copy scaled by fast_inverse_sqrt of the squared length,
        // so the result is only approximately unit length.
        __host__ __device__ Vector3 normalize() const
        {
            const float inv_mag = fast_inverse_sqrt(this->x * this->x + this->y * this->y + this->z * this->z);
            Vector3 res;
            res.x = this->x * inv_mag;
            res.y = this->y * inv_mag;
            res.z = this->z * inv_mag;
            return res;
        }
    };

    /**
     * Four-component vector with component-wise arithmetic.
     */
    template <typename T>
    class Vector4 {
    public:
        T x;
        T y;
        T z;
        T w;

        __host__ __device__ Vector4(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {}

        // Broadcasts a single value to every component.
        __host__ __device__ Vector4(T x) : x(x), y(x), z(x), w(x) {}

        // Default: all components zero, matching Vector2 and Vector3.
        __host__ __device__ Vector4() : x((T)0), y((T)0), z((T)0), w((T)0) {}

        // Sets component k to gen(k).
        __host__ __device__ void fill(T(*gen)(int idx))
        {
            this->x = gen(0);
            this->y = gen(1);
            this->z = gen(2);
            this->w = gen(3);
        }

        __host__ __device__ T dot(const Vector4& other) const
        {
            return this->x * other.x + this->y * other.y + this->z * other.z + this->w * other.w;
        }

        __host__ __device__ Vector4 operator-(const Vector4& other) const
        {
            return Vector4(this->x - other.x, this->y - other.y, this->z - other.z, this->w - other.w);
        }

        __host__ __device__ Vector4 operator*(const Vector4& other) const
        {
            return Vector4(this->x * other.x, this->y * other.y, this->z * other.z, this->w * other.w);
        }

        __host__ __device__ Vector4 operator/(const Vector4& other) const
        {
            return Vector4(this->x / other.x, this->y / other.y, this->z / other.z, this->w / other.w);
        }

        __host__ __device__ Vector4 operator+(const Vector4& other) const
        {
            return Vector4(this->x + other.x, this->y + other.y, this->z + other.z, this->w + other.w);
        }

        __host__ __device__ float magnitude() const
        {
            return sqrtf(this->x * this->x + this->y * this->y + this->z * this->z + this->w * this->w);
        }

        // Approximate normalisation, see fast_inverse_sqrt.
        __host__ __device__ Vector4 normalize() const
        {
            const float inv_mag = fast_inverse_sqrt(this->x * this->x + this->y * this->y + this->z * this->z + this->w * this->w);
            Vector4 res;
            res.x = this->x * inv_mag;
            res.y = this->y * inv_mag;
            res.z = this->z * inv_mag;
            res.w = this->w * inv_mag;
            return res;
        }
    };

    /**
     * Two-component vector with component-wise arithmetic. Also used as a
     * plain pair, e.g. Vector2<Vector3<float>> for a (min, max) bounding box.
     */
    template <typename T>
    class Vector2 {
    public:
        T x;
        T y;

        __host__ __device__ Vector2(T x, T y) : x(x), y(y) {}

        // Broadcasts a single value to both components.
        __host__ __device__ Vector2(T x) : x(x), y(x) {}

        // Default: both components zero.
        __host__ __device__ Vector2() : x((T)0), y((T)0) {}

        // Sets component k to gen(k).
        __host__ __device__ void fill(T(*gen)(int idx))
        {
            this->x = gen(0);
            this->y = gen(1);
        }

        __host__ __device__ T dot(const Vector2& other) const
        {
            return this->x * other.x + this->y * other.y;
        }

        __host__ __device__ Vector2 operator-(const Vector2& other) const
        {
            return Vector2(this->x - other.x, this->y - other.y);
        }

        __host__ __device__ Vector2 operator*(const Vector2& other) const
        {
            return Vector2(this->x * other.x, this->y * other.y);
        }

        __host__ __device__ Vector2 operator/(const Vector2& other) const
        {
            return Vector2(this->x / other.x, this->y / other.y);
        }

        __host__ __device__ Vector2 operator+(const Vector2& other) const
        {
            return Vector2(this->x + other.x, this->y + other.y);
        }

        __host__ __device__ float magnitude() const
        {
            return sqrtf(this->x * this->x + this->y * this->y);
        }

        // Approximate normalisation, see fast_inverse_sqrt.
        __host__ __device__ Vector2 normalize() const
        {
            const float inv_mag = fast_inverse_sqrt(this->x * this->x + this->y * this->y);
            Vector2 res;
            res.x = this->x * inv_mag;
            res.y = this->y * inv_mag;
            return res;
        }
    };

    /**
     * Fixed-size texture holding a colour plane and a normal plane.
     *
     * `albedo` / `normal` tell the renderer which plane may be sampled.
     * The object is several megabytes in size, so keep instances off the stack.
     */
    class Texture2D {
    public:
        float data[kTextureSize][kTextureSize][3];
        float normal_data[kTextureSize][kTextureSize][3];
        bool normal;
        bool albedo;

        __host__ __device__ Texture2D(bool normal, bool albedo) : normal(normal), albedo(albedo) {}

        // Default: neither plane is marked usable; texel data is left untouched.
        __host__ __device__ Texture2D() : normal(false), albedo(false) {}
    };

    /**
     * Loads an image, resizes it to kTextureSize x kTextureSize and copies it
     * into `tex`.
     *
     * @param tex               texture to write into
     * @param texture_directory path handed to cv::imread
     * @param write_normals     when false the raw 0..255 channel values go to
     *                          tex.data; when true each channel is remapped to
     *                          [-1, 1] and stored in tex.normal_data
     *
     * The `normal` / `albedo` flags of `tex` are not modified.
     */
    void texture_fill(Texture2D& tex, const char* texture_directory, bool write_normals = false)
    {
        cv::Mat cv_tex = cv::imread(texture_directory);
        cv::resize(cv_tex, cv_tex, cv::Size(kTextureSize, kTextureSize));
        cv_tex.convertTo(cv_tex, CV_8UC3);

        for (int y = 0; y < kTextureSize; y++) {
            for (int x = 0; x < kTextureSize; x++) {
                // NOTE(review): cv::Point is (column, row), so this reads the
                // pixel at column y / row x and the stored texture is the
                // transpose of the image. Kept as-is; confirm it is intended.
                const cv::Vec3b& at = cv_tex.at<cv::Vec3b>(cv::Point(y, x));
                for (int c = 0; c < 3; c++) {
                    if (!write_normals) {
                        tex.data[y][x][c] = at.val[c];
                    }
                    else {
                        tex.normal_data[y][x][c] = (((float)at.val[c]) / 255.0f - 0.5f) * 2.0f;
                    }
                }
            }
        }
    }

    /**
     * A single triangle together with its shading attributes.
     */
    class Triangle {
    public:
        Vector3<float> vertices[3];
        Vector3<float> color;
        Vector3<float> normal;
        int texture_idx = 0;
        bool textured = false;
        bool glass = false;
        bool reflective = false;
        float reflective_index = 0.0f;

        __host__ __device__ Triangle(Vector3<float> p1, Vector3<float> p2, Vector3<float> p3)
        {
            this->set_vertices(p1, p2, p3);
        }

        __host__ __device__ Triangle() {
            // Default
        }

        __host__ __device__ void set_vertices(Vector3<float> p1, Vector3<float> p2, Vector3<float> p3)
        {
            this->vertices[0] = p1;
            this->vertices[1] = p2;
            this->vertices[2] = p3;
        }

        // Recomputes `normal` as normalize((v2 - v0) x (v1 - v0)).
        __host__ __device__ void calc_normal()
        {
            const Vector3<float> edge_a = this->vertices[2] - this->vertices[0];
            const Vector3<float> edge_b = this->vertices[1] - this->vertices[0];
            this->normal = edge_a.cross(edge_b).normalize();
        }
    };

    /**
     * A ray with an accumulated colour. `u` / `v` are written by
     * Intersections::ray_triangle on a successful hit.
     */
    class Ray {
    public:
        Vector3<float> origin;
        Vector3<float> direction;
        Vector3<float> color;
        float u = 0.0f;
        float v = 0.0f;

        __host__ __device__ Ray(Vector3<float> origin, Vector3<float> direction, Vector3<float> color)
        {
            this->origin = origin;
            this->direction = direction;
            this->color = color;
        }

        __host__ __device__ Ray() {
            // Default
        }
    };

    namespace LinAlg {
        /**
         * Barycentric coordinates (u, v, w) of p with respect to triangle
         * (a, b, c), such that p = u*a + v*b + w*c for p in the triangle's
         * plane. A degenerate triangle yields a zero denominator.
         */
        __host__ __device__ void barycentric(Vector3<float> p, Vector3<float> a, Vector3<float> b, Vector3<float> c, float& u, float& v, float& w)
        {
            const Vector3<float> v0 = b - a, v1 = c - a, v2 = p - a;
            const float d00 = v0.dot(v0);
            const float d01 = v0.dot(v1);
            const float d11 = v1.dot(v1);
            const float d20 = v2.dot(v0);
            const float d21 = v2.dot(v1);
            const float denom = d00 * d11 - d01 * d01;
            v = (d11 * d20 - d01 * d21) / denom;
            w = (d00 * d21 - d01 * d20) / denom;
            u = 1.0f - v - w;
        }

        // Mirrors v about the plane with the given normal: v - 2 (n . v) n.
        __host__ __device__ Vector3<float> reflect(Vector3<float> v, Vector3<float> normal)
        {
            return v - Vector3<float>(2) * Vector3<float>(normal.dot(v)) * normal;
        }

        // Row-vector product v * [[c,-s,0],[s,c,0],[0,0,1]].
        __host__ __device__ inline Vector3<float> rotate_about_z(const Vector3<float>& v, float angle)
        {
            const float s = sinf(angle);
            const float c = cosf(angle);
            return Vector3<float>(v.x * c + v.y * s, v.y * c - v.x * s, v.z);
        }

        // Row-vector product v * [[c,0,s],[0,1,0],[-s,0,c]].
        __host__ __device__ inline Vector3<float> rotate_about_y(const Vector3<float>& v, float angle)
        {
            const float s = sinf(angle);
            const float c = cosf(angle);
            return Vector3<float>(v.x * c - v.z * s, v.y, v.x * s + v.z * c);
        }

        // Row-vector product v * [[1,0,0],[0,c,-s],[0,s,c]].
        __host__ __device__ inline Vector3<float> rotate_about_x(const Vector3<float>& v, float angle)
        {
            const float s = sinf(angle);
            const float c = cosf(angle);
            return Vector3<float>(v.x, v.y * c + v.z * s, v.z * c - v.y * s);
        }

        /**
         * Rotates v around `center`. The rotation about Z (rotate_by.z) is
         * applied first, then Y (rotate_by.y), then X (rotate_by.x).
         */
        __host__ __device__ Vector3<float> rotate(Vector3<float> v, Vector3<float> rotate_by, Vector3<float> center)
        {
            v = v - center;
            v = rotate_about_z(v, rotate_by.z);
            v = rotate_about_y(v, rotate_by.y);
            v = rotate_about_x(v, rotate_by.x);
            return v + center;
        }
    }

    // Helpers for the subset of the Wavefront OBJ format read by this file.
    namespace ObjFile {
        // True when `line` is a record whose keyword is exactly `tag`
        // (so 'v' matches "v 1 2 3" but not "vn ..." or "vt ...").
        inline bool is_record(const std::string& line, char tag)
        {
            if (line.empty() || line[0] != tag) return false;
            return line.size() == 1 || line[1] == ' ' || line[1] == '\t' || line[1] == '\r';
        }

        // Parses "v x y z". Components that fail to parse are left at zero so
        // that vertex numbering still matches the file.
        inline Vector3<float> parse_vertex(const std::string& line)
        {
            std::istringstream s(line);
            char tag;
            Vector3<float> v;
            s >> tag >> v.x >> v.y >> v.z;
            return v;
        }

        /**
         * Parses the first three vertex references of an "f ..." record into
         * zero-based indices. Accepts "i", "i/t", "i/t/n" and "i//n" tokens
         * and negative (relative) indices. Returns false when fewer than three
         * references are present or any of them is outside [0, vertex_count).
         */
        inline bool parse_face(const std::string& line, std::size_t vertex_count, std::size_t out_indices[3])
        {
            std::istringstream s(line);
            char tag;
            s >> tag;
            for (int k = 0; k < 3; k++) {
                std::string token;
                if (!(s >> token)) return false;

                char* end = nullptr;
                const long raw = std::strtol(token.c_str(), &end, 10);
                if (end == token.c_str()) return false;

                // Positive indices are 1-based; negative ones count back from
                // the most recently read vertex. Zero is invalid and falls out
                // of range below.
                const long resolved = raw > 0 ? raw - 1 : static_cast<long>(vertex_count) + raw;
                if (resolved < 0 || static_cast<std::size_t>(resolved) >= vertex_count) return false;
                out_indices[k] = static_cast<std::size_t>(resolved);
            }
            return true;
        }

        // Builds a triangle from three vertices and the shared material settings.
        inline Triangle make_triangle(const Vector3<float>& a, const Vector3<float>& b, const Vector3<float>& c, const Vector3<float>& color, bool glass, bool reflective, float reflective_index, int texture_idx, bool textured)
        {
            Triangle triangle;
            triangle.set_vertices(a, b, c);
            triangle.color = color;
            triangle.calc_normal();
            triangle.textured = textured;
            triangle.texture_idx = texture_idx;
            triangle.glass = glass;
            triangle.reflective = reflective;
            triangle.reflective_index = reflective_index;
            return triangle;
        }
    }

    /**
     * A named collection of triangles with an axis-aligned bounding box.
     * `bounding.x` is the minimum corner and `bounding.y` the maximum corner;
     * both are only valid after calculate_aabb().
     */
    class GameObject {
    private:
        std::vector<Triangle> triangles;
    public:
        std::string name;
        Vector2<Vector3<float>> bounding;

        /**
         * Appends every triangular face found in an OBJ file to this object.
         *
         * Only "v" and "f" records are used, and only the first three vertex
         * references of a face. Faces that reference a missing vertex are
         * skipped. The bounding box is not recomputed.
         *
         * @return false when the file cannot be opened, true otherwise
         */
        bool load_from_object_file(std::string sFilename, Vector3<float> color, bool glass = false, bool reflective = false, float reflective_index = 0.0f, int texture_idx = 0, bool textured = false)
        {
            std::ifstream file(sFilename);
            if (!file.is_open())
                return false;

            // Local cache of verts
            std::vector<Vector3<float>> verts;

            std::string line;
            while (std::getline(file, line)) {
                if (ObjFile::is_record(line, 'v')) {
                    verts.push_back(ObjFile::parse_vertex(line));
                }
                else if (ObjFile::is_record(line, 'f')) {
                    std::size_t idx[3];
                    if (!ObjFile::parse_face(line, verts.size(), idx)) continue;
                    this->triangles.push_back(ObjFile::make_triangle(verts[idx[0]], verts[idx[1]], verts[idx[2]], color, glass, reflective, reflective_index, texture_idx, textured));
                }
            }
            return true;
        }

        // Returns a copy of the triangle list.
        std::vector<Triangle> get_triangles() const
        {
            return this->triangles;
        }

        // Number of triangles, without copying the list.
        std::size_t triangle_count() const
        {
            return this->triangles.size();
        }

        // Moves every vertex by translate_by. Normals and bounding box are not updated.
        void translate(Vector3<float> translate_by)
        {
            for (Triangle& triangle : this->triangles) {
                for (int k = 0; k < 3; k++) {
                    triangle.vertices[k] = triangle.vertices[k] + translate_by;
                }
            }
        }

        void push_back_triangle(Triangle triangle)
        {
            this->triangles.push_back(triangle);
        }

        // Rotates every vertex around `center` (see LinAlg::rotate) and
        // recomputes the normals. The bounding box is not updated.
        void rotate(Vector3<float> rotate_by, Vector3<float> center)
        {
            for (Triangle& triangle : this->triangles) {
                for (int k = 0; k < 3; k++) {
                    triangle.vertices[k] = LinAlg::rotate(triangle.vertices[k], rotate_by, center);
                }
                triangle.calc_normal();
            }
        }

        // Recomputes `bounding` from all vertices. With no triangles the box
        // is left inverted (min = +1e9, max = -1e9).
        void calculate_aabb()
        {
            Vector3<float> min_point(1000000000.0f, 1000000000.0f, 1000000000.0f);
            Vector3<float> max_point(-1000000000.0f, -1000000000.0f, -1000000000.0f);
            for (const Triangle& triangle : this->triangles) {
                for (int k = 0; k < 3; k++) {
                    const Vector3<float>& p = triangle.vertices[k];
                    min_point = Vector3<float>(fminf(min_point.x, p.x), fminf(min_point.y, p.y), fminf(min_point.z, p.z));
                    max_point = Vector3<float>(fmaxf(max_point.x, p.x), fmaxf(max_point.y, p.y), fmaxf(max_point.z, p.z));
                }
            }
            this->bounding.x = min_point;
            this->bounding.y = max_point;
        }
    };

    /**
     * Reads an OBJ file and appends one GameObject per "o" record to
     * output_game_objects, each with its bounding box computed.
     *
     * Vertex indices are global to the file. Faces that appear before the
     * first "o" record are collected into an unnamed object. The object that
     * is still open when the file ends is emitted as well.
     *
     * @return false when the file cannot be opened, true otherwise
     */
    bool load_multiple_from_object_file(std::vector<GameObject>& output_game_objects, std::string sFilename, Vector3<float> color, bool glass = false, bool reflective = false, float reflective_index = 0.0f, int texture_idx = 0, bool textured = false)
    {
        std::ifstream file(sFilename);
        if (!file.is_open())
            return false;

        // Local cache of verts
        std::vector<Vector3<float>> verts;

        GameObject cur_game_object;
        bool named = false;  // true once an "o" record opened cur_game_object

        std::string line;
        while (std::getline(file, line)) {
            if (ObjFile::is_record(line, 'o')) {
                if (named || cur_game_object.triangle_count() > 0) {
                    cur_game_object.calculate_aabb();
                    output_game_objects.push_back(cur_game_object);
                }
                cur_game_object = GameObject();
                std::istringstream s(line);
                char tag;
                s >> tag >> cur_game_object.name;
                named = true;
            }
            else if (ObjFile::is_record(line, 'v')) {
                verts.push_back(ObjFile::parse_vertex(line));
            }
            else if (ObjFile::is_record(line, 'f')) {
                std::size_t idx[3];
                if (!ObjFile::parse_face(line, verts.size(), idx)) continue;
                cur_game_object.push_back_triangle(ObjFile::make_triangle(verts[idx[0]], verts[idx[1]], verts[idx[2]], color, glass, reflective, reflective_index, texture_idx, textured));
            }
        }

        // Flush the object that was still being built at end of file.
        if (named || cur_game_object.triangle_count() > 0) {
            cur_game_object.calculate_aabb();
            output_game_objects.push_back(cur_game_object);
        }
        return true;
    }

    namespace Intersections {
        /**
         * Ray/triangle test using the edge-vector / determinant formulation.
         *
         * On a hit, `t` receives the distance along ray.direction and
         * ray.u / ray.v receive the hit's barycentric coordinates. `t` may be
         * overwritten even when the function returns false.
         */
        __host__ __device__ bool ray_triangle(const Triangle& triangle, float& t, Ray& ray)
        {
            const Vector3<float> v0v1 = triangle.vertices[1] - triangle.vertices[0];
            const Vector3<float> v0v2 = triangle.vertices[2] - triangle.vertices[0];
            const Vector3<float> pvec = ray.direction.cross(v0v2);
            const float det = v0v1.dot(pvec);

            // A zero determinant means the ray is parallel to the triangle's
            // plane (or the triangle is degenerate); dividing would give inf/NaN.
            if (det == 0.0f) return false;

            const float invDet = 1.0f / det;

            const Vector3<float> tvec = ray.origin - triangle.vertices[0];
            const float u = tvec.dot(pvec) * invDet;
            if (u < 0 || u > 1) return false;

            const Vector3<float> qvec = tvec.cross(v0v1);
            const float v = ray.direction.dot(qvec) * invDet;
            if (v < 0 || u + v > 1) return false;

            t = v0v2.dot(qvec) * invDet;
            if (t < 0) return false;

            ray.u = u;
            ray.v = v;
            return true;
        }

        /**
         * Ray/box slab test. minmax.x is the box minimum, minmax.y the maximum.
         * Returns true with t = 0 when the origin is strictly inside the box,
         * otherwise true with the entry distance when the box is in front of
         * the ray.
         */
        __host__ __device__ bool ray_aabb(Ray ray, float& t, const Vector2<Vector3<float>>& minmax)
        {
            const bool inside =
                ray.origin.x > minmax.x.x && ray.origin.x < minmax.y.x &&
                ray.origin.y > minmax.x.y && ray.origin.y < minmax.y.y &&
                ray.origin.z > minmax.x.z && ray.origin.z < minmax.y.z;
            if (inside) {
                t = 0.0f;
                return true;
            }

            const Vector3<float> tMin = (minmax.x - ray.origin) / ray.direction;
            const Vector3<float> tMax = (minmax.y - ray.origin) / ray.direction;
            const Vector3<float> t1(fminf(tMin.x, tMax.x), fminf(tMin.y, tMax.y), fminf(tMin.z, tMax.z));
            const Vector3<float> t2(fmaxf(tMin.x, tMax.x), fmaxf(tMin.y, tMax.y), fmaxf(tMin.z, tMax.z));
            const float tNear = fmaxf(fmaxf(t1.x, t1.y), t1.z);
            const float tFar = fminf(fminf(t2.x, t2.y), t2.z);

            // The "- 1" leaves one unit of slack around the box.
            if (tNear - 1 > tFar) return false;
            if (tNear < 0) return false;
            t = tNear;
            return true;
        }

        /**
         * Ray/sphere test. Only the nearer root is considered, so a ray that
         * starts inside the sphere reports no hit.
         */
        __host__ __device__ bool ray_sphere(Vector3<float> center, float radius, float& t, Ray ray)
        {
            const Vector3<float> oc = ray.origin - center;
            const float a = ray.direction.dot(ray.direction);
            const float B = 2.0f * oc.dot(ray.direction);
            const float c = oc.dot(oc) - radius * radius;
            const float discriminant = B * B - 4.0f * a * c;
            if (discriminant < 0.0f) return false;

            const float numerator = -B - sqrtf(discriminant);
            if (numerator <= 0.0f) return false;

            t = numerator / (2.0f * a);
            return true;
        }

        /**
         * Host-side brute-force raycast. `t` is the maximum distance on entry;
         * it is lowered to the nearest hit distance and true is returned when
         * some triangle is closer than the incoming value.
         */
        bool Raycast(float& t, Ray ray, const std::vector<Triangle>& triangles)
        {
            const float limit = t;
            for (const Triangle& triangle : triangles) {
                float hit_t;
                if (ray_triangle(triangle, hit_t, ray) && hit_t < t) {
                    t = hit_t;
                }
            }
            return t < limit;
        }

        /**
         * Device raycast accelerated by per-object bounding boxes.
         *
         * `boundings` holds 8 floats per box: min.xyz, max.xyz, then the first
         * and last (inclusive) index into `triangles`. Boxes hit by the ray are
         * visited from nearest to farthest entry distance; the nearest triangle
         * hit inside the first box that contains any hit is returned.
         * Distances of 10000 or more are treated as a miss.
         */
        __device__ bool device_raycast(Triangle* triangles, Ray ray, Triangle& out_tri, float& out_t, float* boundings, int bounding_count)
        {
            // Boxes are visited in increasing (entry distance, index) order, so
            // remembering the last visited key is enough to skip every box that
            // was already tested.
            float visited_t = -1.0f;
            int visited_idx = -1;

            for (;;) {
                float box_best_t = 10000.0f;
                int box_best = -1;

                for (int b_idx = 0; b_idx < bounding_count; b_idx++) {
                    const float* box = boundings + b_idx * 8;
                    const Vector3<float> min_point(box[0], box[1], box[2]);
                    const Vector3<float> max_point(box[3], box[4], box[5]);

                    float box_t;
                    if (!ray_aabb(ray, box_t, Vector2<Vector3<float>>(min_point, max_point))) continue;
                    if (!(box_t < box_best_t)) continue;

                    const bool already_visited = box_t < visited_t || (box_t == visited_t && b_idx <= visited_idx);
                    if (already_visited) continue;

                    box_best_t = box_t;
                    box_best = b_idx;
                }

                if (box_best < 0) {
                    return false;
                }

                const int start_tri = (int)boundings[box_best * 8 + 6];
                const int end_tri = (int)boundings[box_best * 8 + 7];

                float t = 10000.0f;
                Triangle nearest;
                for (int t_idx = start_tri; t_idx <= end_tri; t_idx++) {
                    float dist;
                    if (ray_triangle(triangles[t_idx], dist, ray) && dist < t) {
                        t = dist;
                        nearest = triangles[t_idx];
                    }
                }

                if (t != 10000.0f) {
                    out_tri = nearest;
                    out_t = t;
                    return true;
                }

                visited_t = box_best_t;
                visited_idx = box_best;
            }
        }
    }

    /**
     * Point light.
     */
    class Light {
    public:
        Vector3<float> pos;
        Vector3<float> color;
        float intensity;

        __host__ __device__ Light(Vector3<float> pos, Vector3<float> color, float intensity)
        {
            this->pos = pos;
            this->color = color;
            this->intensity = intensity;
        }

        __host__ __device__ Light() : intensity(0.0f) {
            // Default
        }
    };

    /**
     * Camera position plus two rotation angles in radians: rot.x is applied
     * about Z and rot.y about Y when primary rays are generated.
     */
    class Camera {
    public:
        Vector3<float> pos;
        Vector2<float> rot;

        Camera(Vector3<float> pos, Vector2<float> rot)
        {
            this->pos = pos;
            this->rot = rot;
        }

        Camera() {
            // Default
        }
    };

    namespace Wrappers {
        // Non-owning pointers to the three colour planes produced by a render.
        class render_result_wrapper {
        public:
            int* r = nullptr;
            int* g = nullptr;
            int* b = nullptr;
        };
    }

    namespace Rendering {
        // Converts a normalised coordinate to a texel index clamped to the texture.
        __device__ inline int texel_index(float coord, float scale)
        {
            int idx = (int)(coord * scale);
            if (idx < 0) idx = 0;
            if (idx > kTextureSize - 1) idx = kTextureSize - 1;
            return idx;
        }

        /**
         * Traces one primary ray per pixel and writes its colour.
         *
         * Launch layout: 1-D grid of 1-D blocks with at least
         * screen_resolution.x * screen_resolution.y threads in total; surplus
         * threads exit immediately. No shared memory is used.
         *
         * Flat thread index p maps to pixel (i, j) with p = j + i * screen_resolution.y,
         * i in [0, screen_resolution.x) and j in [0, screen_resolution.y); the
         * colour is written to out_r/out_g/out_b[p]. All pointer arguments
         * must be device pointers. `triangles_count` is currently unused.
         */
        __global__ void kernel_render_screen(Triangle* triangles, int triangles_count, float* boundings, int bounding_count, Camera camera, Light* lights, int light_count, Vector3<float> screen_background, Vector2<int> screen_resolution, Texture2D* textures, int* out_r, int* out_g, int* out_b)
        {
            const int pixel_count = screen_resolution.x * screen_resolution.y;
            const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
            if (pixel >= pixel_count) return;

            const float i = (float)(pixel / screen_resolution.y);
            const float j = (float)(pixel % screen_resolution.y);

            Ray ray;
            ray.origin = camera.pos;
            ray.direction = Vector3<float>(0.5f, -(i / screen_resolution.x - 0.5f), (j / screen_resolution.y - 0.5f)).normalize();
            ray.direction = LinAlg::rotate_about_z(ray.direction, camera.rot.x);
            ray.direction = LinAlg::rotate_about_y(ray.direction, camera.rot.y);
            ray.color = screen_background;

            bool reflected = false;
            bool glass = false;
            const int max_depth = 3;

            for (int depth_idx = 0; depth_idx < max_depth; depth_idx++) {
                float t = 10000.0f;
                Triangle out_tri;

                if (!Intersections::device_raycast(triangles, ray, out_tri, t, boundings, bounding_count)) {
                    // The ray is unchanged after a miss, so every remaining
                    // bounce would miss as well: apply their background blends
                    // directly instead of re-tracing the same ray.
                    for (int k = depth_idx; k < max_depth; k++) {
                        ray.color = (Vector3<float>(ray.color.x * 0.875f, ray.color.y * 0.875f, ray.color.z * 0.875f) + Vector3<float>(screen_background.x * 0.125f, screen_background.y * 0.125f, screen_background.z * 0.125f));
                    }
                    break;
                }

                const Vector3<float> intersection = ray.origin + Vector3<float>(t) * ray.direction;
                float u, v, w;
                LinAlg::barycentric(intersection, out_tri.vertices[0], out_tri.vertices[1], out_tri.vertices[2], u, v, w);

                // Flip the normal whenever it has a positive component towards the ray origin.
                if (out_tri.normal.dot((ray.origin - intersection).normalize()) > 0.0f) out_tri.normal = out_tri.normal * Vector3<float>(-1.0f);

                if (out_tri.textured && textures[out_tri.texture_idx].normal) {
                    // NOTE(review): the normal plane is sampled with scales
                    // 256 / 128 while the albedo plane uses 512 / 512; kept
                    // as-is, confirm it is intended.
                    const int row = texel_index(u, 256.0f);
                    const int col = texel_index(w, 128.0f);
                    const float* texel = textures[out_tri.texture_idx].normal_data[row][col];
                    const Vector3<float> tex_vector(texel[0], texel[1], texel[2]);

                    const float d = fast_inverse_sqrt(out_tri.normal.x * out_tri.normal.x + out_tri.normal.y * out_tri.normal.y);
                    float d2 = fast_inverse_sqrt(tex_vector.x * tex_vector.x + tex_vector.y * tex_vector.y);
                    const float phi = atanf(out_tri.normal.y / out_tri.normal.x);
                    float phi2 = atanf(tex_vector.y / tex_vector.x);
                    phi2 += phi;
                    d2 += d;
                    out_tri.normal = Vector3<float>(
                        d2 * cosf(phi2),
                        d2 * sinf(phi2),
                        tex_vector.z + out_tri.normal.z
                    );
                }

                if (out_tri.normal.dot((ray.origin - intersection).normalize()) > 0.0f) out_tri.normal = out_tri.normal * Vector3<float>(-1.0f);
                out_tri.normal = out_tri.normal.normalize();

                float avg_lums = 0.0f;
                Vector3<float> avg_col;

                for (int light_idx = 0; light_idx < light_count; light_idx++) {
                    const Light& light = lights[light_idx];
                    const Vector3<float> itol = light.pos - intersection;
                    const float itol_mag = itol.magnitude();
                    const Vector3<float> to_light = itol.normalize();

                    float intensity = fmaxf(-to_light.dot(out_tri.normal), 0.0f);
                    intensity *= light.intensity / itol_mag;
                    avg_col = avg_col + Vector3<float>(light.color.x * intensity, light.color.y * intensity, light.color.z * intensity);

                    // Shadow ray, nudged off the surface to avoid hitting it again.
                    Ray ray_to_sun(intersection, to_light, ray.color);
                    ray_to_sun.origin = ray_to_sun.origin + ray_to_sun.direction * Vector3<float>(0.001f);

                    int shadowHit = 1;
                    float occluder_t;
                    Triangle occluder;
                    if (Intersections::device_raycast(triangles, ray_to_sun, occluder, occluder_t, boundings, bounding_count)) {
                        if (occluder_t < itol_mag) shadowHit = 0;
                    }
                    avg_lums += intensity * shadowHit;
                }

                avg_col = avg_col.normalize();

                if (out_tri.textured && textures[out_tri.texture_idx].albedo) {
                    // Clamp so that u or w equal to 1 (or slightly out of range
                    // from rounding) cannot index outside the texture.
                    const int row = texel_index(u, (float)kTextureSize);
                    const int col = texel_index(w, (float)kTextureSize);
                    const float* texel = textures[out_tri.texture_idx].data[row][col];
                    out_tri.color = Vector3<float>(texel[0], texel[1], texel[2]);
                }

                const Vector3<float> this_col(
                    fminf(out_tri.color.x * avg_col.x * avg_lums, 255.0f),
                    fminf(out_tri.color.y * avg_col.y * avg_lums, 255.0f),
                    fminf(out_tri.color.z * avg_col.z * avg_lums, 255.0f));

                if (out_tri.reflective || reflected) {
                    reflected = true;
                    ray.direction = LinAlg::reflect(ray.direction, out_tri.normal);
                    ray.origin = intersection + ray.direction * Vector3<float>(0.01f);
                    if (depth_idx == 0)
                        ray.color = this_col;
                    else {
                        const float k = out_tri.reflective_index;
                        ray.color = (Vector3<float>(this_col.x * (1.0f - k), this_col.y * (1.0f - k), this_col.z * (1.0f - k)) + Vector3<float>(ray.color.x * k, ray.color.y * k, ray.color.z * k));
                    }
                }
                else if (out_tri.glass || glass) {
                    glass = true;
                    ray.origin = intersection + ray.direction * Vector3<float>(0.01f);
                    if (depth_idx == 0)
                        ray.color = this_col;
                    else
                        ray.color = (Vector3<float>(this_col.x * 0.95f, this_col.y * 0.95f, this_col.z * 0.95f) + Vector3<float>(ray.color.x * 0.05f, ray.color.y * 0.05f, ray.color.z * 0.05f));
                }
                else {
                    ray.color = this_col;
                    break;
                }
            }

            out_r[pixel] = (int)ray.color.x;
            out_g[pixel] = (int)ray.color.y;
            out_b[pixel] = (int)ray.color.z;
        }

        // Host-side result planes used for renders of up to 500 x 500 pixels.
        int data_r[500 * 500];
        int data_g[500 * 500];
        int data_b[500 * 500];

        // Logs a failed CUDA call to stderr; returns true on cudaSuccess.
        inline bool cuda_ok(cudaError_t status, const char* what)
        {
            if (status == cudaSuccess) return true;
            std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << std::endl;
            return false;
        }

        // Allocates device memory for `count` elements and copies them from the
        // host. An empty array leaves device_ptr untouched and succeeds.
        template <typename T>
        bool upload_array(T*& device_ptr, const T* host_ptr, std::size_t count, const char* what)
        {
            const std::size_t bytes = sizeof(T) * count;
            if (bytes == 0) return true;
            if (!cuda_ok(cudaMalloc(&device_ptr, bytes), what)) return false;
            return cuda_ok(cudaMemcpy(device_ptr, host_ptr, bytes, cudaMemcpyHostToDevice), what);
        }

        /**
         * Uploads the scene, runs kernel_render_screen and downloads the
         * three colour planes.
         *
         * @param boundings         8 floats per box, see Intersections::device_raycast
         * @param screen_resolution x and y are truncated to int; plane index is
         *                          j + i * y with i in [0, x) and j in [0, y)
         * @param ray_depth         accepted for interface compatibility; the
         *                          kernel uses a fixed depth
         * @return pointers to host buffers owned by this function. They stay
         *         valid until the next call. On a CUDA failure the error is
         *         logged and the buffers keep their previous contents.
         *
         * The line printed to stdout reports the elapsed kernel time in seconds.
         */
        Wrappers::render_result_wrapper render_screen(Triangle* triangles, int triangle_count, float* boundings, int bounding_count, Vector2<float> screen_resolution, Vector3<float> screen_background, int ray_depth, Camera camera, Light* lights, int light_count, Texture2D* textures, int texture_count)
        {
            (void)ray_depth;

            const int res_x = (int)screen_resolution.x;
            const int res_y = (int)screen_resolution.y;
            const std::size_t pixel_count = (res_x > 0 && res_y > 0) ? (std::size_t)res_x * (std::size_t)res_y : 0;
            const std::size_t plane_bytes = pixel_count * sizeof(int);

            // The fixed arrays are used whenever the image fits; larger images
            // go to growable buffers instead of overrunning the arrays.
            static std::vector<int> large_r, large_g, large_b;
            Wrappers::render_result_wrapper wrapper;
            wrapper.r = data_r;
            wrapper.g = data_g;
            wrapper.b = data_b;
            if (pixel_count > (std::size_t)(500 * 500)) {
                large_r.resize(pixel_count);
                large_g.resize(pixel_count);
                large_b.resize(pixel_count);
                wrapper.r = large_r.data();
                wrapper.g = large_g.data();
                wrapper.b = large_b.data();
            }
            if (pixel_count == 0) return wrapper;

            Triangle* gpu_triangles = nullptr;
            Light* dev_lights = nullptr;
            Texture2D* dev_textures = nullptr;
            float* dev_boundings = nullptr;
            int* dev_outputR = nullptr;
            int* dev_outputG = nullptr;
            int* dev_outputB = nullptr;

            bool ok = true;
            ok = upload_array(gpu_triangles, triangles, (std::size_t)triangle_count, "upload triangles") && ok;
            ok = upload_array(dev_lights, lights, (std::size_t)light_count, "upload lights") && ok;
            ok = upload_array(dev_textures, textures, (std::size_t)texture_count, "upload textures") && ok;
            ok = upload_array(dev_boundings, boundings, (std::size_t)bounding_count * 8, "upload boundings") && ok;
            // The kernel writes every pixel, so the output planes need no upload.
            ok = cuda_ok(cudaMalloc(&dev_outputR, plane_bytes), "cudaMalloc(out_r)") && ok;
            ok = cuda_ok(cudaMalloc(&dev_outputG, plane_bytes), "cudaMalloc(out_g)") && ok;
            ok = cuda_ok(cudaMalloc(&dev_outputB, plane_bytes), "cudaMalloc(out_b)") && ok;

            if (ok) {
                const int threads = 256;
                const int blocks = (int)((pixel_count + threads - 1) / threads);

                const std::clock_t start = std::clock();
                kernel_render_screen<<<blocks, threads>>>(gpu_triangles, triangle_count, dev_boundings, bounding_count, camera, dev_lights, light_count, screen_background, Vector2<int>(res_x, res_y), dev_textures, dev_outputR, dev_outputG, dev_outputB);
                cudaError_t status = cudaGetLastError();            // launch-configuration errors
                const cudaError_t sync_status = cudaDeviceSynchronize();  // execution errors
                if (status == cudaSuccess) status = sync_status;

                std::cout << "RENDER FPS: " << (float)(std::clock() - start) / CLOCKS_PER_SEC << " ERROR: " << cudaGetErrorString(status) << std::endl;

                if (status == cudaSuccess) {
                    cuda_ok(cudaMemcpy(wrapper.r, dev_outputR, plane_bytes, cudaMemcpyDeviceToHost), "download out_r");
                    cuda_ok(cudaMemcpy(wrapper.g, dev_outputG, plane_bytes, cudaMemcpyDeviceToHost), "download out_g");
                    cuda_ok(cudaMemcpy(wrapper.b, dev_outputB, plane_bytes, cudaMemcpyDeviceToHost), "download out_b");
                }
            }

            cudaFree(dev_outputR);
            cudaFree(dev_outputG);
            cudaFree(dev_outputB);
            cudaFree(gpu_triangles);
            cudaFree(dev_lights);
            cudaFree(dev_textures);
            cudaFree(dev_boundings);
            return wrapper;
        }
    }

    /**
     * Owns the game objects, lights, textures and canvas, and drives a render.
     */
    class Scene {
    private:
        std::vector<GameObject> _scene;
        std::vector<Light> lights;
        Vector2<float> inv_resolution;
        std::vector<float> boundary_data;
    public:
        cv::Mat canvas;
        Vector2<int> stretch_to;
        Camera camera;
        Vector3<float> color_background;
        std::vector<Texture2D> textures;
        bool bloom = false;

        /**
         * @param CANVAS_TYPE           OpenCV matrix type of the canvas; render()
         *                              accesses it as cv::Vec3b pixels
         * @param resolution            canvas width (x) and height (y)
         * @param stretch_resolution_to size of the displayed window image
         */
        Scene(int CANVAS_TYPE, Vector2<int> resolution, Vector2<int> stretch_resolution_to, Camera camera, Vector3<float> color_background)
        {
            this->canvas = cv::Mat::zeros(cv::Size(resolution.x, resolution.y), CANVAS_TYPE);
            this->camera = camera;
            this->color_background = color_background;
            this->stretch_to = stretch_resolution_to;
        }

        void add_game_object(GameObject game_object)
        {
            this->_scene.push_back(game_object);
        }

        // Returns a copy of the object list.
        std::vector<GameObject> get_game_objects() const
        {
            return this->_scene;
        }

        // Replaces the object at idx; idx is not range-checked.
        void set_game_object(int idx, GameObject new_object)
        {
            this->_scene[idx] = new_object;
        }

        void add_light(Light light)
        {
            this->lights.push_back(light);
        }

        // Returns a copy of the light list.
        std::vector<Light> get_lights() const
        {
            return this->lights;
        }

        // Replaces the light at idx; idx is not range-checked.
        void set_light(const int idx, const Light new_light)
        {
            this->lights[idx] = new_light;
        }

        /**
         * Adds one light per "v" record of an OBJ file, all sharing the given
         * colour and intensity.
         *
         * @return false when the file cannot be opened, true otherwise
         */
        bool load_lights_from_object_file(std::string sFilename, Vector3<float> color, float intensity)
        {
            std::ifstream file(sFilename);
            if (!file.is_open())
                return false;

            std::string line;
            while (std::getline(file, line)) {
                if (ObjFile::is_record(line, 'v')) {
                    this->lights.push_back(Light(ObjFile::parse_vertex(line), color, intensity));
                }
            }
            return true;
        }

        void load_gameobjects_from_vector(const std::vector<GameObject>& game_objects)
        {
            for (const GameObject& game_object : game_objects) {
                this->add_game_object(game_object);
            }
        }

        // Index of the first object with the given name, or -1.
        int find_gameobject_by_name(std::string name)
        {
            for (std::size_t i = 0; i < this->_scene.size(); i++) {
                if (this->_scene[i].name == name) return static_cast<int>(i);
            }
            return -1;
        }

        /**
         * Rebuilds the flat bounding table uploaded to the GPU: 8 floats per
         * object (min.xyz, max.xyz, first triangle index, last triangle
         * index), in scene order. Uses each object's current `bounding`, so
         * call GameObject::calculate_aabb() first after moving an object.
         */
        void bake_boundary_data()
        {
            std::vector<float> bounding_data;
            bounding_data.reserve(this->_scene.size() * 8);

            int triangle_counter = 0;
            for (const GameObject& game_object : this->_scene) {
                const int count = static_cast<int>(game_object.triangle_count());
                bounding_data.push_back(game_object.bounding.x.x);
                bounding_data.push_back(game_object.bounding.x.y);
                bounding_data.push_back(game_object.bounding.x.z);
                bounding_data.push_back(game_object.bounding.y.x);
                bounding_data.push_back(game_object.bounding.y.y);
                bounding_data.push_back(game_object.bounding.y.z);
                bounding_data.push_back((float)triangle_counter);
                bounding_data.push_back((float)(triangle_counter + count - 1));
                triangle_counter += count;
            }

            this->boundary_data = bounding_data;
        }

        /**
         * Renders the scene into `canvas`, applies the optional bloom and a
         * light blur, then shows the stretched image in the "Krov Engine"
         * window.
         */
        void render()
        {
            // A table that does not match the object count would make the
            // renderer read past its end, so rebuild it in that case.
            if (this->boundary_data.size() != this->_scene.size() * 8) {
                this->bake_boundary_data();
            }

            std::vector<Triangle> triangles;
            for (const GameObject& game_object : this->_scene) {
                const std::vector<Triangle> game_object_triangles = game_object.get_triangles();
                triangles.insert(triangles.end(), game_object_triangles.begin(), game_object_triangles.end());
            }

            Wrappers::render_result_wrapper col = Rendering::render_screen(
                triangles.data(), static_cast<int>(triangles.size()),
                this->boundary_data.data(), static_cast<int>(this->_scene.size()),
                Vector2<float>((float)this->canvas.rows, (float)this->canvas.cols),
                this->color_background, 1, this->camera,
                this->lights.data(), static_cast<int>(this->lights.size()),
                this->textures.data(), static_cast<int>(this->textures.size()));

            for (int y = 0; y < this->canvas.rows; y++) {
                for (int x = 0; x < this->canvas.cols; x++) {
                    const int idx = x + y * this->canvas.cols;
                    cv::Vec3b& at = this->canvas.at<cv::Vec3b>(y, x);
                    // Saturate instead of wrapping values outside 0..255.
                    at.val[0] = cv::saturate_cast<uchar>(col.r[idx]);
                    at.val[1] = cv::saturate_cast<uchar>(col.g[idx]);
                    at.val[2] = cv::saturate_cast<uchar>(col.b[idx]);
                }
            }

            if (this->bloom) {
                cv::Mat gray;
                cv::cvtColor(this->canvas, gray, cv::COLOR_BGR2GRAY);
                cv::threshold(gray, gray, 200, 255, cv::THRESH_BINARY);
                cv::GaussianBlur(gray, gray, cv::Size(0, 0), 10);
                cv::cvtColor(gray, gray, cv::COLOR_GRAY2BGR);
                cv::addWeighted(this->canvas, 1, gray, 6, 0, this->canvas);
            }

            cv::GaussianBlur(this->canvas, this->canvas, cv::Size(0, 0), 1.0);

            cv::Mat output_canvas;
            cv::resize(this->canvas, output_canvas, cv::Size(this->stretch_to.x, this->stretch_to.y));
            cv::imshow("Krov Engine", output_canvas);
            cv::waitKey(1);
        }
    };

    // Generator functions usable with VectorN::fill.
    namespace GenTypes {
        template <typename T>
        __host__ __device__ T FILL_ZEROS(int idx)
        {
            return (T)0;
        }

        template <typename T>
        __host__ __device__ T FILL_ONES(int idx)
        {
            return (T)1;
        }

        template <typename T>
        __host__ __device__ T FILL_IDX(int idx)
        {
            return (T)idx;
        }
    }
}