rendering Panda correctly
This commit is contained in:
@@ -2,5 +2,5 @@
|
||||
|
||||
- [x] ELF
|
||||
- [x] DOL
|
||||
- [ ] panda.dol
|
||||
- [x] panda.dol
|
||||
- [ ] libogc simple examples
|
||||
+1
-1
@@ -16,7 +16,7 @@ broadway::broadway() {
|
||||
void broadway::set_pc(ircolib::u32 value) { pc = value; }
|
||||
|
||||
void broadway::run(mem &mem) {
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
for (int i = 0; i < 12150000; i++) {
|
||||
execute(fetch(mem), mem);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
|
||||
# Build directory
|
||||
build/
|
||||
Vendored
-31
@@ -1,31 +0,0 @@
|
||||
cmake_minimum_required (VERSION 2.6)
|
||||
project (yuv_rgb)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wall -Wextra -pedantic -std=c99")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pedantic -std=c99")
|
||||
|
||||
set(USE_FFMPEG FALSE CACHE BOOL "Enable ffmpeg")
|
||||
if(USE_FFMPEG)
|
||||
add_definitions(-DUSE_FFMPEG=1)
|
||||
endif(USE_FFMPEG)
|
||||
|
||||
set(USE_IPP FALSE CACHE BOOL "Enable IPP")
|
||||
if(USE_IPP)
|
||||
set(IPP_ROOT /opt/intel CACHE PATH "IPP install path")
|
||||
|
||||
include_directories(${IPP_ROOT}/ipp/include)
|
||||
link_directories(${IPP_ROOT}/ipp/lib/intel64)
|
||||
add_definitions(-DUSE_IPP=1)
|
||||
endif(USE_IPP)
|
||||
|
||||
include_directories ("${PROJECT_SOURCE_DIR}")
|
||||
add_executable(test_yuv_rgb test_yuv_rgb.c yuv_rgb.c)
|
||||
|
||||
if(USE_FFMPEG)
|
||||
target_link_libraries(test_yuv_rgb swscale)
|
||||
endif(USE_FFMPEG)
|
||||
|
||||
if(USE_IPP)
|
||||
target_link_libraries(test_yuv_rgb ippcc)
|
||||
endif(USE_IPP)
|
||||
|
||||
Vendored
-27
@@ -1,27 +0,0 @@
|
||||
Copyright (c) 2016, Adrien Descamps
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of yuv2rgb nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
Vendored
-62
@@ -1,62 +0,0 @@
|
||||
# yuv2rgb
|
||||
C library for fast image conversion between yuv420p and rgb24.
|
||||
|
||||
This is a simple library for optimized image conversion between YUV420p and rgb24.
|
||||
It was done mainly as an exercise to learn to use SSE intrinsics, so there may still be room for optimization.
|
||||
|
||||
For each conversion, a standard optimized C function and two SSE functions (for aligned and unaligned memory) are implemented.
|
||||
The SSE version requires only SSE2, which is available on any reasonably recent CPU.
|
||||
The library also supports the three different YUV (YCrCb to be correct) color spaces that exist (see comments in code), and others can be added simply.
|
||||
|
||||
There is a simple test program that converts a raw YUV file to RGB ppm format and measures the computation time.
|
||||
Optionally, it also compares the result and computation time with the ffmpeg implementation (that uses MMX), and with the IPP functions.
|
||||
|
||||
To compile, simply do :
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
make
|
||||
|
||||
The test program only supports raw YUV files for the YUV420 format, and ppm for the RGB24 format.
|
||||
To generate a raw yuv file, you can use avconv:
|
||||
|
||||
avconv -i example.jpg -c:v rawvideo -pix_fmt yuv420p example.yuv
|
||||
|
||||
To generate the rgb file, you can use the ImageMagick convert program:
|
||||
|
||||
convert example.jpg example.ppm
|
||||
|
||||
Then, for YUV420 to RGB24 conversion, use the test program like that:
|
||||
|
||||
./test_yuv_rgb yuv2rgb image.yuv 4096 2160 image
|
||||
|
||||
The second and third parameters are image width and height (that are needed because not available in the raw YUV file), and fourth parameter is the output filename template (several output files will be generated, named for example output_sse.ppm, output_av.ppm, etc.)
|
||||
|
||||
Similarly, for RGB24 to YUV420 conversion:
|
||||
|
||||
./test_yuv_rgb rgb2yuv image.ppm image
|
||||
|
||||
On my computer, the test program on a 4K image gives the following for yuv2rgb:
|
||||
|
||||
Time will be measured in each configuration for 100 iterations...
|
||||
Processing time (std) : 2.630193 sec
|
||||
Processing time (sse2_unaligned) : 0.704394 sec
|
||||
Processing time (ffmpeg_unaligned) : 1.221432 sec
|
||||
Processing time (ipp_unaligned) : 0.636274 sec
|
||||
Processing time (sse2_aligned) : 0.606648 sec
|
||||
Processing time (ffmpeg_aligned) : 1.227100 sec
|
||||
Processing time (ipp_aligned) : 0.636951 sec
|
||||
|
||||
And for rgb2yuv:
|
||||
|
||||
Time will be measured in each configuration for 100 iterations...
|
||||
Processing time (std) : 2.588675 sec
|
||||
Processing time (sse2_unaligned) : 0.676625 sec
|
||||
Processing time (ffmpeg_unaligned) : 3.385816 sec
|
||||
Processing time (ipp_unaligned) : 0.593890 sec
|
||||
Processing time (sse2_aligned) : 0.640630 sec
|
||||
Processing time (ffmpeg_aligned) : 3.397952 sec
|
||||
Processing time (ipp_aligned) : 0.579043 sec
|
||||
|
||||
configuration : gcc 4.9.2, swscale 3.0.0, IPP 9.0.1, intel i7-5500U
|
||||
Vendored
BIN
Binary file not shown.
Vendored
-1
File diff suppressed because one or more lines are too long
Vendored
BIN
Binary file not shown.
Vendored
-1
File diff suppressed because one or more lines are too long
Vendored
BIN
Binary file not shown.
Vendored
-1
File diff suppressed because one or more lines are too long
Vendored
-45
@@ -1,45 +0,0 @@
|
||||
Just to document the conversion between rgb24 and planar rgb.
|
||||
|
||||
R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
||||
G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 R10 G10
|
||||
B10 R11 G11 B11 R12 G12 B12 R13 G13 B13 R14 G14 B14 R15 G15 B15
|
||||
R16 G16 B16 R17 G17 B17 R18 G18 B18 R19 G19 B19 R20 G20 B20 R21
|
||||
G21 B21 R22 G22 B22 R23 G23 B23 R24 G24 B24 R25 G25 B25 R26 G26
|
||||
B26 R27 G27 B27 R28 G28 B28 R29 G29 B29 R30 G30 B30 R31 G31 B31
|
||||
|
||||
R0 R16 G0 G16 B0 B16 R1 R17 G1 G17 B1 B17 R2 R18 G2 G18
|
||||
B2 B18 R3 R19 G3 G19 B3 B19 R4 R20 G4 G20 B4 B20 R5 R21
|
||||
G5 G21 B5 B21 R6 R22 G6 G22 B6 B22 R7 R23 G7 G23 B7 B23
|
||||
R8 R24 G8 G24 B8 B24 R9 R25 G9 G25 B9 B25 R10 R26 G10 G26
|
||||
B10 B26 R11 R27 G11 G27 B11 B27 R12 R28 G12 G28 B12 B28 R13 R29
|
||||
G13 G29 B13 B29 R14 R30 G14 G30 B14 B30 R15 R31 G15 G31 B15 B31
|
||||
|
||||
R0 R8 R16 R24 G0 G8 G16 G24 B0 B8 B16 B24 R1 R9 R17 R25
|
||||
G1 G9 G17 G25 B1 B9 B17 B25 R2 R10 R18 R26 G2 G10 G18 G26
|
||||
B2 B10 B18 B26 R3 R11 R19 R27 G3 G11 G19 G27 B3 B11 B19 B27
|
||||
R4 R12 R20 R28 G4 G12 G20 G28 B4 B12 B20 B28 R5 R13 R21 R29
|
||||
G5 G13 G21 G29 B5 B13 B21 B29 R6 R14 R22 R30 G6 G14 G22 G30
|
||||
B6 B14 B22 B30 R7 R15 R23 R31 G7 G15 G23 G31 B7 B15 B23 B31
|
||||
|
||||
R0 R4 R8 R12 R16 R20 R24 R28 G0 G4 G8 G12 G16 G20 G24 G28
|
||||
B0 B4 B8 B12 B16 B20 B24 B28 R1 R5 R9 R13 R17 R21 R25 R29
|
||||
G1 G5 G9 G13 G17 G21 G25 G29 B1 B5 B9 B13 B17 B21 B25 B29
|
||||
R2 R6 R10 R14 R18 R22 R26 R30 G2 G6 G10 G14 G18 G22 G26 G30
|
||||
B2 B6 B10 B14 B18 B22 B26 B30 R3 R7 R11 R15 R19 R23 R27 R31
|
||||
G3 G7 G11 G15 G19 G23 G27 G31 B3 B7 B11 B15 B19 B23 B27 B31
|
||||
|
||||
R0 R2 R4 R6 R8 R10 R12 R14 R16 R18 R20 R22 R24 R26 R28 R30
|
||||
G0 G2 G4 G6 G8 G10 G12 G14 G16 G18 G20 G22 G24 G26 G28 G30
|
||||
B0 B2 B4 B6 B8 B10 B12 B14 B16 B18 B20 B22 B24 B26 B28 B30
|
||||
R1 R3 R5 R7 R9 R11 R13 R15 R17 R19 R21 R23 R25 R27 R29 R31
|
||||
G1 G3 G5 G7 G9 G11 G13 G15 G17 G19 G21 G23 G25 G27 G29 G31
|
||||
B1 B3 B5 B7 B9 B11 B13 B15 B17 B19 B21 B23 B25 B27 B29 B31
|
||||
|
||||
R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
|
||||
R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 R30 R31
|
||||
G0 G1 G2 G3 G4 G5 G6 G7 G8 G9 G10 G11 G12 G13 G14 G15
|
||||
G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31
|
||||
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15
|
||||
B16 B17 B18 B19 B20 B21 B22 B23 B24 B25 B26 B27 B28 B29 B30 B31
|
||||
|
||||
|
||||
Vendored
-54
@@ -1,54 +0,0 @@
|
||||
Just to document the conversion between rgb32 and planar rgb.
|
||||
|
||||
R0 G0 B0 A0 R1 G1 B1 A1 R2 G2 B2 A2 R3 G3 B3 A3
|
||||
R4 G4 B4 A4 R5 G5 B5 A5 R6 G6 B6 A6 R7 G7 B7 A7
|
||||
R8 G8 B8 A8 R9 G9 B9 A9 R10 G10 B10 A10 R11 G11 B11 A11
|
||||
R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 R15 G15 B15 A15
|
||||
R16 G16 B16 A16 R17 G17 B17 A17 R18 G18 B18 A18 R19 G19 B19 A19
|
||||
R20 G20 B20 A20 R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23
|
||||
R24 G24 B24 A24 R25 G25 B25 A25 R26 G26 B26 A26 R27 G27 B27 A27
|
||||
R28 G28 B28 A28 R29 G29 B29 A29 R30 G30 B30 A30 R31 G31 B31 A31
|
||||
|
||||
R0 R16 G0 G16 B0 B16 A0 A16 R1 R17 G1 G17 B1 B17 A1 A17
|
||||
R2 R18 G2 G18 B2 B18 A2 A18 R3 R19 G3 G19 B3 B19 A3 A19
|
||||
R4 R20 G4 G20 B4 B20 A4 A20 R5 R21 G5 G21 B5 B21 A5 A21
|
||||
R6 R22 G6 G22 B6 B22 A6 A22 R7 R23 G7 G23 B7 B23 A7 A23
|
||||
R8 R24 G8 G24 B8 B24 A8 A24 R9 R25 G9 G25 B9 B25 A9 A25
|
||||
R10 R26 G10 G26 B10 B26 A10 A26 R11 R27 G11 G27 B11 B27 A11 A27
|
||||
R12 R28 G12 G28 B12 B28 A12 A28 R13 R29 G13 G29 B13 B29 A13 A29
|
||||
R14 R30 G14 G30 B14 B30 A14 A30 R15 R31 G15 G31 B15 B31 A15 A31
|
||||
|
||||
R0 R8 R16 R24 G0 G8 G16 G24 B0 B8 B16 B24 A0 A8 A16 A24
|
||||
R1 R9 R17 R25 G1 G9 G17 G25 B1 B9 B17 B25 A1 A9 A17 A25
|
||||
R2 R10 R18 R26 G2 G10 G18 G26 B2 B10 B18 B26 A2 A10 A18 A26
|
||||
R3 R11 R19 R27 G3 G11 G19 G27 B3 B11 B19 B27 A3 A11 A19 A27
|
||||
R4 R12 R20 R28 G4 G12 G20 G28 B4 B12 B20 B28 A4 A12 A20 A28
|
||||
R5 R13 R21 R29 G5 G13 G21 G29 B5 B13 B21 B29 A5 A13 A21 A29
|
||||
R6 R14 R22 R30 G6 G14 G22 G30 B6 B14 B22 B30 A6 A14 A22 A30
|
||||
R7 R15 R23 R31 G7 G15 G23 G31 B7 B15 B23 B31 A7 A15 A23 A31
|
||||
|
||||
R0 R4 R8 R12 R16 R20 R24 R28 G0 G4 G8 G12 G16 G20 G24 G28
|
||||
B0 B4 B8 B12 B16 B20 B24 B28 A0 A4 A8 A12 A16 A20 A24 A28
|
||||
R1 R5 R9 R13 R17 R21 R25 R29 G1 G5 G9 G13 G17 G21 G25 G29
|
||||
B1 B5 B9 B13 B17 B21 B25 B29 A1 A5 A9 A13 A17 A21 A25 A29
|
||||
R2 R6 R10 R14 R18 R22 R26 R30 G2 G6 G10 G14 G18 G22 G26 G30
|
||||
B2 B6 B10 B14 B18 B22 B26 B30 A2 A6 A10 A14 A18 A22 A26 A30
|
||||
R3 R7 R11 R15 R19 R23 R27 R31 G3 G7 G11 G15 G19 G23 G27 G31
|
||||
B3 B7 B11 B15 B19 B23 B27 B31 A3 A7 A11 A15 A19 A23 A27 A31
|
||||
|
||||
R0 R2 R4 R6 R8 R10 R12 R14 R16 R18 R20 R22 R24 R26 R28 R30
|
||||
G0 G2 G4 G6 G8 G10 G12 G14 G16 G18 G20 G22 G24 G26 G28 G30
|
||||
B0 B2 B4 B6 B8 B10 B12 B14 B16 B18 B20 B22 B24 B26 B28 B30
|
||||
A0 A2 A4 A6 A8 A10 A12 A14 A16 A18 A20 A22 A24 A26 A28 A30
|
||||
R1 R3 R5 R7 R9 R11 R13 R15 R17 R19 R21 R23 R25 R27 R29 R31
|
||||
G1 G3 G5 G7 G9 G11 G13 G15 G17 G19 G21 G23 G25 G27 G29 G31
|
||||
B1 B3 B5 B7 B9 B11 B13 B15 B17 B19 B21 B23 B25 B27 B29 B31
|
||||
A1 A3 A5 A7 A9 A11 A13 A15 A17 A19 A21 A23 A25 A27 A29 A31
|
||||
|
||||
R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
|
||||
R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 R30 R31
|
||||
G0 G1 G2 G3 G4 G5 G6 G7 G8 G9 G10 G11 G12 G13 G14 G15
|
||||
G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31
|
||||
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15
|
||||
B16 B17 B18 B19 B20 B21 B22 B23 B24 B25 B26 B27 B28 B29 B30 B31
|
||||
|
||||
Vendored
-623
@@ -1,623 +0,0 @@
|
||||
// Copyright 2016 Adrien Descamps
|
||||
// Distributed under BSD 3-Clause License
|
||||
|
||||
// This program demonstrates how to convert a YUV420p image (raw format) to RGB (ppm format), and the reverse operation
|
||||
|
||||
#include "yuv_rgb.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
#if USE_FFMPEG
|
||||
#include <libswscale/swscale.h>
|
||||
#endif
|
||||
#if USE_IPP
|
||||
#include <ippcc.h>
|
||||
#endif
|
||||
|
||||
// Read a raw planar YUV 4:2:0 image file.
// Raw yuv files can be generated by ffmpeg, for example, using:
//   ffmpeg -i test.png -c:v rawvideo -pix_fmt yuv420p test.yuv
// The file size must be exactly width*height + 2*((width+1)/2)*((height+1)/2) bytes.
// On success *YUV receives a malloc'ed copy of the file: the channels are
// contiguous, with Y stride=width and U and V stride=(width+1)/2.
// The memory must be freed with free. On any failure *YUV is set to NULL.
// Returns 0 on success, 1 if the file cannot be opened, 2 if its size does not
// match the given resolution, 3 if it cannot be measured, allocated or read.
int readRawYUV(const char *filename, uint32_t width, uint32_t height, uint8_t **YUV)
{
	*YUV = NULL;

	FILE *fp = fopen(filename, "rb");
	if(!fp)
	{
		perror("Error opening yuv image for read");
		return 1;
	}

	// measure the file; ftell reports failure with a negative value
	long file_size = -1;
	if(fseek(fp, 0, SEEK_END)==0)
		file_size = ftell(fp);
	if(file_size<0 || fseek(fp, 0, SEEK_SET)!=0)
	{
		perror("Error reading yuv image");
		fclose(fp);
		return 3;
	}

	// computed in size_t so that large resolutions do not wrap around 32 bits
	const size_t expected = (size_t)width*height + 2*(size_t)((width+1)/2)*((height+1)/2);
	if((size_t)file_size!=expected)
	{
		fprintf(stderr, "Wrong size of yuv image : %ld bytes, expected %lu bytes\n", file_size, (unsigned long)expected);
		fclose(fp);
		return 2;
	}

	// never request 0 bytes: malloc(0) may legitimately return NULL
	uint8_t *data = (uint8_t *)malloc(expected ? expected : 1);
	if(!data)
	{
		perror("Error allocating yuv image memory");
		fclose(fp);
		return 3;
	}

	if(fread(data, 1, expected, fp)!=expected)
	{
		perror("Error reading yuv image");
		free(data); // do not leak the buffer on a short read
		fclose(fp);
		return 3;
	}

	fclose(fp);
	*YUV = data;
	return 0;
}
|
||||
|
||||
// Write a planar YUV 4:2:0 image to a raw yuv file.
// YUV points to the Y plane (height rows, y_stride bytes apart), immediately
// followed by the U plane and then the V plane ((height+1)/2 rows each,
// uv_stride bytes apart). Only the first width (luma) or (width+1)/2 (chroma)
// bytes of every row are written, so the output file is tightly packed
// whatever the strides are.
// Returns 0 on success, 1 if the file cannot be opened, 2 if a write fails.
int saveRawYUV(const char *filename, uint32_t width, uint32_t height, const uint8_t *YUV, size_t y_stride, size_t uv_stride)
{
	FILE *fp = fopen(filename, "wb");
	if(!fp)
	{
		perror("Error opening yuv image for write");
		return 1;
	}

	const size_t chroma_width = ((size_t)width+1)/2;
	const size_t chroma_height = ((size_t)height+1)/2;
	int ok = 1;

	// luma plane, one row at a time so that any row padding is skipped
	for(uint32_t row=0; ok && row<height; ++row)
	{
		ok = (fwrite(YUV, 1, width, fp)==width);
		YUV += y_stride;
	}

	// U plane then V plane. Rows are always written individually: the previous
	// "packed" shortcut tested uv_stride==(width+1/2), i.e. uv_stride==width,
	// and dumped padded planes as one contiguous run whenever the padded
	// chroma stride happened to equal the image width.
	for(size_t row=0; ok && row<2*chroma_height; ++row)
	{
		ok = (fwrite(YUV, 1, chroma_width, fp)==chroma_width);
		YUV += uv_stride;
	}

	if(!ok)
		perror("Error writing yuv image");
	if(fclose(fp)!=0 && ok)
	{
		// buffered data may only fail to reach the disk at close time
		perror("Error writing yuv image");
		ok = 0;
	}
	return ok ? 0 : 2;
}
|
||||
|
||||
// Read a binary (P6) ppm image file with at most 8 bits per sample.
// Header comments ('#' lines) are not supported.
// On success *width and *height receive the image dimensions and *RGB a
// malloc'ed buffer of 3*width*height bytes that must be freed with free.
// On any failure *RGB is set to NULL.
// Returns 0 on success, 1 if the file cannot be opened, 2 if the pixel buffer
// cannot be allocated, 3 if the header is invalid or the pixel data is short.
int readPPM(const char* filename, uint32_t *width, uint32_t *height, uint8_t **RGB)
{
	*RGB = NULL;

	FILE *fp = fopen(filename, "rb");
	if(!fp)
	{
		perror("Error opening rgb image for read");
		return 1;
	}

	char magic[3] = {0, 0, 0};
	if(fread(magic, 1, 2, fp)!=2 || strcmp(magic,"P6")!=0)
	{
		perror("Error reading rgb image header, or invalid format");
		fclose(fp);
		return 3;
	}

	// parse into plain unsigned ints: %u is only guaranteed to match that type
	unsigned int w = 0, h = 0, max = 0;
	const int fields = fscanf(fp, " %u %u %u", &w, &h, &max);
	// Exactly one whitespace byte separates the header from the raster. A
	// trailing blank in the scanf format would also swallow leading pixel
	// bytes whose value happens to be a whitespace character.
	const int sep = (fields==3) ? fgetc(fp) : EOF;
	if(fields!=3 || max>255 || (sep!=' ' && sep!='\n' && sep!='\r' && sep!='\t'))
	{
		perror("Error reading rgb image header, or invalid values");
		fclose(fp);
		return 3;
	}
	*width = w;
	*height = h;

	const size_t size = (size_t)3*w*h;
	// never request 0 bytes: malloc(0) may legitimately return NULL
	uint8_t *pixels = (uint8_t *)malloc(size ? size : 1);
	if(!pixels)
	{
		perror("Error allocating rgb image memory");
		fclose(fp);
		return 2;
	}

	if(fread(pixels, 1, size, fp)!=size)
	{
		perror("Error reading rgb image");
		free(pixels); // do not leak the buffer on a short read
		fclose(fp);
		return 3;
	}

	fclose(fp);
	*RGB = pixels;
	return 0;
}
|
||||
|
||||
// Save an rgb24 image as a binary (P6) ppm file with a maximum sample value of 255.
// stride is the distance in bytes between two consecutive rows of RGB; only
// the first 3*width bytes of each row are written.
// Returns 0 once the file has been written, 1 if it cannot be opened.
int savePPM(const char* filename, uint32_t width, uint32_t height, const uint8_t *RGB, size_t stride)
{
	FILE *out = fopen(filename, "wb");
	if(out==NULL)
	{
		perror("Error opening rgb image for write");
		return 1;
	}

	fprintf(out, "P6 %u %u 255\n", width, height);

	if(stride!=(3*width))
	{
		// padded rows: emit the meaningful part of each row separately
		uint32_t line = 0;
		while(line<height)
		{
			fwrite(RGB+line*stride, 1, 3*width, out);
			++line;
		}
	}
	else
	{
		// tightly packed image: a single write covers every row
		fwrite(RGB, 1, 3*width*height, out);
	}

	fclose(out);
	return 0;
}
|
||||
|
||||
// Expand a tightly packed rgb24 image into a newly allocated rgba32 image.
// Every output pixel is R, G, B copied from the input followed by a fourth
// byte set to 0.
// *RGBA receives the malloc'ed buffer (4*width*height bytes, to be released
// with free), or NULL if the allocation fails.
void convert_rgb_to_rgba(const uint8_t *RGB, uint32_t width, uint32_t height, uint8_t **RGBA)
{
	// counted in size_t so that large images do not wrap around 32 bits
	const size_t pixel_count = (size_t)width*height;

	// never request 0 bytes: malloc(0) may legitimately return NULL
	uint8_t *dst = (uint8_t *)malloc(pixel_count ? 4*pixel_count : 1);
	*RGBA = dst;
	if(!dst)
	{
		perror("Error allocating rgba image memory");
		return;
	}

	for(size_t i=0; i<pixel_count; ++i)
	{
		dst[4*i]   = RGB[3*i];
		dst[4*i+1] = RGB[3*i+1];
		dst[4*i+2] = RGB[3*i+2];
		dst[4*i+3] = 0;
	}
}
|
||||
|
||||
// Operating mode of the test program. main selects it from argv[1]:
// "rgb2yuv" -> RGB2YUV, "yuv2rgb" -> YUV2RGB, "yuv2rgb_nv12" -> YUV2RGB_NV12,
// "yuv2rgb_nv21" -> YUV2RGB_NV21, "rgba2yuv" -> RGBA2YUV.
typedef enum
|
||||
{
|
||||
RGB2YUV,
|
||||
YUV2RGB,
|
||||
YUV2RGB_NV12,
|
||||
YUV2RGB_NV21,
|
||||
RGBA2YUV
|
||||
} Mode;
|
||||
|
||||
// Signature of the planar yuv -> rgb conversion functions timed by test_yuv2rgb:
// separate y, u and v plane pointers with their strides in, one rgb buffer out.
typedef void (*yuv2rgb_ptr)(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// Signature of the semi planar yuv -> rgb conversion functions timed by
// test_yuvsp2rgb: a y plane and a single uv pointer in, one rgb buffer out.
typedef void (*yuvsp2rgb_ptr)(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// Signature of the rgb -> planar yuv conversion functions timed by test_rgb2yuv:
// one rgb buffer in, separate y, u and v plane pointers with their strides out.
typedef void (*rgb2yuv_ptr)(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
|
||||
// call yuv2rgb conversion function, time it and save result
|
||||
void test_yuv2rgb(uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride, YCbCrType yuv_type,
|
||||
const char *file, const char *name, uint32_t iteration_number, const yuv2rgb_ptr yuv2rgb_fun)
|
||||
{
|
||||
clock_t t = clock();
|
||||
for(uint32_t i=0;i<iteration_number; ++i)
|
||||
yuv2rgb_fun(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
|
||||
t = clock()-t;
|
||||
printf("Processing time (%s) : %f sec\n", name, ((float)t)/CLOCKS_PER_SEC);
|
||||
|
||||
char *out_filename = malloc(strlen(file)+strlen(name)+6);
|
||||
strcpy(out_filename, file);
|
||||
strcat(out_filename, "_");
|
||||
strcat(out_filename, name);
|
||||
strcat(out_filename, ".ppm");
|
||||
savePPM(out_filename, width, height, rgb, rgb_stride);
|
||||
free(out_filename);
|
||||
}
|
||||
|
||||
// call yuv2rgb semi planar conversion function, time it and save result
|
||||
void test_yuvsp2rgb(uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride, YCbCrType yuv_type,
|
||||
const char *file, const char *name, uint32_t iteration_number, const yuvsp2rgb_ptr yuv2rgb_fun)
|
||||
{
|
||||
clock_t t = clock();
|
||||
for(uint32_t i=0;i<iteration_number; ++i)
|
||||
yuv2rgb_fun(width, height, y, uv, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
|
||||
t = clock()-t;
|
||||
printf("Processing time (%s) : %f sec\n", name, ((float)t)/CLOCKS_PER_SEC);
|
||||
|
||||
char *out_filename = malloc(strlen(file)+strlen(name)+6);
|
||||
strcpy(out_filename, file);
|
||||
strcat(out_filename, "_");
|
||||
strcat(out_filename, name);
|
||||
strcat(out_filename, ".ppm");
|
||||
savePPM(out_filename, width, height, rgb, rgb_stride);
|
||||
free(out_filename);
|
||||
}
|
||||
|
||||
|
||||
// call rgb2yuv conversion function, time it and save result
|
||||
void test_rgb2yuv(uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type,
|
||||
const char *file, const char *name, uint32_t iteration_number, const rgb2yuv_ptr rgb2yuv_fun)
|
||||
{
|
||||
clock_t t = clock();
|
||||
for(uint32_t i=0;i<iteration_number; ++i)
|
||||
rgb2yuv_fun(width, height, rgb, rgb_stride, y, u, v, y_stride, uv_stride, yuv_type);
|
||||
t = clock()-t;
|
||||
printf("Processing time (%s) : %f sec\n", name, ((float)t)/CLOCKS_PER_SEC);
|
||||
|
||||
char *out_filename = malloc(strlen(file)+strlen(name)+6);
|
||||
strcpy(out_filename, file);
|
||||
strcat(out_filename, "_");
|
||||
strcat(out_filename, name);
|
||||
strcat(out_filename, ".yuv");
|
||||
saveRawYUV(out_filename, width, height, y, y_stride, uv_stride);
|
||||
free(out_filename);
|
||||
}
|
||||
|
||||
// equivalent conversion functions for external libraries
|
||||
|
||||
#if USE_FFMPEG
|
||||
static struct SwsContext *yuv2rgb_swscale_ctx = NULL;
|
||||
static struct SwsContext *rgb2yuv_swscale_ctx = NULL;
|
||||
|
||||
void yuv420_rgb24_ffmpeg(uint32_t __attribute__ ((unused)) width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType __attribute__ ((unused)) yuv_type)
|
||||
{
|
||||
const uint8_t *const inData[3] = {y, u, v};
|
||||
int inLinesize[3] = {y_stride, uv_stride, uv_stride};
|
||||
int outLinesize[1] = {rgb_stride};
|
||||
sws_scale(yuv2rgb_swscale_ctx, inData, inLinesize, 0, height, &rgb, outLinesize);
|
||||
}
|
||||
|
||||
void rgb24_yuv420_ffmpeg(uint32_t __attribute__ ((unused)) width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType __attribute__ ((unused)) yuv_type)
|
||||
{
|
||||
int inLineSize[1] = {rgb_stride};
|
||||
int outLineSize[3] = {y_stride, uv_stride, uv_stride};
|
||||
uint8_t *const outData[3] = {y, u, v};
|
||||
sws_scale(rgb2yuv_swscale_ctx, &rgb, inLineSize, 0, height, outData, outLineSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_IPP
|
||||
void yuv420_rgb24_ipp(uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType __attribute__ ((unused)) yuv_type)
|
||||
{
|
||||
const Ipp8u* pSrc[3] = {y, u, v};
|
||||
int srcStep[3] = {y_stride, uv_stride, uv_stride};
|
||||
Ipp8u* pDst = rgb;
|
||||
int dstStep = rgb_stride;
|
||||
IppiSize imgSize = {.width=width, .height=height};
|
||||
ippiYCbCr420ToRGB_8u_P3C3R(pSrc, srcStep, pDst, dstStep, imgSize);
|
||||
}
|
||||
|
||||
void rgb24_yuv420_ipp(uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType __attribute__ ((unused)) yuv_type)
|
||||
{
|
||||
const Ipp8u* pSrc = rgb;
|
||||
int srcStep = rgb_stride;
|
||||
Ipp8u* pDst[3] = {y, u, v};
|
||||
int dstStep[3] = {y_stride, uv_stride, uv_stride};
|
||||
IppiSize imgSize = {.width=width, .height=height};
|
||||
ippiRGBToYCbCr420_8u_C3P3R(pSrc, srcStep, pDst, dstStep, imgSize);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if(argc<4)
|
||||
{
|
||||
printf("Usage : test yuv2rgb <yuv image file> <image width> <image height> <output template filename>\n");
|
||||
printf("Or : test yuv2rgb_nv12 <yuv image file> <image width> <image height> <output template filename>\n");
|
||||
printf("Or : test yuv2rgb_nv21 <yuv image file> <image width> <image height> <output template filename>\n");
|
||||
printf("Or : test rgb2yuv <rgb24 binary ppm image file> <output template filename>\n");
|
||||
printf("Or : test rgba2yuv <rgb24 binary ppm image file> <output template filename>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
const int iteration_number = 100;
|
||||
printf("Time will be measured in each configuration for %d iterations...\n", iteration_number);
|
||||
const YCbCrType yuv_format = YCBCR_601;
|
||||
//const YCbCrType yuv_format = YCBCR_709;
|
||||
//const YCbCrType yuv_format = YCBCR_JPEG;
|
||||
|
||||
Mode mode;
|
||||
if(strcmp(argv[1], "yuv2rgb")==0)
|
||||
{
|
||||
mode=YUV2RGB;
|
||||
if(argc<6)
|
||||
{
|
||||
printf("Invalid argument number for yuv2rgb mode, call without argument to see usage.\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if(strcmp(argv[1], "yuv2rgb_nv12")==0)
|
||||
{
|
||||
mode=YUV2RGB_NV12;
|
||||
}
|
||||
else if(strcmp(argv[1], "yuv2rgb_nv21")==0)
|
||||
{
|
||||
mode=YUV2RGB_NV21;
|
||||
}
|
||||
else if(strcmp(argv[1], "rgb2yuv")==0)
|
||||
{
|
||||
mode=RGB2YUV;
|
||||
}
|
||||
else if(strcmp(argv[1], "rgba2yuv")==0)
|
||||
{
|
||||
mode=RGBA2YUV;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("Invalid mode, call without argument to see usage.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *filename = argv[2];
|
||||
uint32_t width, height;
|
||||
const char *out;
|
||||
uint8_t *YUV=NULL, *RGB=NULL, *Y=NULL, *U=NULL, *V=NULL, *RGBa=NULL, *YUVa=NULL, *Ya=NULL, *Ua=NULL, *Va=NULL;
|
||||
|
||||
if(mode==YUV2RGB || mode==YUV2RGB_NV12 || mode==YUV2RGB_NV21)
|
||||
{
|
||||
//parse argument line
|
||||
width = atoi(argv[3]);
|
||||
height = atoi(argv[4]);
|
||||
out = argv[5];
|
||||
|
||||
// read input data and allocate output data
|
||||
if(readRawYUV(filename, width, height, &YUV)!=0)
|
||||
{
|
||||
printf("Error reading image file, check that the file exists and has the correct format and resolution.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if USE_FFMPEG
|
||||
yuv2rgb_swscale_ctx = sws_getContext(width, height, AV_PIX_FMT_YUV420P, width, height, AV_PIX_FMT_RGB24, 0, 0, 0, 0);
|
||||
#endif
|
||||
|
||||
RGB = malloc(3*width*height);
|
||||
|
||||
Y = YUV;
|
||||
U = YUV+width*height;
|
||||
V = YUV+width*height+((width+1)/2)*((height+1)/2);
|
||||
|
||||
// allocate aligned data
|
||||
const size_t y_stride = width + (16-width%16)%16;
|
||||
const size_t uv_stride = (mode==YUV2RGB) ? (width+1)/2 + (16-((width+1)/2)%16)%16 : y_stride;
|
||||
const size_t rgb_stride = width*3 +(16-(3*width)%16)%16;
|
||||
|
||||
const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);
|
||||
YUVa = _mm_malloc(y_size+2*uv_size, 16);
|
||||
Ya = YUVa;
|
||||
Ua = YUVa+y_size;
|
||||
Va = YUVa+y_size+uv_size;
|
||||
for(unsigned int i=0; i<height; ++i)
|
||||
{
|
||||
memcpy(Ya+i*y_stride, Y+i*width, width);
|
||||
if((i%2)==0)
|
||||
{
|
||||
if(mode==YUV2RGB)
|
||||
{
|
||||
memcpy(Ua+(i/2)*uv_stride, U+(i/2)*((width+1)/2), (width+1)/2);
|
||||
memcpy(Va+(i/2)*uv_stride, V+(i/2)*((width+1)/2), (width+1)/2);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(Ua+(i/2)*uv_stride, U+(i/2)*width, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RGBa = _mm_malloc(rgb_stride*height, 16);
|
||||
|
||||
// test all versions
|
||||
if(mode==YUV2RGB)
|
||||
{
|
||||
test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
|
||||
out, "std", iteration_number, yuv420_rgb24_std);
|
||||
test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
|
||||
out, "sse2_unaligned", iteration_number, yuv420_rgb24_sseu);
|
||||
#if USE_FFMPEG
|
||||
test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
|
||||
out, "ffmpeg_unaligned", iteration_number, yuv420_rgb24_ffmpeg);
|
||||
#endif
|
||||
#if USE_IPP
|
||||
test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
|
||||
out, "ipp_unaligned", iteration_number, yuv420_rgb24_ipp);
|
||||
#endif
|
||||
test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
|
||||
out, "sse2_aligned", iteration_number, yuv420_rgb24_sse);
|
||||
#if USE_FFMPEG
|
||||
test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
|
||||
out, "ffmpeg_aligned", iteration_number, yuv420_rgb24_ffmpeg);
|
||||
#endif
|
||||
#if USE_IPP
|
||||
test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
|
||||
out, "ipp_aligned", iteration_number, yuv420_rgb24_ipp);
|
||||
#endif
|
||||
}
|
||||
else if(mode==YUV2RGB_NV12)
|
||||
{
|
||||
test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
|
||||
out, "std", iteration_number, nv12_rgb24_std);
|
||||
test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
|
||||
out, "sse2_unaligned", iteration_number, nv12_rgb24_sseu);
|
||||
test_yuvsp2rgb(width, height, Ya, Ua, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
|
||||
out, "sse2_aligned", iteration_number, nv12_rgb24_sse);
|
||||
}
|
||||
else if(mode==YUV2RGB_NV21)
|
||||
{
|
||||
test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
|
||||
out, "std", iteration_number, nv21_rgb24_std);
|
||||
test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
|
||||
out, "sse2_unaligned", iteration_number, nv21_rgb24_sseu);
|
||||
test_yuvsp2rgb(width, height, Ya, Ua, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
|
||||
out, "sse2_aligned", iteration_number, nv21_rgb24_sse);
|
||||
}
|
||||
}
|
||||
else if(mode==RGB2YUV)
|
||||
{
|
||||
//parse argument line
|
||||
out = argv[3];
|
||||
|
||||
// read input data and allocate output data
|
||||
if(readPPM(filename, &width, &height, &RGB)!=0)
|
||||
{
|
||||
printf("Error reading image file, check that the file exists and has the correct format.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if USE_FFMPEG
|
||||
rgb2yuv_swscale_ctx = sws_getContext(width, height, AV_PIX_FMT_RGB24, width, height, AV_PIX_FMT_YUV420P, 0, 0, 0, 0);
|
||||
#endif
|
||||
|
||||
YUV = malloc(width*height*3/2);
|
||||
|
||||
Y = YUV;
|
||||
U = YUV+width*height;
|
||||
V = YUV+width*height+((width+1)/2)*((height+1)/2);
|
||||
|
||||
// allocate aligned data
|
||||
const size_t y_stride = width + (16-width%16)%16,
|
||||
uv_stride = (width+1)/2 + (16-((width+1)/2)%16)%16,
|
||||
rgb_stride = width*3 +(16-(3*width)%16)%16;
|
||||
|
||||
RGBa = _mm_malloc(rgb_stride*height, 16);
|
||||
for(unsigned int i=0; i<height; ++i)
|
||||
{
|
||||
memcpy(RGBa+i*rgb_stride, RGB+i*width*3, width*3);
|
||||
}
|
||||
|
||||
const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);
|
||||
YUVa = _mm_malloc(y_size+2*uv_size, 16);
|
||||
Ya = YUVa;
|
||||
Ua = YUVa+y_size;
|
||||
Va = YUVa+y_size+uv_size;
|
||||
|
||||
|
||||
// test all versions
|
||||
test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "std", iteration_number, rgb24_yuv420_std);
|
||||
test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "sse2_unaligned", iteration_number, rgb24_yuv420_sseu);
|
||||
#if USE_FFMPEG
|
||||
test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "ffmpeg_unaligned", iteration_number, rgb24_yuv420_ffmpeg);
|
||||
#endif
|
||||
#if USE_IPP
|
||||
test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "ipp_unaligned", iteration_number, rgb24_yuv420_ipp);
|
||||
#endif
|
||||
test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
|
||||
out, "sse2_aligned", iteration_number, rgb24_yuv420_sse);
|
||||
#if USE_FFMPEG
|
||||
test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
|
||||
out, "ffmpeg_aligned", iteration_number, rgb24_yuv420_ffmpeg);
|
||||
#endif
|
||||
#if USE_IPP
|
||||
test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
|
||||
out, "ipp_aligned", iteration_number, rgb24_yuv420_ipp);
|
||||
#endif
|
||||
}
|
||||
else if(mode==RGBA2YUV)
|
||||
{
|
||||
//parse argument line
|
||||
out = argv[3];
|
||||
|
||||
// read input data and allocate output data
|
||||
if(readPPM(filename, &width, &height, &RGB)!=0)
|
||||
{
|
||||
printf("Error reading image file, check that the file exists and has the correct format.\n");
|
||||
return 1;
|
||||
}
|
||||
// convert rgb to rgba
|
||||
uint8_t *RGBA = NULL;
|
||||
convert_rgb_to_rgba(RGB, width, height, &RGBA);
|
||||
|
||||
YUV = malloc(width*height*3/2);
|
||||
|
||||
Y = YUV;
|
||||
U = YUV+width*height;
|
||||
V = YUV+width*height+((width+1)/2)*((height+1)/2);
|
||||
|
||||
// allocate aligned data
|
||||
const size_t y_stride = width + (16-width%16)%16,
|
||||
uv_stride = (width+1)/2 + (16-((width+1)/2)%16)%16,
|
||||
rgba_stride = width*4 +(16-(4*width)%16)%16;
|
||||
|
||||
RGBa = _mm_malloc(rgba_stride*height, 16);
|
||||
for(unsigned int i=0; i<height; ++i)
|
||||
{
|
||||
memcpy(RGBa+i*rgba_stride, RGBA+i*width*4, width*4);
|
||||
}
|
||||
|
||||
const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);
|
||||
YUVa = _mm_malloc(y_size+2*uv_size, 16);
|
||||
Ya = YUVa;
|
||||
Ua = YUVa+y_size;
|
||||
Va = YUVa+y_size+uv_size;
|
||||
|
||||
// test all versions
|
||||
test_rgb2yuv(width, height, RGBA, width*4, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "std", iteration_number, rgb32_yuv420_std);
|
||||
test_rgb2yuv(width, height, RGBA, width*4, Y, U, V, width, (width+1)/2, yuv_format,
|
||||
out, "sse2_unaligned", iteration_number, rgb32_yuv420_sseu);
|
||||
test_rgb2yuv(width, height, RGBa, rgba_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
|
||||
out, "sse2_aligned", iteration_number, rgb32_yuv420_sse);
|
||||
|
||||
free(RGBA);
|
||||
}
|
||||
|
||||
_mm_free(RGBa);
|
||||
_mm_free(YUVa);
|
||||
free(RGB);
|
||||
free(YUV);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Vendored
-1312
File diff suppressed because it is too large
Load Diff
Vendored
-155
@@ -1,155 +0,0 @@
|
||||
// Copyright 2016 Adrien Descamps
|
||||
// Distributed under BSD 3-Clause License
|
||||
|
||||
// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
|
||||
|
||||
// There are a few slightly different variations of the YCbCr color space with different parameters that
|
||||
// change the conversion matrix.
|
||||
// The three most common YCbCr color spaces, defined by the BT.601, BT.709 and JPEG standards, are implemented here.
|
||||
// See the respective standards for details
|
||||
// The matrix values used are derived from http://www.equasys.de/colorconversion.html
|
||||
|
||||
// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2
|
||||
// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
|
||||
// is suboptimal for image quality, but by far the fastest method.
|
||||
|
||||
// For all methods, width and height should be even, if not, the last row/column of the result image won't be affected.
|
||||
// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Identifies which YCbCr color space variant a conversion should use.
// Passed as the yuv_type argument of the conversion functions declared in this header.
typedef enum
|
||||
{
|
||||
// YCbCr as defined by the JPEG standard
YCBCR_JPEG,
|
||||
// YCbCr as defined by BT.601
YCBCR_601,
|
||||
// YCbCr as defined by BT.709
YCBCR_709
|
||||
} YCbCrType;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// yuv to rgb, standard c implementation
|
||||
void yuv420_rgb24_std(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv to rgb, yuv in nv12 semi planar format
|
||||
void nv12_rgb24_std(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv to rgb, yuv in nv21 semi planar format
|
||||
void nv21_rgb24_std(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv to rgb, sse implementation
|
||||
// pointers must be 16 byte aligned, and strides must be divisible by 16
|
||||
void yuv420_rgb24_sse(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv to rgb, sse implementation
|
||||
// pointers do not need to be 16 byte aligned
|
||||
void yuv420_rgb24_sseu(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv nv12 to rgb, sse implementation
|
||||
// pointers must be 16 byte aligned, and strides must be divisible by 16
|
||||
void nv12_rgb24_sse(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv nv12 to rgb, sse implementation
|
||||
// pointers do not need to be 16 byte aligned
|
||||
void nv12_rgb24_sseu(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv nv21 to rgb, sse implementation
|
||||
// pointers must be 16 byte aligned, and strides must be divisible by 16
|
||||
void nv21_rgb24_sse(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// yuv nv21 to rgb, sse implementation
|
||||
// pointers do not need to be 16 byte aligned
|
||||
void nv21_rgb24_sseu(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
|
||||
uint8_t *rgb, uint32_t rgb_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
|
||||
|
||||
|
||||
// rgb to yuv, standard c implementation
|
||||
void rgb24_yuv420_std(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// rgb to yuv, sse implementation
|
||||
// pointers must be 16 byte aligned, and strides must be divisible by 16
|
||||
void rgb24_yuv420_sse(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// rgb to yuv, sse implementation
|
||||
// pointers do not need to be 16 byte aligned
|
||||
void rgb24_yuv420_sseu(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgb, uint32_t rgb_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// rgba to yuv, standard c implementation
|
||||
// alpha channel is ignored
|
||||
void rgb32_yuv420_std(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgba, uint32_t rgba_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// rgba to yuv, sse implementation
|
||||
// pointers must be 16 byte aligned, and strides must be divisible by 16
|
||||
// alpha channel is ignored
|
||||
void rgb32_yuv420_sse(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgba, uint32_t rgba_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
// rgba to yuv, sse implementation
|
||||
// pointers do not need to be 16 byte aligned
|
||||
// alpha channel is ignored
|
||||
void rgb32_yuv420_sseu(
|
||||
uint32_t width, uint32_t height,
|
||||
const uint8_t *rgba, uint32_t rgba_stride,
|
||||
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
|
||||
YCbCrType yuv_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -6,15 +6,20 @@
|
||||
#include <mem.hpp>
|
||||
#include <broadway.hpp>
|
||||
#include <SDL3/SDL.h>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
weee::core::mem mem;
|
||||
weee::core::broadway broadway;
|
||||
std::string binName;
|
||||
|
||||
cflags::cflags flags;
|
||||
flags.add_string_callback(
|
||||
'\0', "elf",
|
||||
[&](const std::string &v) {
|
||||
binName = fs::path(v).filename().string();
|
||||
if (!weee::core::load_elf(v, mem, broadway))
|
||||
ircolib::panic("Could not load '{}'", v);
|
||||
},
|
||||
@@ -23,6 +28,7 @@ int main(int argc, char **argv) {
|
||||
flags.add_string_callback(
|
||||
'\0', "dol",
|
||||
[&](const std::string &v) {
|
||||
binName = fs::path(v).filename().string();
|
||||
if (!weee::core::load_dol(v, mem, broadway))
|
||||
ircolib::panic("Could not load '{}'", v);
|
||||
},
|
||||
@@ -34,13 +40,20 @@ int main(int argc, char **argv) {
|
||||
SDL_Init(SDL_INIT_VIDEO);
|
||||
SDL_Window *window = SDL_CreateWindow("weee", 800, 600, SDL_WINDOW_HIGH_PIXEL_DENSITY | SDL_WINDOW_RESIZABLE);
|
||||
SDL_Renderer *renderer = SDL_CreateRenderer(window, nullptr);
|
||||
SDL_Texture *texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_UYVY, SDL_TEXTUREACCESS_STREAMING, 640, 240);
|
||||
SDL_Texture *texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGB24, SDL_TEXTUREACCESS_STREAMING, 640, 240);
|
||||
SDL_SetRenderDrawColor(renderer, 0, 0, 0, 0);
|
||||
SDL_SetRenderLogicalPresentation(renderer, 640, 480, SDL_LOGICAL_PRESENTATION_LETTERBOX);
|
||||
|
||||
ircolib::u8 *rgbTexture = (ircolib::u8 *)calloc(640 * 240, 3);
|
||||
|
||||
bool open = true;
|
||||
while (open) {
|
||||
ircolib::u64 start = SDL_GetTicks();
|
||||
broadway.run(mem);
|
||||
SDL_SetWindowTitle(window,
|
||||
std::format("weee - {} - {:.2f} fps | {} ms", binName,
|
||||
1000.f / ((float)SDL_GetTicks() - start), SDL_GetTicks() - start)
|
||||
.c_str());
|
||||
|
||||
SDL_Event e;
|
||||
while (SDL_PollEvent(&e)) {
|
||||
@@ -48,8 +61,11 @@ int main(int argc, char **argv) {
|
||||
open = false;
|
||||
}
|
||||
|
||||
SDL_ConvertPixels(640, 240, SDL_PIXELFORMAT_UYVY, &mem.mem1[0x104000], 640 * 4, SDL_PIXELFORMAT_BGR24,
|
||||
rgbTexture, 640 * 3);
|
||||
|
||||
SDL_RenderClear(renderer);
|
||||
SDL_UpdateTexture(texture, nullptr, &mem.mem1[0x104000], 640 * 4);
|
||||
SDL_UpdateTexture(texture, nullptr, rgbTexture, 640 * 3);
|
||||
SDL_RenderTexture(renderer, texture, nullptr, nullptr);
|
||||
SDL_RenderPresent(renderer);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user