Squashed 'external/yuv2rgb/' content from commit ee78934c8

git-subtree-dir: external/yuv2rgb
git-subtree-split: ee78934c8d542e8402bcb6eef7259217a6f859bc
This commit is contained in:
2026-05-14 11:28:27 +02:00
commit 4870214d57
15 changed files with 2347 additions and 0 deletions
+35
View File
@@ -0,0 +1,35 @@
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
# Build directory
build/
+31
View File
@@ -0,0 +1,31 @@
# Build script for the yuv_rgb test/benchmark program.
#
# Options:
#   USE_FFMPEG       - also benchmark libswscale (defines USE_FFMPEG=1, links swscale)
#   USE_IPP          - also benchmark Intel IPP  (defines USE_IPP=1, links ippcc)
#   IPP_ROOT         - IPP install prefix, only used when USE_IPP is ON
#   YUV_RGB_WERROR   - treat compiler warnings as errors (ON by default, as before)

# Versions below 3.5 are no longer accepted by current CMake releases; the
# "...max" part opts in to newer policies without breaking older CMake.
cmake_minimum_required(VERSION 3.5...3.29)
project(yuv_rgb LANGUAGES C)

set(USE_FFMPEG FALSE CACHE BOOL "Enable ffmpeg")
set(USE_IPP FALSE CACHE BOOL "Enable IPP")
option(YUV_RGB_WERROR "Treat compiler warnings as errors" ON)

if(USE_IPP)
  set(IPP_ROOT /opt/intel CACHE PATH "IPP install path")
  # link_directories() only applies to targets created after it is called,
  # so it has to stay ahead of add_executable().
  link_directories("${IPP_ROOT}/ipp/lib/intel64")
endif()

add_executable(test_yuv_rgb
  test_yuv_rgb.c
  yuv_rgb.c
)
target_include_directories(test_yuv_rgb PRIVATE "${PROJECT_SOURCE_DIR}")
target_compile_options(test_yuv_rgb PRIVATE -Wall -Wextra -pedantic -std=c99)
if(YUV_RGB_WERROR)
  target_compile_options(test_yuv_rgb PRIVATE -Werror)
endif()

if(USE_FFMPEG)
  target_compile_definitions(test_yuv_rgb PRIVATE USE_FFMPEG=1)
  target_link_libraries(test_yuv_rgb PRIVATE swscale)
endif()

if(USE_IPP)
  target_include_directories(test_yuv_rgb PRIVATE "${IPP_ROOT}/ipp/include")
  target_compile_definitions(test_yuv_rgb PRIVATE USE_IPP=1)
  target_link_libraries(test_yuv_rgb PRIVATE ippcc)
endif()
+27
View File
@@ -0,0 +1,27 @@
Copyright (c) 2016, Adrien Descamps
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of yuv2rgb nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+62
View File
@@ -0,0 +1,62 @@
# yuv2rgb
C library for fast image conversion between yuv420p and rgb24.
This is a simple library for optimized image conversion between YUV420p and rgb24.
It was done mainly as an exercise to learn to use SSE intrinsics, so there may still be room for optimization.
For each conversion, a standard optimized C function and two SSE functions (for aligned and unaligned memory) are implemented.
The SSE version requires only SSE2, which is available on any reasonably recent CPU.
The library also supports the three different YUV (YCbCr to be correct) color spaces that exist (see comments in code), and others can be added simply.
There is a simple test program that converts a raw YUV file to the rgb ppm format and measures the computation time.
Optionally, it also compares the result and computation time with the ffmpeg implementation (that uses MMX), and with the IPP functions.
To compile, simply do :
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make
The test program only supports raw YUV files for the YUV420 format, and ppm for the RGB24 format.
To generate a raw yuv file, you can use avconv:
avconv -i example.jpg -c:v rawvideo -pix_fmt yuv420p example.yuv
To generate the rgb file, you can use the ImageMagick convert program:
convert example.jpg example.ppm
Then, for YUV420 to RGB24 conversion, use the test program like this:
./test_yuv_rgb yuv2rgb image.yuv 4096 2160 image
The third and fourth parameters are the image width and height (they are needed because they are not available in the raw YUV file), and the fifth parameter is the output filename template (several output files will be generated, named for example image_std.ppm, image_sse2_unaligned.ppm, etc.)
Similarly, for RGB24 to YUV420 conversion:
./test_yuv_rgb rgb2yuv image.ppm image
On my computer, the test program on a 4K image gives the following for yuv2rgb:
Time will be measured in each configuration for 100 iterations...
Processing time (std) : 2.630193 sec
Processing time (sse2_unaligned) : 0.704394 sec
Processing time (ffmpeg_unaligned) : 1.221432 sec
Processing time (ipp_unaligned) : 0.636274 sec
Processing time (sse2_aligned) : 0.606648 sec
Processing time (ffmpeg_aligned) : 1.227100 sec
Processing time (ipp_aligned) : 0.636951 sec
And for rgb2yuv:
Time will be measured in each configuration for 100 iterations...
Processing time (std) : 2.588675 sec
Processing time (sse2_unaligned) : 0.676625 sec
Processing time (ffmpeg_unaligned) : 3.385816 sec
Processing time (ipp_unaligned) : 0.593890 sec
Processing time (sse2_aligned) : 0.640630 sec
Processing time (ffmpeg_aligned) : 3.397952 sec
Processing time (ipp_aligned) : 0.579043 sec
configuration : gcc 4.9.2, swscale 3.0.0, IPP 9.0.1, intel i7-5500U
BIN
View File
Binary file not shown.
+1
View File
File diff suppressed because one or more lines are too long
BIN
View File
Binary file not shown.
+1
View File
File diff suppressed because one or more lines are too long
BIN
View File
Binary file not shown.
+1
View File
File diff suppressed because one or more lines are too long
+45
View File
@@ -0,0 +1,45 @@
Just to document the conversion between rgb24 and planar rgb.
R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 R10 G10
B10 R11 G11 B11 R12 G12 B12 R13 G13 B13 R14 G14 B14 R15 G15 B15
R16 G16 B16 R17 G17 B17 R18 G18 B18 R19 G19 B19 R20 G20 B20 R21
G21 B21 R22 G22 B22 R23 G23 B23 R24 G24 B24 R25 G25 B25 R26 G26
B26 R27 G27 B27 R28 G28 B28 R29 G29 B29 R30 G30 B30 R31 G31 B31
R0 R16 G0 G16 B0 B16 R1 R17 G1 G17 B1 B17 R2 R18 G2 G18
B2 B18 R3 R19 G3 G19 B3 B19 R4 R20 G4 G20 B4 B20 R5 R21
G5 G21 B5 B21 R6 R22 G6 G22 B6 B22 R7 R23 G7 G23 B7 B23
R8 R24 G8 G24 B8 B24 R9 R25 G9 G25 B9 B25 R10 R26 G10 G26
B10 B26 R11 R27 G11 G27 B11 B27 R12 R28 G12 G28 B12 B28 R13 R29
G13 G29 B13 B29 R14 R30 G14 G30 B14 B30 R15 R31 G15 G31 B15 B31
R0 R8 R16 R24 G0 G8 G16 G24 B0 B8 B16 B24 R1 R9 R17 R25
G1 G9 G17 G25 B1 B9 B17 B25 R2 R10 R18 R26 G2 G10 G18 G26
B2 B10 B18 B26 R3 R11 R19 R27 G3 G11 G19 G27 B3 B11 B19 B27
R4 R12 R20 R28 G4 G12 G20 G28 B4 B12 B20 B28 R5 R13 R21 R29
G5 G13 G21 G29 B5 B13 B21 B29 R6 R14 R22 R30 G6 G14 G22 G30
B6 B14 B22 B30 R7 R15 R23 R31 G7 G15 G23 G31 B7 B15 B23 B31
R0 R4 R8 R12 R16 R20 R24 R28 G0 G4 G8 G12 G16 G20 G24 G28
B0 B4 B8 B12 B16 B20 B24 B28 R1 R5 R9 R13 R17 R21 R25 R29
G1 G5 G9 G13 G17 G21 G25 G29 B1 B5 B9 B13 B17 B21 B25 B29
R2 R6 R10 R14 R18 R22 R26 R30 G2 G6 G10 G14 G18 G22 G26 G30
B2 B6 B10 B14 B18 B22 B26 B30 R3 R7 R11 R15 R19 R23 R27 R31
G3 G7 G11 G15 G19 G23 G27 G31 B3 B7 B11 B15 B19 B23 B27 B31
R0 R2 R4 R6 R8 R10 R12 R14 R16 R18 R20 R22 R24 R26 R28 R30
G0 G2 G4 G6 G8 G10 G12 G14 G16 G18 G20 G22 G24 G26 G28 G30
B0 B2 B4 B6 B8 B10 B12 B14 B16 B18 B20 B22 B24 B26 B28 B30
R1 R3 R5 R7 R9 R11 R13 R15 R17 R19 R21 R23 R25 R27 R29 R31
G1 G3 G5 G7 G9 G11 G13 G15 G17 G19 G21 G23 G25 G27 G29 G31
B1 B3 B5 B7 B9 B11 B13 B15 B17 B19 B21 B23 B25 B27 B29 B31
R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 R30 R31
G0 G1 G2 G3 G4 G5 G6 G7 G8 G9 G10 G11 G12 G13 G14 G15
G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15
B16 B17 B18 B19 B20 B21 B22 B23 B24 B25 B26 B27 B28 B29 B30 B31
+54
View File
@@ -0,0 +1,54 @@
Just to document the conversion between rgb32 and planar rgb.
R0 G0 B0 A0 R1 G1 B1 A1 R2 G2 B2 A2 R3 G3 B3 A3
R4 G4 B4 A4 R5 G5 B5 A5 R6 G6 B6 A6 R7 G7 B7 A7
R8 G8 B8 A8 R9 G9 B9 A9 R10 G10 B10 A10 R11 G11 B11 A11
R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 R15 G15 B15 A15
R16 G16 B16 A16 R17 G17 B17 A17 R18 G18 B18 A18 R19 G19 B19 A19
R20 G20 B20 A20 R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23
R24 G24 B24 A24 R25 G25 B25 A25 R26 G26 B26 A26 R27 G27 B27 A27
R28 G28 B28 A28 R29 G29 B29 A29 R30 G30 B30 A30 R31 G31 B31 A31
R0 R16 G0 G16 B0 B16 A0 A16 R1 R17 G1 G17 B1 B17 A1 A17
R2 R18 G2 G18 B2 B18 A2 A18 R3 R19 G3 G19 B3 B19 A3 A19
R4 R20 G4 G20 B4 B20 A4 A20 R5 R21 G5 G21 B5 B21 A5 A21
R6 R22 G6 G22 B6 B22 A6 A22 R7 R23 G7 G23 B7 B23 A7 A23
R8 R24 G8 G24 B8 B24 A8 A24 R9 R25 G9 G25 B9 B25 A9 A25
R10 R26 G10 G26 B10 B26 A10 A26 R11 R27 G11 G27 B11 B27 A11 A27
R12 R28 G12 G28 B12 B28 A12 A28 R13 R29 G13 G29 B13 B29 A13 A29
R14 R30 G14 G30 B14 B30 A14 A30 R15 R31 G15 G31 B15 B31 A15 A31
R0 R8 R16 R24 G0 G8 G16 G24 B0 B8 B16 B24 A0 A8 A16 A24
R1 R9 R17 R25 G1 G9 G17 G25 B1 B9 B17 B25 A1 A9 A17 A25
R2 R10 R18 R26 G2 G10 G18 G26 B2 B10 B18 B26 A2 A10 A18 A26
R3 R11 R19 R27 G3 G11 G19 G27 B3 B11 B19 B27 A3 A11 A19 A27
R4 R12 R20 R28 G4 G12 G20 G28 B4 B12 B20 B28 A4 A12 A20 A28
R5 R13 R21 R29 G5 G13 G21 G29 B5 B13 B21 B29 A5 A13 A21 A29
R6 R14 R22 R30 G6 G14 G22 G30 B6 B14 B22 B30 A6 A14 A22 A30
R7 R15 R23 R31 G7 G15 G23 G31 B7 B15 B23 B31 A7 A15 A23 A31
R0 R4 R8 R12 R16 R20 R24 R28 G0 G4 G8 G12 G16 G20 G24 G28
B0 B4 B8 B12 B16 B20 B24 B28 A0 A4 A8 A12 A16 A20 A24 A28
R1 R5 R9 R13 R17 R21 R25 R29 G1 G5 G9 G13 G17 G21 G25 G29
B1 B5 B9 B13 B17 B21 B25 B29 A1 A5 A9 A13 A17 A21 A25 A29
R2 R6 R10 R14 R18 R22 R26 R30 G2 G6 G10 G14 G18 G22 G26 G30
B2 B6 B10 B14 B18 B22 B26 B30 A2 A6 A10 A14 A18 A22 A26 A30
R3 R7 R11 R15 R19 R23 R27 R31 G3 G7 G11 G15 G19 G23 G27 G31
B3 B7 B11 B15 B19 B23 B27 B31 A3 A7 A11 A15 A19 A23 A27 A31
R0 R2 R4 R6 R8 R10 R12 R14 R16 R18 R20 R22 R24 R26 R28 R30
G0 G2 G4 G6 G8 G10 G12 G14 G16 G18 G20 G22 G24 G26 G28 G30
B0 B2 B4 B6 B8 B10 B12 B14 B16 B18 B20 B22 B24 B26 B28 B30
A0 A2 A4 A6 A8 A10 A12 A14 A16 A18 A20 A22 A24 A26 A28 A30
R1 R3 R5 R7 R9 R11 R13 R15 R17 R19 R21 R23 R25 R27 R29 R31
G1 G3 G5 G7 G9 G11 G13 G15 G17 G19 G21 G23 G25 G27 G29 G31
B1 B3 B5 B7 B9 B11 B13 B15 B17 B19 B21 B23 B25 B27 B29 B31
A1 A3 A5 A7 A9 A11 A13 A15 A17 A19 A21 A23 A25 A27 A29 A31
R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 R30 R31
G0 G1 G2 G3 G4 G5 G6 G7 G8 G9 G10 G11 G12 G13 G14 G15
G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29 G30 G31
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15
B16 B17 B18 B19 B20 B21 B22 B23 B24 B25 B26 B27 B28 B29 B30 B31
+623
View File
@@ -0,0 +1,623 @@
// Copyright 2016 Adrien Descamps
// Distributed under BSD 3-Clause License
// This program demonstrates how to convert a YUV420p image (raw format) to RGB (ppm format), and the reverse operation
#include "yuv_rgb.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <x86intrin.h>
#if USE_FFMPEG
#include <libswscale/swscale.h>
#endif
#if USE_IPP
#include <ippcc.h>
#endif
// Read a raw yuv420 image file.
// Raw yuv files can be generated by ffmpeg, for example, using :
// ffmpeg -i test.png -c:v rawvideo -pix_fmt yuv420p test.yuv
// The file must contain exactly width*height luma bytes followed by two chroma
// planes of ((width+1)/2)*((height+1)/2) bytes each.
// The returned image channels are contiguous, and Y stride=width, U and V stride=(width+1)/2.
// On success *YUV points to the data, which must be freed with free.
// On failure *YUV is set to NULL and nothing has to be freed.
// Returns 0 on success, 1 if the file cannot be opened, 2 if its size does not
// match the given resolution, 3 if it cannot be measured, allocated or read.
int readRawYUV(const char *filename, uint32_t width, uint32_t height, uint8_t **YUV)
{
    *YUV = NULL;

    FILE *fp = fopen(filename, "rb");
    if(!fp)
    {
        perror("Error opening yuv image for read");
        return 1;
    }

    // compute the expected size in size_t so large images do not wrap in 32 bits
    const size_t chroma_size = (((size_t)width+1)/2)*(((size_t)height+1)/2);
    const size_t expected = (size_t)width*height + 2*chroma_size;

    // check file size
    long file_size = -1;
    if(fseek(fp, 0, SEEK_END)==0)
    {
        file_size = ftell(fp);
    }
    if(file_size<0 || fseek(fp, 0, SEEK_SET)!=0)
    {
        perror("Error measuring yuv image");
        fclose(fp);
        return 3;
    }
    if((unsigned long)file_size!=expected)
    {
        fprintf(stderr, "Wrong size of yuv image : %ld bytes, expected %zu bytes\n", file_size, expected);
        fclose(fp);
        return 2;
    }

    // malloc(0) may legitimately return NULL, so never ask for zero bytes
    uint8_t *data = malloc(expected ? expected : 1);
    if(!data)
    {
        perror("Error allocating yuv image memory");
        fclose(fp);
        return 3;
    }

    if(fread(data, 1, expected, fp)!=expected)
    {
        perror("Error reading yuv image");
        free(data);
        fclose(fp);
        return 3;
    }

    fclose(fp);
    *YUV = data;
    return 0;
}
// Write a raw yuv420 image file.
// YUV points to the Y plane (height rows of y_stride bytes), immediately followed by
// the U plane and then the V plane (each (height+1)/2 rows of uv_stride bytes).
// Only the first width (resp. (width+1)/2) bytes of each row are written, so any
// row padding is dropped and the file is always tightly packed.
// Returns 0 on success, 1 if the file cannot be opened, 2 if writing fails.
int saveRawYUV(const char *filename, uint32_t width, uint32_t height, const uint8_t *YUV, size_t y_stride, size_t uv_stride)
{
    FILE *fp = fopen(filename, "wb");
    if(!fp)
    {
        perror("Error opening yuv image for write");
        return 1;
    }

    const size_t chroma_width = ((size_t)width+1)/2;
    const size_t chroma_height = ((size_t)height+1)/2;
    int ok = 1;

    // luma plane
    if(y_stride==width)
    {
        // no padding: the whole plane can be written in one call
        const size_t y_size = (size_t)width*height;
        ok = (fwrite(YUV, 1, y_size, fp)==y_size);
        YUV+=y_size;
    }
    else
    {
        for(uint32_t row=0; ok && row<height; ++row)
        {
            ok = (fwrite(YUV, 1, width, fp)==width);
            YUV+=y_stride;
        }
    }

    // chroma planes (U rows, then V rows)
    if(ok)
    {
        // the planes are contiguous only when the stride equals the chroma width,
        // i.e. (width+1)/2 -- not width
        if(uv_stride==chroma_width)
        {
            const size_t uv_size = 2*chroma_width*chroma_height;
            ok = (fwrite(YUV, 1, uv_size, fp)==uv_size);
        }
        else
        {
            for(size_t row=0; ok && row<2*chroma_height; ++row)
            {
                ok = (fwrite(YUV, 1, chroma_width, fp)==chroma_width);
                YUV+=uv_stride;
            }
        }
    }

    if(!ok)
    {
        perror("Error writing yuv image");
    }
    if(fclose(fp)!=0 && ok)
    {
        perror("Error writing yuv image");
        ok = 0;
    }
    return ok ? 0 : 2;
}
// Read a binary ppm ("P6") image file with a maximum sample value of at most 255.
// On success *width and *height receive the image size and *RGB points to
// 3*width*height bytes of packed pixel data, which must be freed with free.
// On failure *RGB is set to NULL and nothing has to be freed.
// Comment lines ('#') inside the header are not supported.
// Returns 0 on success, 1 if the file cannot be opened, 2 if the pixel buffer
// cannot be allocated, 3 if the header or the pixel data is invalid or truncated.
int readPPM(const char* filename, uint32_t *width, uint32_t *height, uint8_t **RGB)
{
    *RGB = NULL;

    FILE *fp = fopen(filename, "rb");
    if(!fp)
    {
        perror("Error opening rgb image for read");
        return 1;
    }

    char magic[3] = {0};
    if(fread(magic, 1, 2, fp)!=2 || strcmp(magic,"P6")!=0)
    {
        // not an errno failure, so perror would print a misleading reason
        fprintf(stderr, "Error reading rgb image header, or invalid format\n");
        fclose(fp);
        return 3;
    }

    // No trailing whitespace in the format: it would also swallow leading pixel
    // bytes whose value happens to be a whitespace character (0x09-0x0D, 0x20).
    unsigned int w, h, max;
    if(fscanf(fp, " %u %u %u", &w, &h, &max)!=3 || max>255)
    {
        fprintf(stderr, "Error reading rgb image header, or invalid values\n");
        fclose(fp);
        return 3;
    }

    // the header ends with exactly one whitespace character; tolerate files that omit it
    const int separator = fgetc(fp);
    const int is_space = (separator==' ' || separator=='\t' || separator=='\n' ||
                          separator=='\v' || separator=='\f' || separator=='\r');
    if(separator!=EOF && !is_space)
    {
        ungetc(separator, fp);
    }

    *width = w;
    *height = h;

    // size_t arithmetic so that large images do not wrap in 32 bits
    const size_t size = (size_t)3*w*h;
    uint8_t *data = malloc(size);
    if(!data)
    {
        perror("Error allocating rgb image memory");
        fclose(fp);
        return 2;
    }

    if(fread(data, 1, size, fp)!=size)
    {
        perror("Error reading rgb image");
        free(data);
        fclose(fp);
        return 3;
    }

    fclose(fp);
    *RGB = data;
    return 0;
}
// Store a packed rgb24 image as a binary ppm ("P6") file.
// RGB holds height rows of stride bytes; only the first 3*width bytes of each
// row are written, so row padding never reaches the file.
// Returns 0 on success, 1 if the file cannot be opened.
int savePPM(const char* filename, uint32_t width, uint32_t height, const uint8_t *RGB, size_t stride)
{
    FILE *out = fopen(filename, "wb");
    if(out==NULL)
    {
        perror("Error opening rgb image for write");
        return 1;
    }

    fprintf(out, "P6 %u %u 255\n", width, height);

    const uint32_t row_bytes = 3*width;
    if(stride!=row_bytes)
    {
        // padded rows: emit the payload of each row separately
        const uint8_t *row = RGB;
        for(uint32_t remaining=height; remaining>0; --remaining)
        {
            fwrite(row, 1, row_bytes, out);
            row += stride;
        }
    }
    else
    {
        // tightly packed: one write covers the whole image
        fwrite(RGB, 1, row_bytes*height, out);
    }

    fclose(out);
    return 0;
}
// Expand a tightly packed rgb24 image into a newly allocated, tightly packed
// 4-bytes-per-pixel image. The fourth byte of every pixel is set to 0.
// *RGBA receives the new buffer, which must be freed with free.
void convert_rgb_to_rgba(const uint8_t *RGB, uint32_t width, uint32_t height, uint8_t **RGBA)
{
    uint8_t *dst = malloc(4*width*height);
    *RGBA = dst;

    const uint8_t *src = RGB;
    for(uint32_t row=0; row<height; ++row)
    {
        for(uint32_t col=0; col<width; ++col, src+=3, dst+=4)
        {
            dst[0] = src[0];
            dst[1] = src[1];
            dst[2] = src[2];
            dst[3] = 0;
        }
    }
}
// Conversion to benchmark, selected in main from the first command line argument.
typedef enum
{
RGB2YUV, // "rgb2yuv": packed rgb24 (ppm input) to planar yuv420
YUV2RGB, // "yuv2rgb": planar yuv420 (raw input) to rgb24
YUV2RGB_NV12, // "yuv2rgb_nv12": semi planar input handled by the nv12_* functions
YUV2RGB_NV21, // "yuv2rgb_nv21": semi planar input handled by the nv21_* functions
RGBA2YUV // "rgba2yuv": ppm input expanded to 4 bytes per pixel, then converted to planar yuv420
} Mode;
// Function pointer types matching the conversion functions declared in yuv_rgb.h,
// so that every implementation of a conversion can be timed by the same test helper.

// planar yuv (separate y, u and v pointers) to rgb, e.g. yuv420_rgb24_std
typedef void (*yuv2rgb_ptr)(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// semi planar yuv (y pointer plus a single uv pointer) to rgb, e.g. nv12_rgb24_std
typedef void (*yuvsp2rgb_ptr)(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// rgb (or rgba) to planar yuv, e.g. rgb24_yuv420_std or rgb32_yuv420_std
typedef void (*rgb2yuv_ptr)(
uint32_t width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// Benchmark one planar yuv420 to rgb implementation and keep its output.
// yuv2rgb_fun is run iteration_number times on the same buffers, the elapsed
// clock() time is printed labelled with name, and the content of rgb is then
// saved as "<file>_<name>.ppm".
void test_yuv2rgb(uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride, YCbCrType yuv_type,
    const char *file, const char *name, uint32_t iteration_number, const yuv2rgb_ptr yuv2rgb_fun)
{
    const clock_t start = clock();
    for(uint32_t remaining=iteration_number; remaining>0; --remaining)
    {
        yuv2rgb_fun(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    }
    const clock_t elapsed = clock()-start;
    printf("Processing time (%s) : %f sec\n", name, ((float)elapsed)/CLOCKS_PER_SEC);

    // room for file + '_' + name + ".ppm" + terminating NUL
    const size_t path_size = strlen(file)+strlen(name)+6;
    char *path = malloc(path_size);
    snprintf(path, path_size, "%s_%s.ppm", file, name);
    savePPM(path, width, height, rgb, rgb_stride);
    free(path);
}
// Benchmark one semi planar yuv to rgb implementation and keep its output.
// yuv2rgb_fun is run iteration_number times on the same buffers, the elapsed
// clock() time is printed labelled with name, and the content of rgb is then
// saved as "<file>_<name>.ppm".
void test_yuvsp2rgb(uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride, YCbCrType yuv_type,
    const char *file, const char *name, uint32_t iteration_number, const yuvsp2rgb_ptr yuv2rgb_fun)
{
    const clock_t start = clock();
    for(uint32_t remaining=iteration_number; remaining>0; --remaining)
    {
        yuv2rgb_fun(width, height, y, uv, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
    }
    const clock_t elapsed = clock()-start;
    printf("Processing time (%s) : %f sec\n", name, ((float)elapsed)/CLOCKS_PER_SEC);

    // room for file + '_' + name + ".ppm" + terminating NUL
    const size_t path_size = strlen(file)+strlen(name)+6;
    char *path = malloc(path_size);
    snprintf(path, path_size, "%s_%s.ppm", file, name);
    savePPM(path, width, height, rgb, rgb_stride);
    free(path);
}
// Benchmark one rgb to planar yuv420 implementation and keep its output.
// rgb2yuv_fun is run iteration_number times on the same buffers, the elapsed
// clock() time is printed labelled with name, and the planes starting at y are
// then saved as "<file>_<name>.yuv".
void test_rgb2yuv(uint32_t width, uint32_t height,
    const uint8_t *rgb, uint32_t rgb_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type,
    const char *file, const char *name, uint32_t iteration_number, const rgb2yuv_ptr rgb2yuv_fun)
{
    const clock_t start = clock();
    for(uint32_t remaining=iteration_number; remaining>0; --remaining)
    {
        rgb2yuv_fun(width, height, rgb, rgb_stride, y, u, v, y_stride, uv_stride, yuv_type);
    }
    const clock_t elapsed = clock()-start;
    printf("Processing time (%s) : %f sec\n", name, ((float)elapsed)/CLOCKS_PER_SEC);

    // room for file + '_' + name + ".yuv" + terminating NUL
    const size_t path_size = strlen(file)+strlen(name)+6;
    char *path = malloc(path_size);
    snprintf(path, path_size, "%s_%s.yuv", file, name);
    // saveRawYUV only takes the y pointer: u and v are expected to follow it in memory
    saveRawYUV(path, width, height, y, y_stride, uv_stride);
    free(path);
}
// equivalent conversion functions for external libraries
// They use the yuv2rgb_ptr / rgb2yuv_ptr signatures so that the test helpers can time them
// exactly like the functions of this library.
#if USE_FFMPEG
// swscale contexts used by the two wrappers below. They are NULL until main creates them
// with sws_getContext for the size of the image under test.
static struct SwsContext *yuv2rgb_swscale_ctx = NULL;
static struct SwsContext *rgb2yuv_swscale_ctx = NULL;
// planar yuv420 to rgb24 through sws_scale, using yuv2rgb_swscale_ctx.
// width and yuv_type are not used here (the context was created with the image size);
// NOTE(review): the color matrix is presumably whatever swscale defaults to - confirm before comparing outputs.
void yuv420_rgb24_ffmpeg(uint32_t __attribute__ ((unused)) width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType __attribute__ ((unused)) yuv_type)
{
// one pointer and one line size per input plane, a single packed output plane
const uint8_t *const inData[3] = {y, u, v};
int inLinesize[3] = {y_stride, uv_stride, uv_stride};
int outLinesize[1] = {rgb_stride};
// the whole image is passed as a single slice: first row 0, height rows
sws_scale(yuv2rgb_swscale_ctx, inData, inLinesize, 0, height, &rgb, outLinesize);
}
// rgb24 to planar yuv420 through sws_scale, using rgb2yuv_swscale_ctx.
// width and yuv_type are not used here (the context was created with the image size).
void rgb24_yuv420_ffmpeg(uint32_t __attribute__ ((unused)) width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType __attribute__ ((unused)) yuv_type)
{
// a single packed input plane, one pointer and one line size per output plane
int inLineSize[1] = {rgb_stride};
int outLineSize[3] = {y_stride, uv_stride, uv_stride};
uint8_t *const outData[3] = {y, u, v};
// the whole image is passed as a single slice: first row 0, height rows
sws_scale(rgb2yuv_swscale_ctx, &rgb, inLineSize, 0, height, outData, outLineSize);
}
#endif
#if USE_IPP
// planar yuv420 to rgb24 through Intel IPP (ippiYCbCr420ToRGB_8u_P3C3R).
// Uses the yuv2rgb_ptr signature so that test_yuv2rgb can time it; yuv_type is not used,
// the conversion matrix is whatever the IPP function implements.
void yuv420_rgb24_ipp(uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType __attribute__ ((unused)) yuv_type)
{
// one pointer and one step per input plane, a single packed output image
const Ipp8u* pSrc[3] = {y, u, v};
int srcStep[3] = {y_stride, uv_stride, uv_stride};
Ipp8u* pDst = rgb;
int dstStep = rgb_stride;
IppiSize imgSize = {.width=width, .height=height};
// NOTE(review): the returned status is not checked
ippiYCbCr420ToRGB_8u_P3C3R(pSrc, srcStep, pDst, dstStep, imgSize);
}
// rgb24 to planar yuv420 through Intel IPP (ippiRGBToYCbCr420_8u_C3P3R).
// Uses the rgb2yuv_ptr signature so that test_rgb2yuv can time it; yuv_type is not used.
void rgb24_yuv420_ipp(uint32_t width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType __attribute__ ((unused)) yuv_type)
{
// a single packed input image, one pointer and one step per output plane
const Ipp8u* pSrc = rgb;
int srcStep = rgb_stride;
Ipp8u* pDst[3] = {y, u, v};
int dstStep[3] = {y_stride, uv_stride, uv_stride};
IppiSize imgSize = {.width=width, .height=height};
// NOTE(review): the returned status is not checked
ippiRGBToYCbCr420_8u_C3P3R(pSrc, srcStep, pDst, dstStep, imgSize);
}
#endif
// Test program entry point.
// argv[1] selects the conversion (yuv2rgb, yuv2rgb_nv12, yuv2rgb_nv21, rgb2yuv, rgba2yuv),
// argv[2] is the input file. The yuv input modes additionally take the image width,
// height and the output filename template; the rgb input modes only take the template.
// Every available implementation is timed over 100 iterations, first on tightly packed
// buffers, then on 16 byte aligned buffers whose strides are padded to a multiple of 16,
// and each result is written to "<template>_<implementation>.ppm" or ".yuv".
// Returns 0 on success and 1 on any usage, input or allocation error.
int main(int argc, char **argv)
{
    if(argc<4)
    {
        printf("Usage : test yuv2rgb <yuv image file> <image width> <image height> <output template filename>\n");
        printf("Or : test yuv2rgb_nv12 <yuv image file> <image width> <image height> <output template filename>\n");
        printf("Or : test yuv2rgb_nv21 <yuv image file> <image width> <image height> <output template filename>\n");
        printf("Or : test rgb2yuv <rgb24 binary ppm image file> <output template filename>\n");
        printf("Or : test rgba2yuv <rgb24 binary ppm image file> <output template filename>\n");
        return 1;
    }

    const int iteration_number = 100;
    printf("Time will be measured in each configuration for %d iterations...\n", iteration_number);

    // color space used by every conversion; YCBCR_709 and YCBCR_JPEG are the alternatives
    const YCbCrType yuv_format = YCBCR_601;

    Mode mode;
    if(strcmp(argv[1], "yuv2rgb")==0)
    {
        mode=YUV2RGB;
    }
    else if(strcmp(argv[1], "yuv2rgb_nv12")==0)
    {
        mode=YUV2RGB_NV12;
    }
    else if(strcmp(argv[1], "yuv2rgb_nv21")==0)
    {
        mode=YUV2RGB_NV21;
    }
    else if(strcmp(argv[1], "rgb2yuv")==0)
    {
        mode=RGB2YUV;
    }
    else if(strcmp(argv[1], "rgba2yuv")==0)
    {
        mode=RGBA2YUV;
    }
    else
    {
        printf("Invalid mode, call without argument to see usage.\n");
        return 1;
    }

    // every yuv input mode reads argv[3], argv[4] and argv[5], not only yuv2rgb
    const int yuv_input = (mode==YUV2RGB || mode==YUV2RGB_NV12 || mode==YUV2RGB_NV21);
    if(yuv_input && argc<6)
    {
        printf("Invalid argument number for %s mode, call without argument to see usage.\n", argv[1]);
        return 1;
    }

    const char *filename = argv[2];
    uint32_t width, height;
    const char *out;
    int status = 0;
    uint8_t *YUV=NULL, *RGB=NULL, *RGBA=NULL, *Y=NULL, *U=NULL, *V=NULL, *RGBa=NULL, *YUVa=NULL, *Ya=NULL, *Ua=NULL, *Va=NULL;

    if(yuv_input)
    {
        //parse argument line
        const int parsed_width = atoi(argv[3]), parsed_height = atoi(argv[4]);
        if(parsed_width<=0 || parsed_height<=0)
        {
            printf("Invalid image size, width and height must be positive integers.\n");
            return 1;
        }
        width = (uint32_t)parsed_width;
        height = (uint32_t)parsed_height;
        out = argv[5];

        // read input data and allocate output data
        if(readRawYUV(filename, width, height, &YUV)!=0)
        {
            printf("Error reading image file, check that the file exists and has the correct format and resolution.\n");
            status = 1;
            goto cleanup;
        }

#if USE_FFMPEG
        yuv2rgb_swscale_ctx = sws_getContext(width, height, AV_PIX_FMT_YUV420P, width, height, AV_PIX_FMT_RGB24, 0, 0, 0, 0);
#endif

        RGB = malloc((size_t)3*width*height);

        Y = YUV;
        U = YUV+width*height;
        V = YUV+width*height+((width+1)/2)*((height+1)/2);

        // allocate aligned data: every stride is rounded up to a multiple of 16
        const size_t y_stride = width + (16-width%16)%16;
        const size_t uv_stride = (mode==YUV2RGB) ? (width+1)/2 + (16-((width+1)/2)%16)%16 : y_stride;
        const size_t rgb_stride = width*3 +(16-(3*width)%16)%16;
        const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);

        YUVa = _mm_malloc(y_size+2*uv_size, 16);
        RGBa = _mm_malloc(rgb_stride*height, 16);
        if(!RGB || !YUVa || !RGBa)
        {
            printf("Error allocating image buffers.\n");
            status = 1;
            goto cleanup;
        }

        Ya = YUVa;
        Ua = YUVa+y_size;
        Va = YUVa+y_size+uv_size;
        for(unsigned int i=0; i<height; ++i)
        {
            memcpy(Ya+i*y_stride, Y+i*width, width);
            if((i%2)==0)
            {
                if(mode==YUV2RGB)
                {
                    memcpy(Ua+(i/2)*uv_stride, U+(i/2)*((width+1)/2), (width+1)/2);
                    memcpy(Va+(i/2)*uv_stride, V+(i/2)*((width+1)/2), (width+1)/2);
                }
                else
                {
                    // semi planar: a single chroma plane with width bytes per row
                    memcpy(Ua+(i/2)*uv_stride, U+(i/2)*width, width);
                }
            }
        }

        // test all versions
        if(mode==YUV2RGB)
        {
            test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
                out, "std", iteration_number, yuv420_rgb24_std);
            test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
                out, "sse2_unaligned", iteration_number, yuv420_rgb24_sseu);
#if USE_FFMPEG
            test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
                out, "ffmpeg_unaligned", iteration_number, yuv420_rgb24_ffmpeg);
#endif
#if USE_IPP
            test_yuv2rgb(width, height, Y, U, V, width, (width+1)/2, RGB, width*3, yuv_format,
                out, "ipp_unaligned", iteration_number, yuv420_rgb24_ipp);
#endif
            test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
                out, "sse2_aligned", iteration_number, yuv420_rgb24_sse);
#if USE_FFMPEG
            test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
                out, "ffmpeg_aligned", iteration_number, yuv420_rgb24_ffmpeg);
#endif
#if USE_IPP
            test_yuv2rgb(width, height, Ya, Ua, Va, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
                out, "ipp_aligned", iteration_number, yuv420_rgb24_ipp);
#endif
        }
        else if(mode==YUV2RGB_NV12)
        {
            test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
                out, "std", iteration_number, nv12_rgb24_std);
            test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
                out, "sse2_unaligned", iteration_number, nv12_rgb24_sseu);
            test_yuvsp2rgb(width, height, Ya, Ua, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
                out, "sse2_aligned", iteration_number, nv12_rgb24_sse);
        }
        else if(mode==YUV2RGB_NV21)
        {
            test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
                out, "std", iteration_number, nv21_rgb24_std);
            test_yuvsp2rgb(width, height, Y, U, width, width, RGB, width*3, yuv_format,
                out, "sse2_unaligned", iteration_number, nv21_rgb24_sseu);
            test_yuvsp2rgb(width, height, Ya, Ua, y_stride, uv_stride, RGBa, rgb_stride, yuv_format,
                out, "sse2_aligned", iteration_number, nv21_rgb24_sse);
        }
    }
    else if(mode==RGB2YUV)
    {
        //parse argument line
        out = argv[3];

        // read input data and allocate output data
        if(readPPM(filename, &width, &height, &RGB)!=0)
        {
            printf("Error reading image file, check that the file exists and has the correct format.\n");
            status = 1;
            goto cleanup;
        }

#if USE_FFMPEG
        rgb2yuv_swscale_ctx = sws_getContext(width, height, AV_PIX_FMT_RGB24, width, height, AV_PIX_FMT_YUV420P, 0, 0, 0, 0);
#endif

        // the chroma planes are rounded up for odd sizes, so width*height*3/2 would be too small
        const size_t chroma_size = (size_t)((width+1)/2)*((height+1)/2);
        YUV = malloc((size_t)width*height + 2*chroma_size);
        Y = YUV;
        U = YUV+width*height;
        V = YUV+width*height+((width+1)/2)*((height+1)/2);

        // allocate aligned data: every stride is rounded up to a multiple of 16
        const size_t y_stride = width + (16-width%16)%16,
            uv_stride = (width+1)/2 + (16-((width+1)/2)%16)%16,
            rgb_stride = width*3 +(16-(3*width)%16)%16;
        const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);

        RGBa = _mm_malloc(rgb_stride*height, 16);
        YUVa = _mm_malloc(y_size+2*uv_size, 16);
        if(!YUV || !RGBa || !YUVa)
        {
            printf("Error allocating image buffers.\n");
            status = 1;
            goto cleanup;
        }

        for(unsigned int i=0; i<height; ++i)
        {
            memcpy(RGBa+i*rgb_stride, RGB+i*width*3, width*3);
        }
        Ya = YUVa;
        Ua = YUVa+y_size;
        Va = YUVa+y_size+uv_size;

        // test all versions
        test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
            out, "std", iteration_number, rgb24_yuv420_std);
        test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
            out, "sse2_unaligned", iteration_number, rgb24_yuv420_sseu);
#if USE_FFMPEG
        test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
            out, "ffmpeg_unaligned", iteration_number, rgb24_yuv420_ffmpeg);
#endif
#if USE_IPP
        test_rgb2yuv(width, height, RGB, width*3, Y, U, V, width, (width+1)/2, yuv_format,
            out, "ipp_unaligned", iteration_number, rgb24_yuv420_ipp);
#endif
        test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
            out, "sse2_aligned", iteration_number, rgb24_yuv420_sse);
#if USE_FFMPEG
        test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
            out, "ffmpeg_aligned", iteration_number, rgb24_yuv420_ffmpeg);
#endif
#if USE_IPP
        test_rgb2yuv(width, height, RGBa, rgb_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
            out, "ipp_aligned", iteration_number, rgb24_yuv420_ipp);
#endif
    }
    else if(mode==RGBA2YUV)
    {
        //parse argument line
        out = argv[3];

        // read input data and allocate output data
        if(readPPM(filename, &width, &height, &RGB)!=0)
        {
            printf("Error reading image file, check that the file exists and has the correct format.\n");
            status = 1;
            goto cleanup;
        }

        // convert rgb to rgba
        convert_rgb_to_rgba(RGB, width, height, &RGBA);

        // the chroma planes are rounded up for odd sizes, so width*height*3/2 would be too small
        const size_t chroma_size = (size_t)((width+1)/2)*((height+1)/2);
        YUV = malloc((size_t)width*height + 2*chroma_size);
        Y = YUV;
        U = YUV+width*height;
        V = YUV+width*height+((width+1)/2)*((height+1)/2);

        // allocate aligned data: every stride is rounded up to a multiple of 16
        const size_t y_stride = width + (16-width%16)%16,
            uv_stride = (width+1)/2 + (16-((width+1)/2)%16)%16,
            rgba_stride = width*4 +(16-(4*width)%16)%16;
        const size_t y_size = y_stride*height, uv_size = uv_stride*((height+1)/2);

        RGBa = _mm_malloc(rgba_stride*height, 16);
        YUVa = _mm_malloc(y_size+2*uv_size, 16);
        if(!RGBA || !YUV || !RGBa || !YUVa)
        {
            printf("Error allocating image buffers.\n");
            status = 1;
            goto cleanup;
        }

        for(unsigned int i=0; i<height; ++i)
        {
            memcpy(RGBa+i*rgba_stride, RGBA+i*width*4, width*4);
        }
        Ya = YUVa;
        Ua = YUVa+y_size;
        Va = YUVa+y_size+uv_size;

        // test all versions
        test_rgb2yuv(width, height, RGBA, width*4, Y, U, V, width, (width+1)/2, yuv_format,
            out, "std", iteration_number, rgb32_yuv420_std);
        test_rgb2yuv(width, height, RGBA, width*4, Y, U, V, width, (width+1)/2, yuv_format,
            out, "sse2_unaligned", iteration_number, rgb32_yuv420_sseu);
        test_rgb2yuv(width, height, RGBa, rgba_stride, Ya, Ua, Va, y_stride, uv_stride, yuv_format,
            out, "sse2_aligned", iteration_number, rgb32_yuv420_sse);
    }

cleanup:
    // single exit path: every buffer is either NULL or owned by main at this point
#if USE_FFMPEG
    sws_freeContext(yuv2rgb_swscale_ctx);
    sws_freeContext(rgb2yuv_swscale_ctx);
#endif
    if(RGBa)
    {
        _mm_free(RGBa);
    }
    if(YUVa)
    {
        _mm_free(YUVa);
    }
    free(RGBA);
    free(RGB);
    free(YUV);

    return status;
}
+1312
View File
File diff suppressed because it is too large Load Diff
+155
View File
@@ -0,0 +1,155 @@
// Copyright 2016 Adrien Descamps
// Distributed under BSD 3-Clause License
// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
// There are a few slightly different variations of the YCbCr color space with different parameters that
// change the conversion matrix.
// The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here.
// See the respective standards for details
// The matrix values used are derived from http://www.equasys.de/colorconversion.html
// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a 2 factor
// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
// is suboptimal for image quality, but by far the fastest method.
// For all methods, width and height should be even; if not, the last row/column of the result image won't be affected.
// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
// Include guard: without it, including this header twice redefines YCbCrType.
#ifndef YUV_RGB_H
#define YUV_RGB_H

#include <stdint.h>

// YCbCr color space variant, selects the conversion matrix used by every function below
typedef enum
{
    YCBCR_JPEG,
    YCBCR_601,
    YCBCR_709
} YCbCrType;

#ifdef __cplusplus
extern "C" {
#endif

// Common parameters:
//   width, height       image size in pixels
//   y, u, v             planar yuv420 channels (u and v subsampled by 2 in both directions)
//   uv                  single interleaved chroma plane of the semi planar nv12/nv21 formats
//   rgb / rgba          packed rgb24 (3 bytes per pixel) / rgb32 (4 bytes per pixel) image
//   *_stride            stride of the corresponding plane (the test program passes distances in bytes between rows)
//   yuv_type            color space variant, see YCbCrType

// yuv to rgb, standard c implementation
void yuv420_rgb24_std(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv to rgb, yuv in nv12 semi planar format
void nv12_rgb24_std(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv to rgb, yuv in nv21 semi planar format
void nv21_rgb24_std(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void yuv420_rgb24_sse(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void yuv420_rgb24_sseu(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv nv12 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void nv12_rgb24_sse(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv nv12 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void nv12_rgb24_sseu(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv nv21 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void nv21_rgb24_sse(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// yuv nv21 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
void nv21_rgb24_sseu(
    uint32_t width, uint32_t height,
    const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
    uint8_t *rgb, uint32_t rgb_stride,
    YCbCrType yuv_type);

// rgb to yuv, standard c implementation
void rgb24_yuv420_std(
    uint32_t width, uint32_t height,
    const uint8_t *rgb, uint32_t rgb_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

// rgb to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
void rgb24_yuv420_sse(
    uint32_t width, uint32_t height,
    const uint8_t *rgb, uint32_t rgb_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

// rgb to yuv, sse implementation
// pointers do not need to be 16 byte aligned
void rgb24_yuv420_sseu(
    uint32_t width, uint32_t height,
    const uint8_t *rgb, uint32_t rgb_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

// rgba to yuv, standard c implementation
// alpha channel is ignored
void rgb32_yuv420_std(
    uint32_t width, uint32_t height,
    const uint8_t *rgba, uint32_t rgba_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

// rgba to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// alpha channel is ignored
void rgb32_yuv420_sse(
    uint32_t width, uint32_t height,
    const uint8_t *rgba, uint32_t rgba_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

// rgba to yuv, sse implementation
// pointers do not need to be 16 byte aligned
// alpha channel is ignored
void rgb32_yuv420_sseu(
    uint32_t width, uint32_t height,
    const uint8_t *rgba, uint32_t rgba_stride,
    uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
    YCbCrType yuv_type);

#ifdef __cplusplus
}
#endif

#endif // YUV_RGB_H