CI130X SDK API手册  2.2.0
本手册用于描述CI130X SDK各个组件和驱动API
sonic.h
浏览该文件的文档.
1 /* Sonic library
2  Copyright 2010
3  Bill Cox
4  This file is part of the Sonic Library.
5 
6  This file is licensed under the Apache 2.0 license.
7 */
8 
9 /*
10 The Sonic Library implements a new algorithm invented by Bill Cox for the
11 specific purpose of speeding up speech by high factors at high quality. It
12 generates smooth speech at speed up factors as high as 6X, possibly more. It is
13 also capable of slowing down speech, and generates high quality results
14 regardless of the speed up or slow down factor. For speeding up speech by 2X or
15 more, the following equation is used:
16 
17  newSamples = period/(speed - 1.0)
18  scale = 1.0/newSamples;
19 
20 where period is the current pitch period, determined using AMDF or any other
21 pitch estimator, and speed is the speedup factor. If the current position in
22 the input stream is pointed to by "samples", and the current output stream
23 position is pointed to by "out", then newSamples number of samples can be
24 generated with:
25 
26  out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
27 
28 where t = 0 to newSamples - 1.
29 
30 For speed factors < 2X, the PICOLA algorithm is used. The above
31 algorithm is first used to double the speed of one pitch period. Then, enough
32 input is directly copied from the input to the output to achieve the desired
33 speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
34 
35  speed = (2*period + length)/(period + length)
36  speed*length + speed*period = 2*period + length
37  length(speed - 1) = 2*period - speed*period
38  length = period*(2 - speed)/(speed - 1)
39 
40 For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
41 the output twice, and length of input is copied from the input to the output
42 until the output desired speed is reached. The length of data copied is:
43 
44  length = period*(speed - 0.5)/(1 - speed)
45 
46 For slow down factors below 0.5, no data is copied, and an algorithm
47 similar to high speed factors is used.
48 */
49 
50 /* Uncomment this to use sine-wave based overlap-add, which in theory can improve
51  sound quality slightly, at the expense of lots of floating point math. */
52 /* #define SONIC_USE_SIN */
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58 /* This specifies the range of voice pitches we try to match.
59  Note that if we go lower than 65, we could overflow in findPitchInRange */
60 #define SONIC_MIN_PITCH 65
61 #define SONIC_MAX_PITCH 400
62 
63 /* These are used to down-sample some inputs to improve speed */
64 #define SONIC_AMDF_FREQ 4000
65 
66 struct sonicStreamStruct;
67 typedef struct sonicStreamStruct* sonicStream;
68 
69 /* For all of the following functions, numChannels is multiplied by numSamples
70  to determine the actual number of values read or returned. */
71 
72 /* Create a sonic stream. Return NULL only if we are out of memory and cannot
73  allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
74 sonicStream sonicCreateStream(int sampleRate, int numChannels);
75 /* Destroy the sonic stream. */
76 void sonicDestroyStream(sonicStream stream);
77 /* Use this to write floating point data to be sped up or slowed down into the stream.
78  Values must be between -1 and 1. Return 0 if memory realloc failed,
79  otherwise 1 */
80 int sonicWriteFloatToStream(sonicStream stream, float* samples, int numSamples);
81 /* Use this to write 16-bit data to be sped up or slowed down into the stream.
82  Return 0 if memory realloc failed, otherwise 1 */
83 int sonicWriteShortToStream(sonicStream stream, short* samples, int numSamples);
84 /* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream.
85  Return 0 if memory realloc failed, otherwise 1 */
86 int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char* samples,
87  int numSamples);
88 /* Use this to read floating point data out of the stream. Sometimes no data
89  will be available, and zero is returned, which is not an error condition. */
90 int sonicReadFloatFromStream(sonicStream stream, float* samples,
91  int maxSamples);
92 /* Use this to read 16-bit data out of the stream. Sometimes no data will
93  be available, and zero is returned, which is not an error condition. */
94 int sonicReadShortFromStream(sonicStream stream, short* samples,
95  int maxSamples);
96 /* Use this to read 8-bit unsigned data out of the stream. Sometimes no data
97  will be available, and zero is returned, which is not an error condition. */
98 int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
99  int maxSamples);
100 /* Force the sonic stream to generate output using whatever data it currently
101  has. No extra delay will be added to the output, but flushing in the middle
102  of words could introduce distortion. */
103 int sonicFlushStream(sonicStream stream);
104 /* Return the number of samples in the output buffer */
105 int sonicSamplesAvailable(sonicStream stream);
106 /* Get the speed of the stream. */
107 float sonicGetSpeed(sonicStream stream);
108 /* Set the speed of the stream. */
109 void sonicSetSpeed(sonicStream stream, float speed);
110 /* Get the pitch of the stream. */
111 float sonicGetPitch(sonicStream stream);
112 /* Set the pitch of the stream. */
113 void sonicSetPitch(sonicStream stream, float pitch);
114 /* Get the rate of the stream. */
115 float sonicGetRate(sonicStream stream);
116 /* Set the rate of the stream. */
117 void sonicSetRate(sonicStream stream, float rate);
118 /* Get the scaling factor of the stream. */
119 float sonicGetVolume(sonicStream stream);
120 /* Set the scaling factor of the stream. */
121 void sonicSetVolume(sonicStream stream, float volume);
122 /* Get the chord pitch setting. */
123 int sonicGetChordPitch(sonicStream stream);
124 /* Set chord pitch mode on or off. Default is off. See the documentation
125  page for a description of this feature. */
126 void sonicSetChordPitch(sonicStream stream, int useChordPitch);
127 /* Get the quality setting. */
128 int sonicGetQuality(sonicStream stream);
129 /* Set the "quality". Default 0 is virtually as good as 1, but very much
130  * faster. */
131 void sonicSetQuality(sonicStream stream, int quality);
132 /* Get the sample rate of the stream. */
133 int sonicGetSampleRate(sonicStream stream);
134 /* Set the sample rate of the stream. This will drop any samples that have not
135  * been read. */
136 int sonicSetSampleRate(sonicStream stream, int sampleRate);
137 /* Get the number of channels. */
138 int sonicGetNumChannels(sonicStream stream);
139 /* Set the number of channels. This will drop any samples that have not been
140  * read. */
141 int sonicSetNumChannels(sonicStream stream, int numChannels);
142 /* Clean a sonic stream. */
143 void sonicCleanStream(sonicStream stream);
144 /* This is a non-stream oriented interface to just change the speed of a sound
145  sample. It works in-place on the sample array, so there must be at least
146  speed*numSamples available space in the array. Returns the new number of
147  samples. */
148 int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
149  float pitch, float rate, float volume,
150  int useChordPitch, int sampleRate, int numChannels);
151 /* This is a non-stream oriented interface to just change the speed of a sound
152  sample. It works in-place on the sample array, so there must be at least
153  speed*numSamples available space in the array. Returns the new number of
154  samples. */
155 int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
156  float pitch, float rate, float volume,
157  int useChordPitch, int sampleRate, int numChannels);
158 
159 #ifdef SONIC_SPECTROGRAM
160 /*
161 This code generates high quality spectrograms from sound samples, using
162 Time-Aliased-FFTs as described at:
163 
164  https://github.com/waywardgeek/spectrogram
165 
166 Basically, two adjacent pitch periods are overlap-added to create a sound
167 sample that accurately represents the speech sound at that moment in time.
168 This set of samples is converted to a spectral line using an FFT, and the result
169 is saved as a single spectral line at that moment in time. The resulting
170 spectral lines vary in resolution (it is equal to the number of samples in the
171 pitch period), and the spacing of spectral lines also varies (proportional to
172 the number of samples in the pitch period).
173 
174 To generate a bitmap, linear interpolation is used to render the grayscale
175 value at any particular point in time and frequency.
176 */
177 
178 #define SONIC_MAX_SPECTRUM_FREQ 5000
179 
180 struct sonicSpectrogramStruct;
181 struct sonicBitmapStruct;
182 typedef struct sonicSpectrogramStruct* sonicSpectrogram;
183 typedef struct sonicBitmapStruct* sonicBitmap;
184 
185 /* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
186  pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size.
187  Rows are indexed top to bottom and columns are indexed left to right */
188 struct sonicBitmapStruct {
189  unsigned char* data;
190  int numRows;
191  int numCols;
192 };
193 
194 typedef struct sonicBitmapStruct* sonicBitmap;
195 
196 /* Enable computation of a spectrogram on the fly. */
197 void sonicComputeSpectrogram(sonicStream stream);
198 
199 /* Get the spectrogram. */
200 sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
201 
202 /* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
203  has been called. */
204 sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
205 
206 /* Destroy the spectrogram. This is called automatically when calling
207  sonicDestroyStream. */
208 void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
209 
210 /* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
211 sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
212  int numRows, int numCols);
213 
214 /* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
215 void sonicDestroyBitmap(sonicBitmap bitmap);
216 
217 int sonicWritePGM(sonicBitmap bitmap, char* fileName);
218 
219 /* Add two pitch periods worth of samples to the spectrogram. There must be
220  2*period samples. Time should advance one pitch period for each call to
221  this function. */
222 void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
223  short* samples, int period,
224  int numChannels);
225 #endif /* SONIC_SPECTROGRAM */
226 
227 #ifdef __cplusplus
228 }
229 #endif
void sonicSetChordPitch(sonicStream stream, int useChordPitch)
Definition: sonic.c:226
int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples)
Definition: sonic.c:1151
int sonicSamplesAvailable(sonicStream stream)
Definition: sonic.c:613
float sonicGetRate(sonicStream stream)
Definition: sonic.c:211
float speed
Definition: sonic.c:141
void sonicSetVolume(sonicStream stream, float volume)
Definition: sonic.c:243
void sonicSetSpeed(sonicStream stream, float speed)
Definition: sonic.c:202
int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples)
Definition: sonic.c:526
sonicStream sonicCreateStream(int sampleRate, int numChannels)
Definition: sonic.c:320
int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples)
Definition: sonic.c:551
int sampleRate
Definition: sonic.c:160
int sonicSetSampleRate(sonicStream stream, int sampleRate)
Definition: sonic.c:346
void sonicCleanStream(sonicStream stream)
Definition: sonic.c:362
float volume
Definition: sonic.c:142
int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch, float rate, float volume, int useChordPitch, int sampleRate, int numChannels)
Definition: sonic.c:1161
int sonicFlushStream(sonicStream stream)
Definition: sonic.c:582
int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples)
Definition: sonic.c:496
int sonicSetNumChannels(sonicStream stream, int numChannels)
Definition: sonic.c:356
int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch, float rate, float volume, int useChordPitch, int sampleRate, int numChannels)
Definition: sonic.c:1181
void sonicSetQuality(sonicStream stream, int quality)
Definition: sonic.c:235
float sonicGetPitch(sonicStream stream)
Definition: sonic.c:205
sonicStreamStruct
Definition: sonic.c:133
int sonicGetNumChannels(sonicStream stream)
Definition: sonic.c:352
void sonicSetPitch(sonicStream stream, float pitch)
Definition: sonic.c:208
int quality
Definition: sonic.c:148
int sonicGetChordPitch(sonicStream stream)
Definition: sonic.c:223
float pitch
Definition: sonic.c:143
void sonicSetRate(sonicStream stream, float rate)
Definition: sonic.c:215
float sonicGetVolume(sonicStream stream)
Definition: sonic.c:240
int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples)
Definition: sonic.c:1141
int useChordPitch
Definition: sonic.c:147
float rate
Definition: sonic.c:144
float sonicGetSpeed(sonicStream stream)
Definition: sonic.c:199
void sonicDestroyStream(sonicStream stream)
Definition: sonic.c:264
int sonicGetQuality(sonicStream stream)
Definition: sonic.c:231
struct sonicStreamStruct * sonicStream
Definition: sonic.h:67
int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples)
Definition: sonic.c:1131
int sonicGetSampleRate(sonicStream stream)
Definition: sonic.c:342
int numChannels
Definition: sonic.c:149