-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgoogletts.go
105 lines (88 loc) · 2.85 KB
/
googletts.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package goEagi
import (
"context"
"fmt"
"hash/fnv"
"os"
"path/filepath"
"strconv"
"strings"
texttospeech "cloud.google.com/go/texttospeech/apiv1"
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
)
// audioExtension is the file extension appended to generated audio file names.
const audioExtension = ".wav"
// GoogleTTS holds the settings used to synthesize speech with Google Cloud
// Text-to-Speech and to store the resulting audio files.
type GoogleTTS struct {
// AudioOutputDirectory is the directory in which generated audio files are written.
AudioOutputDirectory string
// LanguageCode is sent as the voice language code of every synthesis request.
LanguageCode string
// VoiceName is sent as the voice name of every synthesis request.
VoiceName string
}
// NewGoogleTTS returns a GoogleTTS that stores synthesized audio in
// audioOutputDir and requests speech with the given languageCode and voiceName.
//
// When the GOOGLE_APPLICATION_CREDENTIALS environment variable is unset or
// empty it is set, for the whole process, to googleCred; a value that is
// already present is left untouched. audioOutputDir is created, together with
// any missing parent directories, if it does not exist yet.
//
// An error is returned when the environment variable cannot be set or when
// audioOutputDir cannot be created, which includes the case where the path
// already exists but is not a directory.
func NewGoogleTTS(googleCred, audioOutputDir, languageCode, voiceName string) (*GoogleTTS, error) {
	const credentialsEnv = "GOOGLE_APPLICATION_CREDENTIALS"

	if os.Getenv(credentialsEnv) == "" {
		if err := os.Setenv(credentialsEnv, googleCred); err != nil {
			return nil, fmt.Errorf("failed to set %s: %w", credentialsEnv, err)
		}
	}

	// MkdirAll does nothing for an existing directory and fails for an
	// existing non-directory, so a separate os.Stat probe is unnecessary and
	// would only hide an unusable path until the first write.
	if err := os.MkdirAll(audioOutputDir, os.ModePerm); err != nil {
		return nil, fmt.Errorf("failed to create audio output directory: %w", err)
	}

	return &GoogleTTS{
		AudioOutputDirectory: audioOutputDir,
		LanguageCode:         languageCode,
		VoiceName:            voiceName,
	}, nil
}
// GenerateAudio synthesizes content into a WAV file inside
// tts.AudioOutputDirectory and returns the file path without its extension,
// for playback.
//
// The file name is the hash of the lower-cased content, so asking again for
// the same text (ignoring case) returns the existing file without calling the
// Text-to-Speech API. Audio is requested as LINEAR16 at 8000 Hz using the
// configured language code and voice name.
//
// NOTE(review): the file name does not include the language code or voice
// name, and the hash is only 32 bits wide, so two different requests can map
// to the same file — confirm this is acceptable for the callers.
//
// An error is returned when the client cannot be created, when synthesis
// fails, or when the audio file cannot be written; the returned path is empty
// in those cases.
func (tts *GoogleTTS) GenerateAudio(content string) (string, error) {
	audioName := generateHash(strings.ToLower(content))
	basePath := filepath.Join(tts.AudioOutputDirectory, audioName)
	wavPath := filepath.Join(tts.AudioOutputDirectory, audioName+audioExtension)

	// os.Stat reports an existing file with a nil error (os.IsExist is meant
	// for errors from creation calls). Empty files are not treated as cached,
	// so a leftover from an interrupted run is regenerated.
	if info, err := os.Stat(wavPath); err == nil && info.Mode().IsRegular() && info.Size() > 0 {
		return basePath, nil
	}

	ctx := context.Background()
	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		return "", fmt.Errorf("failed to create client: %w", err)
	}
	defer client.Close()

	req := &texttospeechpb.SynthesizeSpeechRequest{
		Input: &texttospeechpb.SynthesisInput{
			InputSource: &texttospeechpb.SynthesisInput_Text{
				Text: content,
			},
		},
		Voice: &texttospeechpb.VoiceSelectionParams{
			LanguageCode: tts.LanguageCode,
			Name:         tts.VoiceName,
		},
		AudioConfig: &texttospeechpb.AudioConfig{
			AudioEncoding:   texttospeechpb.AudioEncoding_LINEAR16,
			SampleRateHertz: 8000,
		},
	}

	resp, err := client.SynthesizeSpeech(ctx, req)
	if err != nil {
		return "", fmt.Errorf("failed to synthesize speech: %w", err)
	}

	// The file is written only after synthesis succeeded, and it is truncated
	// rather than appended to, so it always holds exactly one response.
	if err := os.WriteFile(wavPath, resp.AudioContent, os.ModePerm); err != nil {
		// Best effort: do not leave a partial file that a later call could
		// mistake for cached audio. The write error is the one worth reporting.
		_ = os.Remove(wavPath)
		return "", fmt.Errorf("failed to write file: %w", err)
	}

	return basePath, nil
}
// generateHash returns the 32-bit FNV-1a hash of input, formatted as an
// unsigned base-10 string.
func generateHash(input string) string {
	h := fnv.New32a()
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write([]byte(input))
	return strconv.FormatUint(uint64(h.Sum32()), 10)
}