forked from SciSharp/TensorFlow.NET
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextApi.cs
More file actions
35 lines (32 loc) · 1.12 KB
/
TextApi.cs
File metadata and controls
35 lines (32 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Tensorflow.Keras.Text;
namespace Tensorflow.Keras
{
/// <summary>
/// Entry point for the text preprocessing helpers: a factory for
/// <see cref="Tensorflow.Keras.Text.Tokenizer"/> and a standalone word splitter.
/// </summary>
public class TextApi
{
    /// <summary>
    /// Characters treated as punctuation/whitespace noise by default
    /// (everything in the list is replaced by the separator before splitting).
    /// </summary>
    private const string DefaultFilter = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n";

    /// <summary>
    /// Creates a new <see cref="Tensorflow.Keras.Text.Tokenizer"/>.
    /// Every argument is forwarded unchanged to the tokenizer's constructor.
    /// </summary>
    /// <param name="num_words">Forwarded to the tokenizer; defaults to -1.</param>
    /// <param name="filters">Forwarded to the tokenizer; defaults to the built-in punctuation list.</param>
    /// <param name="lower">Forwarded to the tokenizer; defaults to <c>true</c>.</param>
    /// <param name="split">Forwarded to the tokenizer; defaults to a single space.</param>
    /// <param name="char_level">Forwarded to the tokenizer; defaults to <c>false</c>.</param>
    /// <param name="oov_token">Forwarded to the tokenizer; defaults to <c>null</c>.</param>
    /// <param name="analyzer">Forwarded to the tokenizer; defaults to <c>null</c>.</param>
    /// <returns>The newly constructed tokenizer.</returns>
    public Tensorflow.Keras.Text.Tokenizer Tokenizer(
        int num_words = -1,
        string filters = DefaultFilter,
        bool lower = true,
        char split = ' ',
        bool char_level = false,
        string oov_token = null,
        Func<string, IEnumerable<string>> analyzer = null)
    {
        return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
    }

    /// <summary>
    /// Splits <paramref name="text"/> into a sequence of tokens.
    /// The text is optionally lower-cased, every character listed in
    /// <paramref name="filters"/> is replaced by <paramref name="split"/>, the result is
    /// split on <paramref name="split"/>, and empty tokens are discarded.
    /// </summary>
    /// <param name="text">The text to tokenize. Must not be <c>null</c>.</param>
    /// <param name="filters">
    /// Characters to treat as separators. <c>null</c> or empty disables filtering.
    /// </param>
    /// <param name="lower">When <c>true</c>, the text is lower-cased (culture-invariant) first.</param>
    /// <param name="split">The token separator.</param>
    /// <returns>The non-empty tokens, in order of appearance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public static IEnumerable<string> text_to_word_sequence(string text, string filters = DefaultFilter, bool lower = true, char split = ' ')
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        if (lower)
        {
            // Invariant casing keeps tokenization independent of the current thread culture.
            text = text.ToLowerInvariant();
        }

        if (!string.IsNullOrEmpty(filters))
        {
            // Filtered characters become separators rather than vanishing, so that
            // "hello,world" yields two tokens instead of the fused "helloworld".
            var filtered = new HashSet<char>(filters);
            text = new string(text.Select(c => filtered.Contains(c) ? split : c).ToArray());
        }

        // Runs of separators (including those produced by filtering) must not emit empty tokens.
        return text.Split(new[] { split }, StringSplitOptions.RemoveEmptyEntries);
    }
}
}