MeCabはC/C++のライブラリを提供しています。また, SWIGを通して Perl/Ruby/Python から利用することも可能です。
シングルスレッド環境で単純な形態素解析を行う場合は、MeCab::Tagger
クラスのみでほとんどのことが行えます。マルチスレッド環境で1つの辞書を共有しながら形態素解析を行いたい場合や、
MeCabの辞書を解析中にアップデートするなど、高度な応用には MeCab::Tagger
に加えて、 MeCab::Model
, MeCab::Lattice
クラスを使用します。
C++のAPIセットのドキュメントはこちらを参照ください。 CのAPIセットはこちらを参照ください。
C++ サンプルコード
シングルスレッド環境 (MeCab::Tagger
)
Taggerオブジェクトを生成すると、1つの辞書オブジェクトが生成されます。
#include <iostream>
#include <mecab.h>
// CHECK(eval): leaves the enclosing function when |eval| is false/null.
// Prints "Exception:" followed by tagger->what() -- or by
// MeCab::getTaggerError() when |tagger| is null -- to std::cerr, deletes
// |tagger| and returns -1.
// Requires a `MeCab::Tagger *tagger` variable in scope at the point of use.
// The argument is parenthesized so that a compound expression such as
// CHECK(a || b) is negated as a whole rather than only its first operand.
#define CHECK(eval) if (!(eval)) { \
    const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); \
    std::cerr << "Exception:" << e << std::endl; \
    delete tagger; \
    return -1; }
// Sample of MeCab::Tagger class.
int main (int argc, char **argv) {
char input[1024] = "太郎は次郎が持っている本を花子に渡した。";
MeCab::Tagger *tagger = MeCab::createTagger("");
CHECK(tagger);
// Gets tagged result in string format.
const char *result = tagger->parse(input);
CHECK(result);
std::cout << "INPUT: " << input << std::endl;
std::cout << "RESULT: " << result << std::endl;
// Gets N best results in string format.
result = tagger->parseNBest(3, input);
CHECK(result);
std::cout << "NBEST: " << std::endl << result;
// Gets N best results in sequence.
CHECK(tagger->parseNBestInit(input));
for (int i = 0; i < 3; ++i) {
std::cout << i << ":" << std::endl << tagger->next();
}
// Gets Node object.
const MeCab::Node* node = tagger->parseToNode(input);
CHECK(node);
for (; node; node = node->next) {
std::cout << node->id << ' ';
if (node->stat == MECAB_BOS_NODE)
std::cout << "BOS";
else if (node->stat == MECAB_EOS_NODE)
std::cout << "EOS";
else
std::cout.write (node->surface, node->length);
std::cout << ' ' << node->feature
<< ' ' << (int)(node->surface - input)
<< ' ' << (int)(node->surface - input + node->length)
<< ' ' << node->rcAttr
<< ' ' << node->lcAttr
<< ' ' << node->posid
<< ' ' << (int)node->char_type
<< ' ' << (int)node->stat
<< ' ' << (int)node->isbest
<< ' ' << node->alpha
<< ' ' << node->beta
<< ' ' << node->prob
<< ' ' << node->cost << std::endl;
}
// Dictionary info.
const MeCab::DictionaryInfo *d = tagger->dictionary_info();
for (; d; d = d->next) {
std::cout << "filename: " << d->filename << std::endl;
std::cout << "charset: " << d->charset << std::endl;
std::cout << "size: " << d->size << std::endl;
std::cout << "type: " << d->type << std::endl;
std::cout << "lsize: " << d->lsize << std::endl;
std::cout << "rsize: " << d->rsize << std::endl;
std::cout << "version: " << d->version << std::endl;
}
delete tagger;
return 0;
}
C++ サンプルコード
マルチスレッド環境 (MeCab::Tagger
, MeCab::Model
, MeCab::Lattice
)
MeCab::createModel() を使い、Modelオブジェクトを生成します。
model->createTagger() を使い、Taggerオブジェクトを生成します。Taggerはスレッド毎に複数作成しても、同一のmodelを共有します。Taggerがアクティブの間は、modelを削除してはなりません。
model->createLattice() もしくは MeCab::createLattice() を使い、Latticeオブジェクトを作成します。Latticeオブジェクトは解析に必要なすべてのローカル変数を含んでいます。必ずスレッド毎に1つのオブジェクトを作成してください。
model->swap(another_model) を呼ぶと、model から生成されたすべてのTaggerオブジェクトのモデルを another_model に置き換えます。この操作はスレッドセーフです。
#include <iostream>
#include <mecab.h>
// CHECK(eval): leaves the enclosing function when |eval| is false/null.
// Prints "Exception:" followed by tagger->what() -- or by
// MeCab::getTaggerError() when |tagger| is null -- to std::cerr, deletes
// |tagger| and returns -1. Only |tagger| is released; any other object the
// caller owns is left untouched.
// Requires a `MeCab::Tagger *tagger` variable in scope at the point of use.
// The argument is parenthesized so that a compound expression such as
// CHECK(a || b) is negated as a whole rather than only its first operand.
#define CHECK(eval) if (!(eval)) { \
    const char *e = tagger ? tagger->what() : MeCab::getTaggerError(); \
    std::cerr << "Exception:" << e << std::endl; \
    delete tagger; \
    return -1; }
int main (int argc, char **argv) {
char input[1024] = "太郎は次郎が持っている本を花子に渡した。";
// Create model object.
MeCab::Model *model = MeCab::createModel(argc, argv);
// Create Tagger
// All taggers generated by Model::createTagger() method share
// the same model/dictionary.
MeCab::Tagger *tagger = model->createTagger();
CHECK(tagger);
// Create lattice object per thread.
MeCab::Lattice *lattice = model->createLattice();
// Gets tagged result in string
lattice->set_sentence(input);
// this method is thread safe, as long as |lattice| is thread local.
CHECK(tagger->parse(lattice));
std::cout << lattice->toString() << std::endl;
// Gets node object.
const MeCab::Node* node = lattice->bos_node();
CHECK(node);
for (; node; node = node->next) {
std::cout << node->id << ' ';
if (node->stat == MECAB_BOS_NODE)
std::cout << "BOS";
else if (node->stat == MECAB_EOS_NODE)
std::cout << "EOS";
else
std::cout.write (node->surface, node->length);
std::cout << ' ' << node->feature
<< ' ' << (int)(node->surface - input)
<< ' ' << (int)(node->surface - input + node->length)
<< ' ' << node->rcAttr
<< ' ' << node->lcAttr
<< ' ' << node->posid
<< ' ' << (int)node->char_type
<< ' ' << (int)node->stat
<< ' ' << (int)node->isbest
<< ' ' << node->alpha
<< ' ' << node->beta
<< ' ' << node->prob
<< ' ' << node->cost << std::endl;
}
// begin_nodes/end_nodes
const size_t len = lattice->size();
for (int i = 0; i <= len; ++i) {
MeCab::Node *b = lattice->begin_nodes(i);
MeCab::Node *e = lattice->end_nodes(i);
for (; b; b = b->bnext) {
printf("B[%d] %s\t%s\n", i, b->surface, b->feature);
}
for (; e; e = e->enext) {
printf("E[%d] %s\t%s\n", i, e->surface, e->feature);
}
}
// N best results
lattice->set_request_type(MECAB_NBEST);
lattice->set_sentence(input);
CHECK(tagger->parse(lattice));
for (int i = 0; i < 10; ++i) {
std::cout << "NBEST: " << i << std::endl;
std::cout << lattice->toString();
if (!lattice->next()) {
// No more results
break;
}
}
// Marginal probabilities
lattice->remove_request_type(MECAB_NBEST);
lattice->set_request_type(MECAB_MARGINAL_PROB);
lattice->set_sentence(input);
CHECK(tagger->parse(lattice));
std::cout << lattice->theta() << std::endl;
for (const MeCab::Node *node = lattice->bos_node();
node; node = node->next) {
std::cout.write(node->surface, node->length);
std::cout << "\t" << node->feature;
std::cout << "\t" << node->prob << std::endl;
}
// Dictionary info
const MeCab::DictionaryInfo *d = model->dictionary_info();
for (; d; d = d->next) {
std::cout << "filename: " << d->filename << std::endl;
std::cout << "charset: " << d->charset << std::endl;
std::cout << "size: " << d->size << std::endl;
std::cout << "type: " << d->type << std::endl;
std::cout << "lsize: " << d->lsize << std::endl;
std::cout << "rsize: " << d->rsize << std::endl;
std::cout << "version: " << d->version << std::endl;
}
// Swap model atomically.
MeCab::Model *another_model = MeCab::createModel("");
model->swap(another_model);
delete lattice;
delete tagger;
delete model;
return 0;
}
C サンプルコード
シングルスレッド環境
#include <mecab.h>
#include <stdio.h>
/*
 * CHECK(eval): leaves the enclosing function when |eval| is zero/NULL.
 * Prints "Exception:" followed by mecab_strerror(mecab) to stderr,
 * destroys |mecab| and returns -1.
 * Requires a `mecab_t *mecab` variable in scope at the point of use.
 * The argument is parenthesized so that a compound expression such as
 * CHECK(a || b) is negated as a whole rather than only its first operand.
 * The expansion is a plain `if` block, so a trailing ';' after CHECK(...)
 * is optional.
 */
#define CHECK(eval) if (!(eval)) { \
    fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
    mecab_destroy(mecab); \
    return -1; }
/*
 * Sample of the single-threaded C API.
 *
 * Creates a tagger from the command-line arguments and prints, for a fixed
 * sentence, the best parse, the 3-best parses (as one string and one
 * candidate at a time), the normal/unknown nodes and the dictionary
 * information. Returns 0 on success, or -1 (through CHECK) when a MeCab
 * call fails.
 */
int main (int argc, char **argv) {
  char input[] = "太郎は次郎が持っている本を花子に渡した。";
  mecab_t *mecab;
  const mecab_node_t *node;
  const mecab_dictionary_info_t *d;
  const char *result;
  int i;

  /* Create tagger object */
  mecab = mecab_new(argc, argv);
  CHECK(mecab);

  /* Gets tagged result in string. */
  result = mecab_sparse_tostr(mecab, input);
  CHECK(result);
  printf ("INPUT: %s\n", input);
  printf ("RESULT:\n%s", result);

  /* Gets N best results */
  result = mecab_nbest_sparse_tostr (mecab, 3, input);
  CHECK(result);
  fprintf (stdout, "NBEST:\n%s", result);

  CHECK(mecab_nbest_init(mecab, input));
  for (i = 0; i < 3; ++i) {
    result = mecab_nbest_next_tostr (mecab);
    /* NULL means no further candidate; passing NULL to "%s" is undefined. */
    if (!result) {
      break;
    }
    printf ("%d:\n%s", i, result);
  }

  /* Gets node object */
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node; node = node->next) {
    /* Only normal and unknown-word nodes are printed here. */
    if (node->stat == MECAB_NOR_NODE || node->stat == MECAB_UNK_NODE) {
      fwrite (node->surface, sizeof(char), node->length, stdout);
      printf("\t%s\n", node->feature);
    }
  }

  /* Dictionary info */
  for (d = mecab_dictionary_info(mecab); d; d = d->next) {
    printf("filename: %s\n", d->filename);
    printf("charset: %s\n", d->charset);
    /* size, lsize and rsize are unsigned int fields, hence %u. */
    printf("size: %u\n", d->size);
    printf("type: %d\n", d->type);
    printf("lsize: %u\n", d->lsize);
    printf("rsize: %u\n", d->rsize);
    printf("version: %d\n", d->version);
  }

  mecab_destroy(mecab);
  return 0;
}
C サンプルコード
マルチスレッド環境
#include <mecab.h>
#include <stdio.h>
/*
 * CHECK(eval): leaves the enclosing function when |eval| is zero/NULL.
 * Prints "Exception:" followed by mecab_strerror(mecab) to stderr,
 * destroys |mecab| and returns -1. Only |mecab| is released; any model or
 * lattice the caller owns is left untouched.
 * Requires a `mecab_t *mecab` variable in scope that holds either NULL or
 * a valid tagger at the point of use.
 * The argument is parenthesized so that a compound expression such as
 * CHECK(a || b) is negated as a whole rather than only its first operand.
 */
#define CHECK(eval) if (!(eval)) { \
    fprintf (stderr, "Exception:%s\n", mecab_strerror (mecab)); \
    mecab_destroy(mecab); \
    return -1; }
int main (int argc, char **argv) {
char input[] = "太郎は次郎が持っている本を花子に渡した。";
mecab_model_t *model, *another_model;
mecab_t *mecab;
mecab_lattice_t *lattice;
const mecab_node_t *node;
const char *result;
int i;
size_t len;
model = mecab_model_new(argc, argv);
CHECK(model);
mecab = mecab_model_new_tagger(model);
CHECK(mecab);
lattice = mecab_model_new_lattice(model);
CHECK(lattice);
mecab_lattice_set_sentence(lattice, input);
mecab_parse_lattice(mecab, lattice);
printf("RESULT: %s\n", mecab_lattice_tostr(lattice));
node = mecab_lattice_get_bos_node(lattice);
for (; node; node = node->next) {
printf("%d ", node->id);
if (node->stat == MECAB_BOS_NODE)
printf("BOS");
else if (node->stat == MECAB_EOS_NODE)
printf("EOS");
else
fwrite (node->surface, sizeof(char), node->length, stdout);
printf(" %s %d %d %d %d %d %d %d %d %f %f %f %ld\n",
node->feature,
(int)(node->surface - input),
(int)(node->surface - input + node->length),
node->rcAttr,
node->lcAttr,
node->posid,
(int)node->char_type,
(int)node->stat,
(int)node->isbest,
node->alpha,
node->beta,
node->prob,
node->cost);
}
len = mecab_lattice_get_size(lattice);
for (i = 0; i <= len; ++i) {
mecab_node_t *b, *e;
b = mecab_lattice_get_begin_nodes(lattice, (size_t)i);
e = mecab_lattice_get_end_nodes(lattice, (size_t)i);
for (; b; b = b->bnext) {
printf("B[%d] %s\t%s\n", i, b->surface, b->feature);
}
for (; e; e = e->enext) {
printf("E[%d] %s\t%s\n", i, e->surface, e->feature);
}
}
mecab_lattice_set_sentence(lattice, input);
mecab_lattice_set_request_type(lattice, MECAB_NBEST);
mecab_parse_lattice(mecab, lattice);
for (i = 0; i < 10; ++i) {
fprintf(stdout, "%s", mecab_lattice_tostr(lattice));
if (!mecab_lattice_next(lattice)) {
break;
}
}
mecab_lattice_set_sentence(lattice, input);
mecab_lattice_set_request_type(lattice, MECAB_MARGINAL_PROB);
mecab_lattice_set_theta(lattice, 0.001);
mecab_parse_lattice(mecab, lattice);
node = mecab_lattice_get_bos_node(lattice);
for (; node; node = node->next) {
fwrite(node->surface, sizeof(char), node->length, stdout);
fprintf(stdout, "\t%s\t%f\n", node->feature, node->prob);
}
mecab_set_lattice_level(mecab, 0);
mecab_set_all_morphs(mecab, 1);
node = mecab_sparse_tonode(mecab, input);
CHECK(node);
for (; node; node = node->next) {
fwrite (node->surface, sizeof(char), node->length, stdout);
printf("\t%s\n", node->feature);
}
const mecab_dictionary_info_t *d = mecab_dictionary_info(mecab);
for (; d; d = d->next) {
printf("filename: %s\n", d->filename);
printf("charset: %s\n", d->charset);
printf("size: %d\n", d->size);
printf("type: %d\n", d->type);
printf("lsize: %d\n", d->lsize);
printf("rsize: %d\n", d->rsize);
printf("version: %d\n", d->version);
}
mecab_destroy(mecab);
mecab_lattice_destroy(lattice);
mecab_model_destroy(model);
return 0;
}
コンパイル方法
UNIX の場合
% cc -O2 `mecab-config --cflags` example.c -o example \
`mecab-config --libs`
Windows の場合
まず, コンパイル作業を行うディレクトリに include\mecab.h, bin\libmecab.dll, lib\libmecab.lib をコピーします.
この後の作業は, 使用するコンパイラによって微妙に変わります.
cygwin/mingw 環境の場合
% gcc -DDLL_IMPORT -I. example.c -o example.exe libmecab.dll
VC++ 環境の場合
% cl -DDLL_IMPORT -I. example.c libmecab.lib