Azure Document Intelligence ile .NET'te Form ve Fatura Analizi
Azure Document Intelligence (eski adıyla Form Recognizer), belgelerden yapılandırılmış veri çıkaran bir AI hizmetidir. Faturalardan, sözleşmelerden, kimlik belgelerinden ve özel formlardan anahtar-değer çiftlerini, tabloları ve metin bloklarını otomatik olarak tanıyabilir.
Kurulum
dotnet add package Azure.AI.FormRecognizer
Fatura Analizi (Prebuilt Invoice Modeli)
using Azure;
using Azure.AI.FormRecognizer.DocumentAnalysis;
/// <summary>
/// Analyzes invoices with the "prebuilt-invoice" model and maps the recognized
/// fields onto <see cref="InvoiceData"/>.
/// </summary>
public class InvoiceAnalysisService
{
    private const string InvoiceModelId = "prebuilt-invoice";

    private readonly DocumentAnalysisClient _client;

    /// <summary>
    /// Creates the analysis client from the <c>DocumentIntelligence:Endpoint</c> and
    /// <c>DocumentIntelligence:Key</c> configuration values.
    /// </summary>
    /// <param name="config">Application configuration holding the endpoint and key.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A required setting is missing or blank.</exception>
    public InvoiceAnalysisService(IConfiguration config)
    {
        ArgumentNullException.ThrowIfNull(config);

        _client = new DocumentAnalysisClient(
            new Uri(GetRequiredSetting(config, "DocumentIntelligence:Endpoint")),
            new AzureKeyCredential(GetRequiredSetting(config, "DocumentIntelligence:Key")));
    }

    /// <summary>
    /// Runs the prebuilt invoice model against the document at <paramref name="invoiceUri"/>
    /// and returns the fields of the first recognized invoice.
    /// </summary>
    /// <param name="invoiceUri">Location of the invoice document to analyze.</param>
    /// <param name="cancellationToken">Token used to cancel the analysis call.</param>
    /// <returns>The extracted invoice; fields that were not recognized are null.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="invoiceUri"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The service recognized no invoice in the document.</exception>
    public async Task<InvoiceData> AnalyzeInvoiceAsync(Uri invoiceUri, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(invoiceUri);

        var operation = await _client.AnalyzeDocumentFromUriAsync(
            WaitUntil.Completed,
            InvoiceModelId,
            invoiceUri,
            cancellationToken: cancellationToken);

        var documents = operation.Value.Documents;
        if (documents.Count == 0)
        {
            // Indexing an empty result would surface as an opaque out-of-range error.
            throw new InvalidOperationException($"No invoice was recognized in '{invoiceUri}'.");
        }

        var invoice = documents[0];

        // InvoiceData is a positional record, so it must be built through its constructor;
        // named arguments keep the field-to-property mapping explicit.
        return new InvoiceData(
            VendorName: GetFieldValue(invoice, "VendorName"),
            CustomerName: GetFieldValue(invoice, "CustomerName"),
            InvoiceId: GetFieldValue(invoice, "InvoiceId"),
            InvoiceDate: GetDateField(invoice, "InvoiceDate"),
            DueDate: GetDateField(invoice, "DueDate"),
            SubTotal: GetCurrencyField(invoice, "SubTotal"),
            TotalTax: GetCurrencyField(invoice, "TotalTax"),
            InvoiceTotal: GetCurrencyField(invoice, "InvoiceTotal"),
            LineItems: ExtractLineItems(invoice));
    }

    /// <summary>Reads a configuration value and fails with a descriptive message when it is absent.</summary>
    private static string GetRequiredSetting(IConfiguration config, string key)
    {
        var value = config[key];
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException($"Configuration value '{key}' is missing or empty.");
        }

        return value;
    }

    /// <summary>Returns the raw text content of a field, or null when the field is absent.</summary>
    private static string? GetFieldValue(AnalyzedDocument doc, string fieldName)
    {
        if (doc.Fields.TryGetValue(fieldName, out var field))
        {
            return field.Content;
        }

        return null;
    }

    /// <summary>Returns a field's date value, or null when the field is absent or not typed as a date.</summary>
    private static DateTimeOffset? GetDateField(AnalyzedDocument doc, string fieldName)
    {
        if (doc.Fields.TryGetValue(fieldName, out var field) && field.FieldType == DocumentFieldType.Date)
        {
            return field.Value.AsDate();
        }

        return null;
    }

    /// <summary>Returns a field's currency amount, or null when the field is absent or not typed as currency.</summary>
    private static double? GetCurrencyField(AnalyzedDocument doc, string fieldName)
        => ReadCurrency(doc.Fields, fieldName);

    /// <summary>
    /// Maps the "Items" list field to line items. Returns an empty list when the field
    /// is absent or is not a list; entries that are not dictionaries are skipped.
    /// </summary>
    private static List<InvoiceLineItem> ExtractLineItems(AnalyzedDocument doc)
    {
        var items = new List<InvoiceLineItem>();

        // The typed accessors are only called after checking FieldType,
        // so an unexpected field shape yields no items instead of an exception.
        if (!doc.Fields.TryGetValue("Items", out var itemsField) ||
            itemsField.FieldType != DocumentFieldType.List)
        {
            return items;
        }

        foreach (var item in itemsField.Value.AsList())
        {
            if (item.FieldType != DocumentFieldType.Dictionary)
            {
                continue;
            }

            var fields = item.Value.AsDictionary();

            items.Add(new InvoiceLineItem(
                Description: fields.TryGetValue("Description", out var description) ? description.Content : null,
                Quantity: ReadNumber(fields, "Quantity"),
                UnitPrice: ReadCurrency(fields, "UnitPrice"),
                Amount: ReadCurrency(fields, "Amount")));
        }

        return items;
    }

    /// <summary>Reads a currency amount from a field map; null when absent or of another type.</summary>
    private static double? ReadCurrency(IReadOnlyDictionary<string, DocumentField> fields, string fieldName)
    {
        if (fields.TryGetValue(fieldName, out var field) && field.FieldType == DocumentFieldType.Currency)
        {
            return field.Value.AsCurrency().Amount;
        }

        return null;
    }

    /// <summary>Reads a numeric value from a field map; null when absent or not numeric.</summary>
    private static double? ReadNumber(IReadOnlyDictionary<string, DocumentField> fields, string fieldName)
    {
        if (!fields.TryGetValue(fieldName, out var field))
        {
            return null;
        }

        if (field.FieldType == DocumentFieldType.Double)
        {
            return field.Value.AsDouble();
        }

        if (field.FieldType == DocumentFieldType.Int64)
        {
            return field.Value.AsInt64();
        }

        return null;
    }
}
/// <summary>
/// Header-level values and line items of one invoice.
/// Every scalar member is nullable so a missing value can be represented as null.
/// </summary>
/// <param name="VendorName">Name of the vendor.</param>
/// <param name="CustomerName">Name of the customer.</param>
/// <param name="InvoiceId">Invoice identifier.</param>
/// <param name="InvoiceDate">Invoice date.</param>
/// <param name="DueDate">Payment due date.</param>
/// <param name="SubTotal">Subtotal amount.</param>
/// <param name="TotalTax">Total tax amount.</param>
/// <param name="InvoiceTotal">Invoice total amount.</param>
/// <param name="LineItems">Line items belonging to the invoice.</param>
public record InvoiceData(
    string? VendorName,
    string? CustomerName,
    string? InvoiceId,
    DateTimeOffset? InvoiceDate,
    DateTimeOffset? DueDate,
    double? SubTotal,
    double? TotalTax,
    double? InvoiceTotal,
    List<InvoiceLineItem> LineItems);
/// <summary>
/// One line of an invoice. Every member is nullable so a missing value can be represented as null.
/// </summary>
/// <param name="Description">Text describing the line.</param>
/// <param name="Quantity">Quantity for the line.</param>
/// <param name="UnitPrice">Price per unit.</param>
/// <param name="Amount">Amount for the line.</param>
public record InvoiceLineItem(
    string? Description,
    double? Quantity,
    double? UnitPrice,
    double? Amount);
Kimlik Belgesi Analizi
/// <summary>
/// Runs the "prebuilt-idDocument" model (locale hint "tr-TR") on the supplied stream
/// and returns the fields of the first recognized identity document.
/// </summary>
/// <param name="documentStream">Stream containing the document to analyze.</param>
/// <param name="cancellationToken">Token used to cancel the analysis call.</param>
/// <returns>The extracted identity data together with the document-level confidence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="documentStream"/> is null.</exception>
/// <exception cref="InvalidOperationException">The service recognized no identity document.</exception>
public async Task<IdDocumentData> AnalyzeIdDocumentAsync(Stream documentStream, CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(documentStream);

    var operation = await _client.AnalyzeDocumentAsync(
        WaitUntil.Completed,
        "prebuilt-idDocument",
        documentStream,
        new AnalyzeDocumentOptions { Locale = "tr-TR" },
        cancellationToken);

    var documents = operation.Value.Documents;
    if (documents.Count == 0)
    {
        // Indexing an empty result would surface as an opaque out-of-range error.
        throw new InvalidOperationException("No identity document was recognized in the supplied stream.");
    }

    var doc = documents[0];

    return new IdDocumentData
    {
        FirstName = GetFieldValue(doc, "FirstName"),
        LastName = GetFieldValue(doc, "LastName"),
        DocumentNumber = GetFieldValue(doc, "DocumentNumber"),
        DateOfBirth = GetDateField(doc, "DateOfBirth"),
        Nationality = GetFieldValue(doc, "Nationality"),
        Confidence = doc.Confidence
    };
}
Özel Model Eğitimi
Kendi form şablonlarınız için özel model eğitebilirsiniz:
/// <summary>
/// Builds custom document models and analyzes documents with them.
/// </summary>
public class CustomModelTrainer
{
    private readonly DocumentModelAdministrationClient _adminClient;
    private readonly DocumentAnalysisClient _client;

    /// <summary>
    /// Creates a trainer from the two SDK clients it needs.
    /// </summary>
    /// <param name="adminClient">Client used to build models.</param>
    /// <param name="client">Client used to analyze documents.</param>
    /// <exception cref="ArgumentNullException">Either client is null.</exception>
    public CustomModelTrainer(DocumentModelAdministrationClient adminClient, DocumentAnalysisClient client)
    {
        ArgumentNullException.ThrowIfNull(adminClient);
        ArgumentNullException.ThrowIfNull(client);

        _adminClient = adminClient;
        _client = client;
    }

    /// <summary>
    /// Builds a template-mode custom model from the training data at
    /// <paramref name="trainingDataUri"/> and waits for the build to complete.
    /// </summary>
    /// <param name="trainingDataUri">Location of the training data set.</param>
    /// <param name="modelId">Identifier to assign to the new model.</param>
    /// <param name="description">Description stored with the model.</param>
    /// <returns>The identifier of the built model.</returns>
    public async Task<string> TrainCustomModelAsync(
        Uri trainingDataUri,
        string modelId,
        string description)
    {
        ArgumentNullException.ThrowIfNull(trainingDataUri);

        // The SDK expects (waitUntil, trainingDataUri, buildMode, modelId, ...).
        // `options` is passed by name because the next positional parameter is the blob prefix.
        var operation = await _adminClient.BuildDocumentModelAsync(
            WaitUntil.Completed,
            trainingDataUri,
            DocumentBuildMode.Template, // or DocumentBuildMode.Neural
            modelId,
            options: new BuildDocumentModelOptions
            {
                Description = description
            });

        var model = operation.Value;

        Console.WriteLine($"Model eğitildi: {model.ModelId}");
        Console.WriteLine($"Desteklenen belge türleri: " +
            $"{string.Join(", ", model.DocumentTypes.Keys)}");

        return model.ModelId;
    }

    /// <summary>
    /// Analyzes a document with the model identified by <paramref name="modelId"/>
    /// and returns the raw analysis result.
    /// </summary>
    /// <param name="modelId">Identifier of the model to run.</param>
    /// <param name="documentStream">Stream containing the document to analyze.</param>
    /// <returns>The complete analysis result returned by the service.</returns>
    public async Task<AnalyzeResult> AnalyzeWithCustomModelAsync(
        string modelId, Stream documentStream)
    {
        ArgumentNullException.ThrowIfNull(documentStream);

        var operation = await _client.AnalyzeDocumentAsync(
            WaitUntil.Completed,
            modelId,
            documentStream);

        return operation.Value;
    }
}
Toplu Belge İşleme
/// <summary>
/// Analyzes a batch of invoice URLs with bounded concurrency and reports a
/// per-document success or failure result.
/// </summary>
public class BatchDocumentProcessor
{
    private const int MaxConcurrentRequests = 5;

    private readonly InvoiceAnalysisService _analyzer;

    /// <summary>
    /// Creates a processor that delegates each document to <paramref name="analyzer"/>.
    /// </summary>
    /// <param name="analyzer">Service used to analyze each invoice.</param>
    /// <exception cref="ArgumentNullException"><paramref name="analyzer"/> is null.</exception>
    public BatchDocumentProcessor(InvoiceAnalysisService analyzer)
    {
        ArgumentNullException.ThrowIfNull(analyzer);
        _analyzer = analyzer;
    }

    /// <summary>
    /// Analyzes every URL in <paramref name="documentUrls"/>, running at most
    /// <see cref="MaxConcurrentRequests"/> analyses at a time.
    /// </summary>
    /// <param name="documentUrls">URLs of the invoices to analyze.</param>
    /// <param name="ct">Token that stops further documents from being scheduled.</param>
    /// <returns>
    /// One result per processed URL, in no particular order. A failed document is
    /// reported with <c>Success: false</c> and the exception message.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="documentUrls"/> is null.</exception>
    public async Task<IReadOnlyList<ProcessingResult>> ProcessBatchAsync(
        IEnumerable<string> documentUrls,
        CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(documentUrls);

        var results = new ConcurrentBag<ProcessingResult>();

        // MaxDegreeOfParallelism already caps concurrency, so no extra semaphore is needed.
        var options = new ParallelOptions
        {
            MaxDegreeOfParallelism = MaxConcurrentRequests,
            CancellationToken = ct
        };

        await Parallel.ForEachAsync(documentUrls, options, async (url, _) =>
        {
            try
            {
                var data = await _analyzer.AnalyzeInvoiceAsync(new Uri(url));
                results.Add(new ProcessingResult(url, data, Success: true));
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: one bad document must not abort the batch.
                results.Add(new ProcessingResult(url, null, Success: false, Error: ex.Message));
            }
        });

        return results.ToList();
    }
}
ERP Entegrasyonu
/// <summary>
/// Analyzes an uploaded invoice with the "prebuilt-invoice" model and creates
/// the corresponding invoice in the ERP system.
/// </summary>
/// <param name="file">The uploaded invoice document.</param>
/// <param name="cancellationToken">Token used to cancel the analysis call.</param>
/// <returns>
/// 200 with the created invoice id; 400 when no file content was uploaded;
/// 422 when no invoice could be recognized in the upload.
/// </returns>
[HttpPost("invoices/process")]
public async Task<IActionResult> ProcessInvoice(IFormFile file, CancellationToken cancellationToken = default)
{
    if (file is null || file.Length == 0)
    {
        return BadRequest(new { Error = "An invoice file is required." });
    }

    await using var stream = file.OpenReadStream();

    var operation = await _client.AnalyzeDocumentAsync(
        WaitUntil.Completed, "prebuilt-invoice", stream, cancellationToken: cancellationToken);

    var documents = operation.Value.Documents;
    if (documents.Count == 0)
    {
        // Nothing was recognized; report it to the caller instead of failing on an empty list.
        return UnprocessableEntity(new { Error = "No invoice could be recognized in the uploaded file." });
    }

    var invoiceData = _mapper.MapToInvoiceDto(documents[0]);

    // Push the mapped invoice to the ERP system.
    var erpResult = await _erpService.CreateInvoiceAsync(invoiceData);

    return Ok(new { InvoiceId = erpResult.Id, Status = "Created" });
}
Desteklenen Prebuilt Modeller
| Model | Kullanım Amacı |
|---|---|
| prebuilt-invoice | Faturalar |
| prebuilt-receipt | Perakende fişleri |
| prebuilt-idDocument | Kimlik belgesi, pasaport |
| prebuilt-businessCard | Kartvizitler |
| prebuilt-contract | Sözleşmeler |
| prebuilt-read | Genel OCR |
| prebuilt-layout | Tablo ve yapı analizi |
Sonuç
Azure Document Intelligence, belge işleme süreçlerini otomatikleştirmenin en etkili yollarından biridir. Muhasebe, lojistik ve finans sektörlerinde fatura, irsaliye ve sözleşme işleme sürelerini önemli ölçüde kısaltabilir. Özel modeller sayesinde kendi şablonlarınıza uygun veri çıkarımı yapabilirsiniz.